In [None]:
import json
import requests
from pprint import pprint
import os

# Variables

# Names of the search resources this notebook creates and deletes.
index_name = "<index-name>"
datasource_name = "<datasource-name>"  # blob store name
skillset_name = "<skillset-name>"
indexer_name = "<indexer-name>"
container_name = "<container-name>"

# Keys and connection strings are read from the environment, never hardcoded.
# os.getenv returns None for any variable that is not set.
cognitiveservices_key = os.getenv("COGNITIVE_SERVICE_API_KEY")  # cognitive service key
cognitiveservices_resourceid = os.getenv("COGNITIVE_SERVICE_RESOURCE_ID")
searchapikey = os.getenv("SEARCH_API_KEY")  # search api key
storage_connectionstring = os.getenv("STORAGE_CONNECTION_STRING")  # Storage Connection String

# Data Source Definition
datasouce = {
    "name": datasource_name,
    "type": "azureblob",
    "credentials": {"connectionString": storage_connectionstring},
    "container": {"name": container_name},
}

# Pieces shared by every REST call made in the cells below.
endpoint = 'https://{}/'.format(os.getenv("SEARCH_ENDPOINT"))
api_version = '?api-version=2021-04-30-Preview'
headers = {
    'Content-Type': 'application/json',
    'api-key': searchapikey,
}

# URL that lists the indexes (names only); printed as a quick check of the pieces above.
url = "".join((endpoint, "indexes", api_version, "&$select=name"))
pprint(url)

In [None]:
# Delete Index

# Issue a DELETE against the index resource named by index_name.
url = f"{endpoint}indexes/{index_name}{api_version}"
response = requests.delete(url=url, headers=headers)

In [None]:
# Delete skillset

# Issue a DELETE against the skillset resource named by skillset_name.
url = f"{endpoint}skillsets/{skillset_name}{api_version}"
response = requests.delete(url=url, headers=headers)

In [None]:
# Delete Indexer

# Issue a DELETE against the indexer resource and print the response object.
url = f"{endpoint}indexers/{indexer_name}{api_version}"
response = requests.delete(url=url, headers=headers)
pprint(response)

In [None]:
# Index Schema Definition

def _index_field(name, field_type, **attributes):
    """Return one field entry for the index schema.

    The entry holds the name and type, then the given attributes in the order
    they were passed, then the analyzer/synonym/sub-field settings that every
    field in this schema shares.
    """
    entry = {"name": name, "type": field_type}
    entry.update(attributes)
    entry["indexAnalyzer"] = None
    entry["searchAnalyzer"] = None
    entry["synonymMaps"] = []
    entry["fields"] = []
    return entry


ir_index_schema = {
    "name": index_name,
    "fields": [
        # ItemId is the document key.
        _index_field("ItemId", "Edm.String",
                     facetable=False, filterable=False, key=True,
                     retrievable=True, searchable=True, sortable=False,
                     analyzer="standard.lucene"),
        _index_field("ItemVariantId", "Edm.String",
                     facetable=False, filterable=False, key=False,
                     retrievable=True, searchable=True, sortable=False,
                     analyzer="standard.lucene"),
        _index_field("Title", "Edm.String",
                     facetable=False, filterable=False, key=False,
                     retrievable=True, searchable=True, sortable=False,
                     analyzer="en.Microsoft"),
        _index_field("Description", "Edm.String",
                     facetable=False, filterable=False, key=False,
                     retrievable=True, searchable=True, sortable=False,
                     analyzer="en.Microsoft"),
        # ReleaseDate is not searchable and therefore carries no analyzer.
        _index_field("ReleaseDate", "Edm.DateTimeOffset",
                     facetable=False, filterable=False, key=False,
                     retrievable=True, searchable=False, sortable=False),
        # Collection fields below declare neither "key" nor "sortable".
        _index_field("keyphrases", "Collection(Edm.String)",
                     facetable=True, filterable=True,
                     retrievable=True, searchable=True,
                     analyzer="en.Microsoft"),
        _index_field("text", "Collection(Edm.String)",
                     facetable=False, filterable=False,
                     retrievable=True, searchable=True,
                     analyzer="standard.lucene"),
        _index_field("layoutText", "Collection(Edm.String)",
                     facetable=False, filterable=False,
                     retrievable=True, searchable=True,
                     analyzer="standard.lucene"),
    ],
    "suggesters": [],
    "scoringProfiles": [],
    "defaultScoringProfile": "",
    "corsOptions": None,
    "analyzers": [],
    "charFilters": [],
    "tokenFilters": [],
    "tokenizers": [],
    # NOTE(review): hard-coded etag value -- confirm it is still wanted in a create request.
    "@odata.etag": "\"0x8D8B90E3409E48F\"",
}

In [None]:
# Index Schema Definition
# NOTE(review): this cell assigns ir_index_schema the same content as the cell before it.

# Flag sets shared by several fields. Dict insertion order is the order in
# which the keys appear in each generated field entry.
_scalar_flags = {
    "facetable": False,
    "filterable": False,
    "key": False,
    "retrievable": True,
    "searchable": True,
    "sortable": False,
}
_collection_flags = {
    "facetable": False,
    "filterable": False,
    "retrievable": True,
    "searchable": True,
}

# One row per index field: (name, type, per-field attributes).
_field_specs = [
    ("ItemId", "Edm.String",
     {**_scalar_flags, "key": True, "analyzer": "standard.lucene"}),
    ("ItemVariantId", "Edm.String",
     {**_scalar_flags, "analyzer": "standard.lucene"}),
    ("Title", "Edm.String",
     {**_scalar_flags, "analyzer": "en.Microsoft"}),
    ("Description", "Edm.String",
     {**_scalar_flags, "analyzer": "en.Microsoft"}),
    # Not searchable, so no analyzer entry.
    ("ReleaseDate", "Edm.DateTimeOffset",
     {**_scalar_flags, "searchable": False}),
    ("keyphrases", "Collection(Edm.String)",
     {**_collection_flags, "facetable": True, "filterable": True,
      "analyzer": "en.Microsoft"}),
    ("text", "Collection(Edm.String)",
     {**_collection_flags, "analyzer": "standard.lucene"}),
    ("layoutText", "Collection(Edm.String)",
     {**_collection_flags, "analyzer": "standard.lucene"}),
]

ir_index_schema = {
    "name": index_name,
    "fields": [
        {
            "name": field_name,
            "type": field_type,
            **flags,
            # Settings common to every field; fresh list objects per entry.
            "indexAnalyzer": None,
            "searchAnalyzer": None,
            "synonymMaps": [],
            "fields": [],
        }
        for field_name, field_type, flags in _field_specs
    ],
    "suggesters": [],
    "scoringProfiles": [],
    "defaultScoringProfile": "",
    "corsOptions": None,
    "analyzers": [],
    "charFilters": [],
    "tokenFilters": [],
    "tokenizers": [],
    "@odata.etag": "\"0x8D8B90E3409E48F\"",
}

In [None]:
# Create Index
# POST the schema dict to the indexes collection and print the JSON reply.
url = f"{endpoint}indexes{api_version}"
response = requests.post(url, json=ir_index_schema, headers=headers)
index = response.json()
pprint(index)

In [None]:
# Create Data Source
# POST the data source definition to the datasources collection and print the JSON reply.
url = f"{endpoint}datasources{api_version}"
response = requests.post(url, json=datasouce, headers=headers)
datasourceresponse = response.json()
pprint(datasourceresponse)

In [None]:
# Create a skillset

# Single skill: key phrase extraction. It reads /document/Description as text
# and /document/language as the language code; its keyPhrases output is given
# the target name "keyphrases".
_keyphrase_skill = {
    "@odata.type": "#Microsoft.Skills.Text.KeyPhraseExtractionSkill",
    "inputs": [
        {"name": "text", "source": "/document/Description"},
        {"name": "languageCode", "source": "/document/language"},
    ],
    "outputs": [
        {"name": "keyPhrases", "targetName": "keyphrases"},
    ],
}

skillset_definition = {
    "name": skillset_name,
    "skills": [_keyphrase_skill],
    # Cognitive Services resource attached by key.
    "cognitiveServices": {
        "@odata.type": "#Microsoft.Azure.Search.CognitiveServicesByKey",
        "description": cognitiveservices_resourceid,
        "key": cognitiveservices_key,
    },
}

# PUT the definition to the skillset's own URL and display the JSON reply.
url = f"{endpoint}skillsets/{skillset_name}{api_version}"
response = requests.put(url, json=skillset_definition, headers=headers)
response.json()




In [None]:
# Indexer definition

# Fields copied from the enriched document ("/document/<name>") to the index
# field of the same name through output field mappings.
_mapped_fields = (
    "ItemId",
    "ItemVariantId",
    "Title",
    "Description",
    "ReleaseDate",
    "keyphrases",
)

indexer_definition = {
    # Wire the data source, target index, and skillset together by name.
    "dataSourceName": datasource_name,
    "targetIndexName": index_name,
    "skillsetName": skillset_name,
    "parameters": {
        "batchSize": None,
        "maxFailedItems": 0,
        "maxFailedItemsPerBatch": 0,
        "base64EncodeKeys": None,
        "configuration": {
            "dataToExtract": "contentAndMetadata",
            # Blobs are parsed as delimited text with these column headers.
            "parsingMode": "delimitedText",
            "delimitedTextHeaders": "ItemId,ItemVariantId,Title,Description,ReleaseDate",
        },
    },
    "fieldMappings": [
        {"sourceFieldName": "ItemId", "targetFieldName": "ItemId"},
    ],
    "outputFieldMappings": [
        {"sourceFieldName": "/document/" + field, "targetFieldName": field}
        for field in _mapped_fields
    ],
}

In [None]:
# Delete Indexer

# Issue a DELETE against the indexer resource and print the response object.
url = f"{endpoint}indexers/{indexer_name}{api_version}"
response = requests.delete(url=url, headers=headers)
pprint(response)

In [None]:
# Create Indexer

# PUT the indexer definition to the indexer's own URL and print the JSON reply.
url = endpoint + "indexers/" + indexer_name + api_version
# The request body is the indexer_definition dict built in the
# "Indexer definition" cell. The previous name, sage_indexer_definition,
# is not defined anywhere in this notebook and raised NameError.
response = requests.put(url, headers=headers, json=indexer_definition)
query = response.json()
pprint(query)

In [None]:
# Get Indexer Status

# GET the indexer's /status resource and display the JSON reply as the cell output.
url = f"{endpoint}indexers/{indexer_name}/status{api_version}"
response = requests.get(url, headers=headers)
response.json()

In [None]:
# Search Index

# Query-string parameters appended after api-version. $select may only name
# fields declared in the index schema; "metadata_storage_path" is not one of
# them, so the key field ItemId is selected instead.
searchstring = '&search=Water Factor Rec&$count=true&$select=ItemId,text'

url = endpoint + "indexes/" + index_name + "/docs" + api_version + searchstring
# The whole query is carried in the URL, so the GET is sent without a body
# (previously the query string was also passed as a JSON payload).
response = requests.get(url, headers=headers)
query = response.json()
pprint(query)