In [2]:
import os
import json
import time
import requests
import random
import base64
from collections import OrderedDict
import urllib.request
# from tqdm import tqdm
import openai
from dotenv import load_dotenv
from typing import List

# Pull settings from the local .env file into the process environment.
load_dotenv()

# Shared HTTP headers and query-string parameters reused by the REST calls
# further down; a missing environment variable raises KeyError right here.
headers = {
    'Content-Type': 'application/json',
    'api-key': os.environ['AZURE_SEARCH_KEY'],
}
params = {
    'api-version': os.environ['AZURE_SEARCH_API_VERSION'],
}



## 1. Create Data Source

Reference: [Create Data Source (Azure AI Search REST API)](https://learn.microsoft.com/en-us/rest/api/searchservice/create-data-source)

```json
data_source = {   
    "name" : (optional on PUT; required on POST) "Name of the data source",  
    "description" : (optional) "Anything you want, or nothing at all",  
    "type" : (required) "Must be a supported data source",
    "credentials" : (required) { "connectionString" : "Connection string for your data source" },
    "container": {
        "name": "Name of the table, view, collection, or blob container you wish to index",
        "query": (optional) 
    },
    "dataChangeDetectionPolicy" : (optional) {See below for details },
    "dataDeletionDetectionPolicy" : (optional) {See below for details },
    "encryptionKey":(optional) { }
}

```

In [3]:
# Names of the blob container to index and of the data source object that
# points at it.
BLOB_CONTAINER_NAME = "document-chunks"
DATA_SOURCE_NAME = "demo-blob-source"

# Request body for the data source definition.
# Note: os.getenv yields None when CONNECTION_STRING is not set.
data_source = dict(
    name=DATA_SOURCE_NAME,
    description="Sample data",
    type="azureblob",
    credentials=dict(connectionString=os.getenv("CONNECTION_STRING")),
    container=dict(name=BLOB_CONTAINER_NAME),
)


In [None]:

# Register the blob data source with the search service.
# PUT on /datasources/<name> creates the data source or replaces an existing
# one with the same name, so this cell can be re-run.
r = requests.put(
    os.environ['AZURE_SEARCH_ENDPOINT'] + "/datasources/" + DATA_SOURCE_NAME,
    data=json.dumps(data_source),
    headers=headers,
    params=params,
)
print(r.status_code)
print(r.ok)

## 2. Create an Index

In [4]:
# Name of the search index that the indexer will populate.
INDEX_NAME = "demo-index-ai-skilled"

# Index definition sent as the request body when creating the index.
# Field attributes (key, searchable, filterable, ...) are real booleans so
# that json.dumps emits JSON true/false rather than the strings "true"/"false".
index_payload = {
    "name": INDEX_NAME,
    "fields": [
        # Document key.
        {"name": "id", "type": "Edm.String", "key": True, "filterable": True},
        {"name": "title", "type": "Edm.String", "searchable": True, "retrievable": True},
        {"name": "content", "type": "Edm.String", "searchable": True, "retrievable": True},
        {
            "name": "name",
            "type": "Edm.String",
            "searchable": True,
            "retrievable": True,
            "sortable": False,
            "filterable": False,
            "facetable": False,
        },
        {
            "name": "location",
            "type": "Edm.String",
            "searchable": False,
            "retrievable": True,
            "sortable": False,
            "filterable": False,
            "facetable": False,
        },
        {"name": "page_num", "type": "Edm.Int32", "searchable": False, "retrievable": True},
        # Target of the indexer's output field mapping for extracted key phrases.
        {
            "name": "keyphrases",
            "type": "Collection(Edm.String)",
            "searchable": True,
            "filterable": False,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
            "analyzer": "standard.lucene",
            "synonymMaps": [],
        },
    ],
    # Semantic configuration: which fields act as title, content and keywords.
    "semantic": {
        "configurations": [
            {
                "name": "default",
                "prioritizedFields": {
                    "titleField": {
                        "fieldName": "title"
                    },
                    "prioritizedContentFields": [
                        {
                            "fieldName": "content"
                        }
                    ],
                    "prioritizedKeywordsFields": [
                        {
                            "fieldName": "keyphrases"
                        }
                    ]
                }
            }
        ]
    }
}

In [None]:
# Send the index definition to /indexes/<INDEX_NAME> with PUT, then report the
# HTTP status code and whether the response counts as successful.
r = requests.put(
    f"{os.environ['AZURE_SEARCH_ENDPOINT']}/indexes/{INDEX_NAME}",
    data=json.dumps(index_payload),
    headers=headers,
    params=params,
)
print(r.status_code)
print(r.ok)

## 3. Create a Skillset

```json
{
    "name": "{{skillset-name}}",
    "description": "Skillset to detect language, translate text, extract key phrases, and score sentiment",
    "skills": [ 
        {
            "@odata.type": "#Microsoft.Skills.Text.KeyPhraseExtractionSkill",
            "context": "/document/reviews_text/pages/*",
            "inputs": [
                { "name": "text",  "source": "/document/reviews_text/pages/*" },
                { "name": "languageCode",  "source": "/document/language" }
            ],
            "outputs": [
                { "name": "keyPhrases" , "targetName": "keyphrases" }
            ]
        },
    ]
}
```

In [5]:
# Name under which the skillset is referenced by the indexer definition.
SKILL_NAME = "keyphrase-extraction-skillset"

# Skillset definition containing a single key-phrase extraction skill.
# The description states only what this skillset actually does.
skillset_payload = {
    "name": SKILL_NAME,
    "description": "Skillset to extract key phrases from document content",
    "skills": [
        {
            "@odata.type": "#Microsoft.Skills.Text.KeyPhraseExtractionSkill",
            "context": "/document/content/*",
            "inputs": [
                {"name": "text", "source": "/document/content/*"},
                # NOTE(review): no skill in this skillset produces
                # /document/language -- confirm it is supplied elsewhere.
                {"name": "languageCode", "source": "/document/language"},
            ],
            "outputs": [
                {"name": "keyPhrases", "targetName": "keyphrases"}
            ],
        },
    ],
}

## 4. Create an Indexer


```json
{
  "name": (required) String that uniquely identifies the indexer,
  "dataSourceName": (required) String, provides raw content that will be enriched,
  "targetIndexName": (required) String, name of an existing index,
  "skillsetName" : (required for AI enrichment) String, name of an existing skillset,
  "cache":  {
    "storageConnectionString" : (required if you enable the cache) Connection string to a blob container,
    "enableReprocessing": true
    },
  "parameters": { },
  "fieldMappings": (optional) Maps fields in the underlying data source to fields in an index,
  "outputFieldMappings" : (required) Maps skill outputs to fields in an index,
}
```

In [6]:
# Empty placeholder; the populated indexer definition is built under the name
# `indexer` in the following cell.
indexer_payload = dict()

In [7]:
# Indexer name. The stray trailing dash was dropped: resource names are
# expected to begin and end with a letter or digit.
INDEXER_NAME = "demo-indexer-ai-skilled"

# Indexer definition tying together the data source, the skillset and the
# target index declared in the earlier cells.
indexer = {
    "name": INDEXER_NAME,
    "description": "",
    "dataSourceName": DATA_SOURCE_NAME,
    "skillsetName": SKILL_NAME,
    "targetIndexName": INDEX_NAME,
    "parameters": {},
    "fieldMappings": [],
    # Route the skill output into the index's `keyphrases` field.
    "outputFieldMappings": [
        {
            "sourceFieldName": "/document/content/keyphrases",
            "targetFieldName": "keyphrases",
        }
    ],
}

In [None]:

# Serialize the indexer definition built in the previous cell.
indexer_json = json.dumps(indexer)

# POST the definition to the service's /indexers collection. The endpoint,
# api-version and API key come from the same environment-driven `headers` and
# `params` used by the other requests, rather than hard-coded placeholders
# (which would also clobber the shared `headers` for any cell run afterwards).
url = os.environ['AZURE_SEARCH_ENDPOINT'] + "/indexers"
response = requests.post(url, headers=headers, params=params, data=indexer_json)

# 201 Created is the success status for creating a new indexer via POST.
if response.status_code == 201:
    print("Indexer created successfully")
else:
    print(f"Error creating indexer: {response.text}")
