## Exercise:
- This notebook only works with existing Azure AI Search Service
- Create a Python function to create Azure AI Search objects

In [1]:
import requests
import json

def create_az_ai_search_objects(endpoint, api_key, objecttype, jsonfile, version):
    # Define the URL for creating/updating the data source
    url = f"https://{endpoint}.search.windows.net/{objecttype}?api-version={version}"
    print(url)
    # Define the request headers with the query key
    headers = {
        "api-key": api_key,
        "Content-Type": "application/json"
    }

    # Load your JSON data from the file
    with open(jsonfile, 'r') as f:
        data = json.load(f)

    # Make the POST request
    response = requests.post(url, headers=headers, json=data)
    # Check the status of the request
    if response.status_code == 200 or response.status_code == 201:
        print("Request was successful.")
    else:
        print(f"Request failed. Status code: {response.status_code}")

### Execute the Python function script to create:
- Azure AI Data Source point to Azure Data Lake
- Azure AI Search Skill
- Azure AI Search Index
- Azure AI Search Indexer

In [2]:
# Define the variables for the function
import os
from dotenv import load_dotenv  

load_dotenv()

endpoint = os.getenv('endpoint')
api_key = os.getenv('api_key')
version = os.getenv('version')
az_adls_key = os.getenv('az_adls_key')
# Call the function using data_source, skillsets, Indexes, indexers

In [3]:
# Create the data source
objecttype = "datasources"
jsonfile = "data_source.json"
create_az_ai_search_objects(endpoint, api_key, objecttype, jsonfile,version)

https://az-ai-search-sandbox.search.windows.net/datasources?api-version=2023-11-01
Request was successful.


In [4]:
# ceate the skillsets
objecttype = "skillsets"
jsonfile = "skillsets.json"
create_az_ai_search_objects(endpoint, api_key, objecttype, jsonfile,version)

https://az-ai-search-sandbox.search.windows.net/skillsets?api-version=2023-11-01
Request was successful.


In [5]:
# create the Indexes
objecttype = "Indexes"
jsonfile = "Indexes.json"
create_az_ai_search_objects(endpoint, api_key, objecttype, jsonfile,version)

https://az-ai-search-sandbox.search.windows.net/Indexes?api-version=2023-11-01
Request was successful.


In [6]:
# create the indexers
objecttype = "indexers"
jsonfile = "indexers.json"
create_az_ai_search_objects(endpoint, api_key, objecttype, jsonfile,version)

https://az-ai-search-sandbox.search.windows.net/indexers?api-version=2023-11-01
Request was successful.


### Imagine you have a large number of review files, and your task is to determine the percentage count of reviews that are positive, negative, neutral, and mixed.
- Then this is exactly what these scripts are doing.

In [7]:
# Import the requests library
import requests

objecttype = "Indexes"
index_name = "skillup-ai-custom-index-02"
endpoint = os.getenv('endpoint')
# Define the endpoint, object type, and API version
endpoint_url = f"https://{endpoint}.search.windows.net/{objecttype}/{index_name}/docs?api-version={version}"

# Define the search query
search_query = "&search=*&$select=sentiment"

query_url = f"{endpoint_url}{search_query}"

query_key = api_key

# Define the request headers with the query key
headers = {"api-key": query_key}

# Send a GET request to the query URL
response = requests.get(query_url, headers=headers)

# Check if the response status code is 200 (OK)
if response.status_code == 200:
    # Parse the response as JSON
    data = response.json()

    # Initialize the counters for positive and negative sentiment
    positive_count = 0
    negative_count = 0
    neutral_count = 0
    mixed_count = 0

    # Iterate over the value array, which contains the documents
    for document in data["value"]:
        # Get the sentiment value for each document
        sentiment = document["sentiment"]

        # Increment the counter for positive or negative sentiment
        if sentiment == "positive":
            positive_count += 1
        elif sentiment == "negative":
            negative_count += 1
        elif sentiment == "neutral":
            neutral_count += 1
        elif sentiment == "mixed":
            mixed_count += 1

    # Get the total number of documents
    total_count = len(data["value"])

    # Calculate the percentage of positive and negative reviews
    positive_percentage = (positive_count / total_count) * 100
    negative_percentage = (negative_count / total_count) * 100
    neutral_percentage = (neutral_count / total_count) * 100
    mixed_percentage = (mixed_count / total_count) * 100

    # Print the results
    print(f"Total number of reviews: {total_count}")
    print(f"Positive reviews: {positive_count} ({positive_percentage:.2f}%)")
    print(f"Negative reviews: {negative_count} ({negative_percentage:.2f}%)")
    print(f"Neutral reviews: {negative_count} ({neutral_percentage:.2f}%)")
    print(f"mixed reviews: {negative_count} ({mixed_percentage:.2f}%)")
else:
    # Print the response status code if not 200
    print(f"Error: {response.status_code}")


Total number of reviews: 50
Positive reviews: 15 (30.00%)
Negative reviews: 8 (16.00%)
Neutral reviews: 8 (4.00%)
mixed reviews: 8 (50.00%)


In [8]:
# Define the request headers with the query key
headers = {"api-key": query_key}

# Send a GET request to the query URL
response = requests.get(query_url, headers=headers)

# Check if the response status code is 200 (OK)
if response.status_code == 200:
    # Parse the response as JSON
    data = response.json()
    # Iterate over the value array, which contains the documents
    for document in data["value"]:
        print(document)

{'@search.score': 1.0, 'sentiment': 'negative'}
{'@search.score': 1.0, 'sentiment': 'positive'}
{'@search.score': 1.0, 'sentiment': 'positive'}
{'@search.score': 1.0, 'sentiment': 'positive'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'neutral'}
{'@search.score': 1.0, 'sentiment': 'negative'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'positive'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'negative'}
{'@search.score': 1.0, 'sentiment': 'positive'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'sentiment': 'mixed'}
{'@search.score': 1.0, 'senti

### Performing semantic analysis using Azure Custom Skill.
- Get the sentiment
- Get the key phrases
- Get the full file path name

In [9]:
import requests
import json
import pprint

def query_az_ai_search (api_key,endpoint,index_name,search_query,objecttype="Indexes"):
    # Define the headers for the API request
    headers = {
        'api-key': api_key,  # replace <your-api-key> with your actual API key
        'Content-Type': 'application/json'
    }

    # Make the API request
    az_ai_search_endpoint = f"https://{endpoint}.search.windows.net/Indexes/{index_name}/docs"

    response = requests.get(az_ai_search_endpoint, headers=headers, params=search_query)

    # Parse the JSON response
    data = response.json()

    # Create a pretty printer
    pp = pprint.PrettyPrinter(indent=4)

    # Pretty print the data
    pp.pprint(data)

In [13]:
# Define the parameters for the API request
search_query = {
    'api-version': '2023-11-01',
    'search': '*',
    '$filter': "sentiment eq 'negative'",
    '$select': 'sentiment,keyphrases,locations,url'
}

query_az_ai_search (api_key,endpoint,index_name,search_query)

{   '@odata.context': "https://az-ai-search-sandbox.search.windows.net/indexes('skillup-ai-custom-index-02')/$metadata#docs(*)",
    'value': [   {   '@search.score': 1.0,
                     'keyphrases': [   'Contoso Hotel',
                                       'Dirty rooms',
                                       'rude staff',
                                       'terrible service',
                                       'money'],
                     'locations': ['rooms'],
                     'sentiment': 'negative',
                     'url': 'https://adlesilabs.blob.core.windows.net/az-ai-search-skillset/otherreviews/document_9.txt'},
                 {   '@search.score': 1.0,
                     'keyphrases': [   'good night',
                                       'Contoso hotel',
                                       'air conditioning',
                                       'sleep',
                                       'Seattle',
                                  

In [None]:
!pip install azure-storage-file-datalake

In [14]:
load_dotenv()

storage_account_name = os.getenv('storage_account_name')
storage_account_key = os.getenv('storage_account_key')

print(storage_account_name)

adlesilabs


In [15]:
from azure.storage.filedatalake import DataLakeServiceClient

def initialize_storage_account(storage_account_name, storage_account_key):
    try:  
        global service_client

        service_client = DataLakeServiceClient(account_url="{}://{}.dfs.core.windows.net".format(
            "https", storage_account_name), credential=storage_account_key)

    except Exception as e:
        print(e)

def read_data_from_adls(file_system_name, file_path):
    try:
        file_system_client = service_client.get_file_system_client(file_system=file_system_name)
        file_client = file_system_client.get_file_client(file_path)

        download = file_client.download_file()

        downloaded_bytes = download.readall()

        return downloaded_bytes

    except Exception as e:
        print(e)

In [17]:
initialize_storage_account(storage_account_name, storage_account_key)

file_system_name = 'az-ai-search-skillset/otherreviews'
file_path = 'document_14.txt'

data = read_data_from_adls(file_system_name, file_path)

print(data)


b"If you're looking for a good night's sleep, don't stay at Contoso hotel in Seattle. The bed was uncomfortable and the pillows were lumpy. Plus, the air conditioning was loud and kept me up all night."
