In [5]:
import json
import logging
import time
from azure.cosmos.aio import CosmosClient
from azure.cosmos import  PartitionKey, exceptions
from openai import AzureOpenAI
from time import sleep
import time
import json
import pandas as pd
import uuid
import gradio as gr
from azure.cosmos import exceptions, PartitionKey #CosmosClient
from azure.cosmos.aio import CosmosClient
from azure.keyvault.secrets import SecretClient
import os
import azure.identity
from azure.identity import DefaultAzureCredential

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Resolve the Key Vault name: use KEY_VAULT_NAME when it is already set,
# otherwise prompt once and store the answer in the environment so later
# cells in this session can read it back.
keyVaultName = os.environ.get("KEY_VAULT_NAME")
if keyVaultName is None:
    keyVaultName = input("Please enter your Key Vault name: ")
    os.environ["KEY_VAULT_NAME"] = keyVaultName

# Vault endpoint derived from the vault name.
KVUri = "https://{}.vault.azure.net".format(keyVaultName)

# Secret client authenticated with DefaultAzureCredential.
credential = DefaultAzureCredential()
client = SecretClient(vault_url=KVUri, credential=credential)

In [7]:
"""
This code loads and sets the necessary variables for Azure services.
The variables are loaded from Azure Key Vault.
"""
# Open AI
# Endpoint and API key come from Key Vault secrets; the API version is pinned in code.
azure_openai_endpoint=client.get_secret(name="aoai-endpoint").value
azure_openai_api_key=client.get_secret(name="aoai-api-key").value
azure_openai_api_version = "2024-02-15-preview"
# Embedding
# The deployment name below is what generate_embeddings passes as `model`.
azure_openai_embedding_deployment = "text-embedding-3-small"
# NOTE(review): this secret is fetched but not referenced by the other cells shown here.
azure_openai_embedding_model =client.get_secret(name="aoai-embedding-model").value
# Length of the embedding vectors (presumably the output size of the deployment above — confirm).
azure_openai_vector_dimension = 1536

# Cosmos DB account endpoint, also read from Key Vault.
cosmosdb_url =client.get_secret(name="cosmosdb-url").value

# Names of the database, the movies container, the document property that
# holds the embedding, and the container used as the chat-history cache.
cosmosdb_database_name = "moviesdb"
cosmosdb_collection_name = "movies"
cosmos_vector_property_name = "vector"
cosmosdb_chathistory_cache_name = "chat_cache"

In [8]:
# Async Cosmos DB client (azure.cosmos.aio), authenticated with the same
# credential object that was created for the Key Vault client.
cosmos_client = CosmosClient(url=cosmosdb_url, credential=credential)
# Azure OpenAI client (key-based auth); used below for embeddings and chat completions.
azure_openai_client = AzureOpenAI(
    api_key=azure_openai_api_key,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
)

#### Create a database and containers with vector policies

In [9]:
# Create the database unless it already exists, so this cell can be re-run.
db = await cosmos_client.create_database_if_not_exists(id=cosmosdb_database_name)

In [10]:
# Vector embedding policy: names the document property that holds the
# embedding and describes its element type, distance function and length.
vector_embedding_policy = {
    "vectorEmbeddings": [
        {
            "path": "/" + cosmos_vector_property_name,
            "dataType": "float32",
            "distanceFunction": "dotproduct",
            # Single source of truth for the embedding length (defined with the other settings).
            "dimensions": azure_openai_vector_dimension,
        }
    ]
}


# Indexing policy: index everything except the etag and the raw vector
# property, and declare a quantizedFlat vector index on the vector property.
indexing_policy = {
    "includedPaths": [{"path": "/*"}],
    "excludedPaths": [
        # One object per excluded path. Putting both under a single dict
        # repeats the "path" key, and Python silently keeps only the last one.
        {"path": '/"_etag"/?'},
        {"path": "/" + cosmos_vector_property_name + "/*"},
    ],
    "vectorIndexes": [
        {"path": "/" + cosmos_vector_property_name, "type": "quantizedFlat"}
    ],
}

In [11]:
# create the collection using the vector index policies
try:
    pass
    container = await db.create_container_if_not_exists(
        id=cosmosdb_collection_name,
        partition_key=PartitionKey(path="/id"),
        vector_embedding_policy=vector_embedding_policy
    )
    print('Container with id \'{0}\' created'.format(id))
except exceptions.CosmosHttpResponseError:
    raise

Container with id '<built-in function id>' created


In [12]:
# create the cache collection for storing chat history using the vector index policies
try:
    pass
    container_cache = await db.create_container_if_not_exists(
        id=cosmosdb_chathistory_cache_name,
        partition_key=PartitionKey(path="/id"),
        vector_embedding_policy=indexing_policy
    )
    print('Container with id \'{0}\' created'.format(id))
except exceptions.CosmosHttpResponseError:
    raise

Container with id '<built-in function id>' created


In [None]:
# develop function to embed data attributes.

def generate_embeddings(text: str):
    """
    Turn a piece of text into its embedding vector via Azure OpenAI.

    The same helper vectorizes both the stored data and the user's input.
    Returns the ``embedding`` of the first entry in the response's ``data``.
    """
    request_args = {"input": text, "model": azure_openai_embedding_deployment}
    payload = azure_openai_client.embeddings.create(**request_args).model_dump()
    first_entry = payload["data"][0]
    return first_entry["embedding"]

#### Load data and upload to the Azure Cosmos DB container
#### The Azure Cosmos DB Python SDK does not currently support bulk inserts, so we'll have to insert the items sequentially

In [16]:
# A forward slash keeps the path portable and avoids the invalid "\m" escape sequence.
df = pd.read_csv("data/movie_dataset.csv")

In [20]:
# Peek at the first row to check how the columns were parsed.
df.head(1)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron


In [28]:
# Number of rows in the dataset.
len(df)

4803

In [21]:
# Column dtypes and non-null counts (shows which text columns have missing values).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   index                 4803 non-null   int64  
 1   budget                4803 non-null   int64  
 2   genres                4775 non-null   object 
 3   homepage              1712 non-null   object 
 4   id                    4803 non-null   int64  
 5   keywords              4391 non-null   object 
 6   original_language     4803 non-null   object 
 7   original_title        4803 non-null   object 
 8   overview              4800 non-null   object 
 9   popularity            4803 non-null   float64
 10  production_companies  4803 non-null   object 
 11  production_countries  4803 non-null   object 
 12  release_date          4802 non-null   object 
 13  revenue               4803 non-null   int64  
 14  runtime               4801 non-null   float64
 15  spoken_languages     

In [32]:
async def upload_data_to_cosmosdb(csv_path="data/movie_dataset.csv", max_records=50):
    """
    Embed movie overviews and upsert the movie records into the movies container.

    Reads the movie CSV, keeps the first ``max_records`` rows, computes an
    embedding for each row's ``overview`` with ``generate_embeddings`` and
    upserts the row (embedding stored under ``"vector"``) into the
    module-level ``container``. Items are written one at a time.

    Args:
        csv_path: Location of the movie dataset CSV.
        max_records: Number of leading rows to upload; ``None`` uploads every row.

    Returns:
        None. Prints how many documents were inserted, and how many rows were
        skipped because they had no overview to embed.
    """
    df = pd.read_csv(csv_path)

    # The container is partitioned on /id; store ids as strings rather than the CSV's integers.
    df["id"] = df["id"].astype(str)

    if max_records is not None:
        df = df.head(max_records)

    # Round-trip through JSON so missing values become None and every value
    # is a plain JSON-serializable Python object.
    records = json.loads(df.to_json(orient="records"))

    inserted = 0
    skipped = 0
    for item in records:
        overview = item["overview"]
        if not overview:
            # Nothing to embed; sending None/"" to the embedding API would abort the whole upload.
            skipped += 1
            continue
        item["vector"] = generate_embeddings(overview)
        # NOTE(review): looks like a leftover from a search-index upload payload;
        # kept so the stored documents are unchanged.
        item["@search.action"] = "upload"
        await container.upsert_item(body=item)
        inserted += 1
    print("Inserted {} documents into collection.".format(inserted))
    if skipped:
        print("Skipped {} records without an overview.".format(skipped))

In [33]:
# run function
json_data = "data\movie_dataset.csv"
df_ = pd.read_csv(json_data)
if len(df_.head(99)) < 100:
    pass
    await upload_data_to_cosmosdb()
else:
    print("The number of records is greater than 100. The function will not run.")

Inserted 50 documents into collection.


#### Vector search in Azure Cosmos DB for NoSQL
#### A simple function that takes the user's query, generates embeddings for the query text, and then uses the embedding to run a vector search to find similar items. The most similar items are then used as an additional knowledge base for the completions model to answer the user's query.

In [0]:
# https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/how-to-python-vector-index-query

In [15]:
# Search retrieval function
async def vector_search(query, container, similarity_score=0.03, num_results=2):
    """
    Embed ``query`` and return the closest documents from ``container``.

    The SQL query selects ``overview`` and ``title`` for at most ``num_results``
    documents whose ``VectorDistance`` to the query embedding is greater than
    ``similarity_score``, ordered by that same ``VectorDistance``.

    Returns:
        A list of ``{"SimilarityScore": <score>, "document": <remaining fields>}``
        dicts, in the order the query yields them.
    """
    sql = 'SELECT TOP @num_results c.overview, c.title, VectorDistance(c.vector,@embedding) AS SimilarityScore  FROM c WHERE VectorDistance(c.vector,@embedding) > @similarity_score ORDER BY VectorDistance(c.vector,@embedding)'
    bindings = [
        {"name": "@embedding", "value": generate_embeddings(query)},
        {"name": "@num_results", "value": num_results},
        {"name": "@similarity_score", "value": similarity_score},
    ]
    rows = container.query_items(query=sql, parameters=bindings)
    # The score is popped off each row so "document" holds only the movie fields.
    return [
        {'SimilarityScore': row.pop("SimilarityScore"), "document": row}
        async for row in rows
    ]

In [16]:
# test the search retrieval function
# Uses the default similarity threshold of vector_search and asks for the two best matches.

query= "do you have a fantasy movie?"
results = await vector_search(query=query, container=container, num_results=2)
results

[{'SimilarityScore': 0.3094660837231652,
  'document': {'overview': 'The story of an ancient war that is reignited when a young farmhand unwittingly opens a gateway between our world and a fearsome race of giants. Unleashed on the Earth for the first time in centuries, the giants strive to reclaim the land they once lost, forcing the young man, Jack into the battle of his life to stop them. Fighting for a kingdom, its people, and the love of a brave princess, he comes face to face with the unstoppable warriors he thought only existed in legend–and gets the chance to become a legend himself.',
   'title': 'Jack the Giant Slayer'}},
 {'SimilarityScore': 0.30230984528471677,
  'document': {'overview': 'In 1933 New York, an overly ambitious movie producer coerces his cast and hired ship crew to travel to mysterious Skull Island, where they encounter Kong, a giant ape who is immediately smitten with the leading lady.',
   'title': 'King Kong'}}]

In [17]:
async def get_chat_history(container_cache, completions=1):
    """
    Fetch up to ``completions`` items from the chat cache container.

    Items are ordered by ``c._ts`` descending (presumably newest first, if
    ``_ts`` is the store's last-write timestamp — confirm) and returned
    whole as a list.
    """
    history_query = '''
        SELECT TOP @completions *
        FROM c
        ORDER BY c._ts DESC
        '''
    rows = container_cache.query_items(
        query=history_query,
        parameters=[{"name": "@completions", "value": completions}],
    )
    return [row async for row in rows]

In [18]:
try:
  pass
  chat_hist = await get_chat_history(container_cache=container_cache)
  print(chat_hist[0]["completion"])
except Exception as e:
  print(e)

{"Title": "The Dark Knight Rises", "Overview": "Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protect the late attorney's reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham's finest. The Dark Knight resurfaces to protect a city that has branded him an enemy."}


In [19]:
# Name passed as `model` for chat completions (read by generate_completion).
azure_openai_deployment = "gpt-4o"

In [21]:
#This function grounds the model with system prompts, user queries and vector search results to enable accurate and relevant responses.

def generate_completion(vector_search_results, user_input, chat_history_list):
    """
    Ask the chat model to answer ``user_input`` grounded in the vector search results.

    The message list is built as: system prompt, earlier exchanges, the
    current user question, then one system message per retrieved document.

    Args:
        vector_search_results: Items with a ``"document"`` entry (as produced by
            ``vector_search``); each document is JSON-encoded and sent as context.
        user_input: The user's current question.
        chat_history_list: Earlier exchanges, each with ``"prompt"`` and
            ``"completion"`` keys.

    Returns:
        The response object returned by ``azure_openai_client.chat.completions.create``.
    """
    system_prompt = '''
    You are an intelligent assistant for imdb movies.
    You are designed to provide helpful answers to user questions about movies given the information about provided.
        - Only answer questions related to the information provided below in the context.
        - Write the response as key value pairs.
        Example:
            Title: Matrix
            Overview: A movie about a man who is awoken from his sleep and finds himself in a strange new
        - If you're unsure of an answer, you can say ""I don't know"" or ""I'm not sure"" and recommend users search themselves."
        - Only provide answers that have movie titles and overview that are part of the provided context.
    '''
    messages = [{"role": "system", "content": system_prompt}]

    # Replay earlier exchanges with their real roles. Gluing prompt and
    # completion into a single "user" message made the model read its own
    # previous answers as if the user had written them.
    for chat in chat_history_list:
        messages.append({"role": "user", "content": chat["prompt"]})
        messages.append({"role": "assistant", "content": chat["completion"]})

    # Current question, followed by the retrieved documents as grounding context.
    messages.append({"role": "user", "content": user_input})
    messages.extend(
        {"role": "system", "content": json.dumps(item["document"])}
        for item in vector_search_results
    )

    # Low temperature keeps answers close to the supplied context.
    return azure_openai_client.chat.completions.create(
        model=azure_openai_deployment, messages=messages, temperature=0.1
    )

In [22]:
# test the model generation function
# End-to-end check: retrieve context for the question, add the latest cached
# exchange as chat history, and show only the text of the model's answer.

question = "do you have a batman movie?"
#user_input = "tell me about a tom cruise movie?"
search_results = await vector_search(query=question, container=container, similarity_score=0.03, num_results=2)
x = generate_completion(vector_search_results=search_results, user_input=question, chat_history_list=await get_chat_history(container_cache=container_cache, completions=1))

# x
# print("\n\n")
x.to_dict()["choices"][0]["message"]["content"]

"Title: The Dark Knight Rises  \nOverview: Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protect the late attorney's reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham's finest. The Dark Knight resurfaces to protect a city that has branded him an enemy."

In [23]:
# Inspect the top retrieved document that was given to the model as context.
search_results[0]["document"]

{'overview': "Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protect the late attorney's reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham's finest. The Dark Knight resurfaces to protect a city that has branded him an enemy.",
 'title': 'The Dark Knight Rises'}

In [None]:
async def save_chat_history(container_cache, user_input, user_input_embedding, completion_results):
    """
    Store one prompt/completion exchange in the chat cache container.

    Args:
        container_cache: Container the exchange is written to via ``create_item``.
        user_input: The user's prompt text.
        user_input_embedding: Embedding of the prompt, stored under ``"vector"``.
        completion_results: Chat completion response; must offer ``to_dict()``.

    The document gets a fresh UUID as its id and records the completion text,
    the token counts (as strings) and the model name. A failure while writing
    is printed rather than raised.
    """
    # Convert the response once instead of once per field.
    completion = completion_results.to_dict()
    usage = completion["usage"]
    chat_history_object = {
        "id": str(uuid.uuid4()),
        "prompt": user_input,
        "completion": completion["choices"][0]["message"]["content"],
        "completionTokens": str(usage["completion_tokens"]),
        "promptTokens": str(usage["prompt_tokens"]),
        "totalTokens": str(usage["total_tokens"]),
        "model": completion["model"],
        "vector": user_input_embedding,
    }
    try:
        # Insert the chat document into the Cosmos DB container
        await container_cache.create_item(body=chat_history_object)
    except Exception as e:
        # Best effort: a failed cache write is reported here instead of being raised.
        print(e)


# Check if the question has been asked before and retrieve response from cache container
async def get_cache(container, vectors, similarity_score=0.02, num_results=1):
    """
    Look up cached completions for prompts similar to the given embedding.

    Queries ``container`` for at most ``num_results`` items whose
    ``VectorDistance`` to ``vectors`` is greater than ``similarity_score``,
    ordered by that ``VectorDistance``.

    Returns:
        A list with the ``completion`` field of each matching item
        (empty when nothing matches).
    """
    cache_lookup_query = '''
        SELECT TOP @num_results *
        FROM c
        WHERE VectorDistance(c.vector,@embedding) > @similarity_score
        ORDER BY VectorDistance(c.vector,@embedding)
        '''
    bindings = [
        {"name": "@embedding", "value": vectors},
        {"name": "@num_results", "value": num_results},
        {"name": "@similarity_score", "value": similarity_score},
    ]
    rows = container.query_items(
        query=cache_lookup_query,
        parameters=bindings,
        populate_query_metrics=True,
    )
    return [row['completion'] async for row in rows]

# Test the cache lookup (get_cache) with a strict 0.99 threshold.
await get_cache(container=container_cache, vectors=generate_embeddings(text="do you have a spy movie?"), similarity_score=0.99)

[]

#### Loop function to perform Q&A over the sample movie data! It uses the user input, search result to generate the model output.

#### Also, add a chat history cache container/collection to the cosmos db database. This will first be checked before routing queries to the LLM.

In [None]:
async def chat_loop():
    """
    Interactive console Q&A over the movie data.

    For each prompt: embed it, look for a near-identical prompt in the chat
    cache and print its stored completion if found; otherwise run a vector
    search, ask the chat model, save the exchange to the cache and print the
    answer. Typing quit / exit / q / end stops the loop. Errors inside one
    turn are printed and the loop continues.
    """
    print("*** Please ask your model questions about imdb movies. Type 'end' to end the session.")
    while True:
        try:
            user_input = input("User prompt: ").lower()

            if user_input in ["quit", "exit", "q", "end"]:
                print("\n\nExiting chat..")
                print("Good bye, please let me know if you need further help.")
                break

            if not user_input.strip():
                # Nothing to embed; ask again instead of sending an empty prompt.
                continue

            user_input_embeddings = generate_embeddings(text=user_input)

            # Query the chat history cache first to see if this question has been asked before.
            # The threshold must be strict: with a low value such as 0.02 almost any
            # earlier prompt counts as a hit and unrelated answers get replayed.
            cache_results = await get_cache(container=container_cache, vectors=user_input_embeddings, similarity_score=0.99, num_results=1)
            if len(cache_results) > 0:
                print("Cached Result\n")
                print(cache_results[0])
                continue  # Skip to next iteration - no need to do vector search or call LLM

            # If no cache hit, perform vector search and generate new completion
            search_results = await vector_search(query=user_input, container=container, similarity_score=0.03, num_results=2)
            chat_history_list = await get_chat_history(container_cache=container_cache, completions=1)
            completion_results = generate_completion(vector_search_results=search_results, user_input=user_input, chat_history_list=chat_history_list)
            print("Completion Result\n")

            # Save the chat history to cosmos db
            await save_chat_history(container_cache=container_cache, user_input=user_input, user_input_embedding=user_input_embeddings, completion_results=completion_results)
            print(completion_results.to_dict()["choices"][0]["message"]["content"])
        except EOFError:
            # Input stream closed: input() would raise on every iteration, so stop.
            print("\n\nExiting chat..")
            break
        except Exception as e:
            print(e)

In [None]:
# Start the interactive console session (type 'end' to stop).
await chat_loop()

*** Please ask your model questions about imdb movies. Type 'end' to end the session.
Cached Result

Title: Battleship  
Overview: When mankind beams a radio signal into space, a reply comes from ‘Planet G’, in the form of several alien crafts that splash down in the waters off Hawaii. Lieutenant Alex Hopper is a weapons officer assigned to the USS John Paul Jones, part of an international naval coalition which becomes the world's last hope for survival as they engage the hostile alien force of unimaginable strength. While taking on the invaders, Hopper must also try to live up to the potential his brother, and his fiancée's father, Admiral Shane, expect of him.


Exiting chat..
Good bye, please let me know if you need further help.


#### Create a Gradio UI for the Chatbot

In [0]:
# Single-turn chat handler used by the Gradio UI below.

async def chat_bot_function(user_input, history=None):
    """
    Answer one user message, serving it from the chat cache when possible.

    Args:
        user_input: The user's message.
        history: Conversation history supplied by the chat UI. Accepted so the
            function can be called as ``fn(message, history)``; it is not used,
            because prior turns are read from the Cosmos DB chat cache instead.

    Returns:
        The answer text: a cached completion when a near-identical prompt was
        seen before, otherwise a fresh completion (which is also saved to the
        cache). If anything fails, the error is printed and a short apology
        string is returned so the UI always has something to display.
    """
    try:
        user_input = user_input.lower()
        # Query the chat history cache first to see if this question has been asked before
        user_input_embeddings = generate_embeddings(text=user_input)
        cache_results = await get_cache(container=container_cache, vectors=user_input_embeddings, similarity_score=0.99, num_results=1)
        if len(cache_results) > 0:
            print("Cached Result\n")
            return cache_results[0]

        # Same retrieval threshold as the console loop. Relevant matches in this
        # notebook score around 0.3, so a 0.8 cut-off would retrieve no context.
        search_results = await vector_search(query=user_input, container=container, similarity_score=0.03, num_results=1)
        chat_history_list = await get_chat_history(container_cache=container_cache, completions=1)
        completion_results = generate_completion(vector_search_results=search_results, user_input=user_input, chat_history_list=chat_history_list)
        await save_chat_history(container_cache=container_cache, user_input=user_input, user_input_embedding=user_input_embeddings, completion_results=completion_results)
        response = completion_results.to_dict()["choices"][0]["message"]["content"]
        print("Completion Result")
        return response
    except Exception as e:
        print(e)
        # Return text instead of None so the chat UI can show that the turn failed.
        return "Sorry, something went wrong while answering your question. Please try again."

In [0]:
# Quick check of the chat handler outside the UI.
query = "do you have a spy movie?"
await chat_bot_function(user_input=query)

In [0]:
def generate_response(message, history):
    """
    Plain (non-RAG) chat reply built only from the UI conversation history.

    Args:
        message: The user's new message.
        history: Iterable of ``(user_text, assistant_text)`` pairs from earlier turns.

    Returns:
        The text content of the first choice in the model's response.
    """
    formatted_history = []
    for user, assistant in history:
        formatted_history.append({"role": "user", "content": user})
        formatted_history.append({"role": "assistant", "content": assistant})

    formatted_history.append({"role": "user", "content": message})

    # Use the Azure OpenAI client and the configured chat deployment. The
    # name `client` in this notebook is the Key Vault SecretClient, which has
    # no chat API.
    response = azure_openai_client.chat.completions.create(
        model=azure_openai_deployment,
        messages=formatted_history,
        temperature=1.0,
    )

    return response.choices[0].message.content

In [0]:
# Build and launch the chat UI around chat_bot_function.
# NOTE(review): ChatInterface presumably calls fn(message, history) — confirm
# that chat_bot_function accepts both arguments.
# NOTE(review): retry_btn / undo_btn may not exist in every Gradio release —
# confirm against the installed version.
# NOTE(review): share=True presumably publishes a public link to this bot —
# confirm that exposing it (and the Azure resources behind it) is intended.
gr.ChatInterface(
    fn=chat_bot_function,
    chatbot=gr.Chatbot(label="Assistant", height=250),
    textbox=gr.Textbox(placeholder="Ask me a question about any movie", scale=7),
    title="RAG Movie Recommender",
    #description="I will try to answer your movie related questions as accurately as possible",
    theme="soft",
    retry_btn=None,
    undo_btn="Delete Previous",
).launch(share=True)