In [37]:
import pandas as pd
import chromadb
from chromadb.utils import embedding_functions
from sentence_transformers import SentenceTransformer
from huggingface_hub import InferenceClient
import numpy as np
import os

In [38]:
def text_embedding(text) -> None:
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    return model.encode(text)

In [39]:
def generate_context(query):
    vector=text_embedding(query).tolist()
    
    results=collection.query(    
        query_embeddings=vector,
        n_results=15,
        include=["documents"]
    )
    
    res = "\n".join(str(item) for item in results['documents'][0])
    return res

In [40]:
def chat_completion(system_prompt, user_prompt,length=1000):
    final_prompt=f"""<s>[INST]<<SYS>>
    {system_prompt}
    <</SYS>>
    
    {user_prompt} [/INST]"""
    return client.text_generation(prompt=final_prompt,max_new_tokens = length).strip()

In [41]:
df=pd.read_csv('./data/oscars.csv')
df=df.loc[df['year_ceremony'] == 2023]
df=df.dropna(subset=['film'])
df.loc[:, 'category'] = df['category'].str.lower()
df.loc[:, 'text'] = df['name'] + ' got nominated under the category, ' + df['category'] + ', for the film ' + df['film'] + ' to win the award'
df.loc[df['winner'] == False, 'text'] = df['name'] + ' got nominated under the category, ' + df['category'] + ', for the film ' + df['film'] + ' but did not win'               

In [42]:
client = chromadb.Client()
collection = client.get_or_create_collection("oscars-2023")

In [43]:
docs=df["text"].tolist() 
ids= [str(x) for x in df.index.tolist()]
collection.add(
    documents=docs,
    ids=ids
)

Insert of existing embedding ID: 10639
Insert of existing embedding ID: 10640
Insert of existing embedding ID: 10641
Insert of existing embedding ID: 10642
Insert of existing embedding ID: 10643
Insert of existing embedding ID: 10644
Insert of existing embedding ID: 10645
Insert of existing embedding ID: 10646
Insert of existing embedding ID: 10647
Insert of existing embedding ID: 10648
Insert of existing embedding ID: 10649
Insert of existing embedding ID: 10650
Insert of existing embedding ID: 10651
Insert of existing embedding ID: 10652
Insert of existing embedding ID: 10653
Insert of existing embedding ID: 10654
Insert of existing embedding ID: 10655
Insert of existing embedding ID: 10656
Insert of existing embedding ID: 10657
Insert of existing embedding ID: 10658
Insert of existing embedding ID: 10659
Insert of existing embedding ID: 10660
Insert of existing embedding ID: 10661
Insert of existing embedding ID: 10662
Insert of existing embedding ID: 10663
Insert of existing embedd

In [44]:
URI='http://139.84.142.100:8080'
client = InferenceClient(model=URI)

In [58]:
#query="What did Ke Huy Quan work on?"
#query="Which movie won the best music award?"
query="Did Lady Gaga win an award at Oscars 2023?"
#query="Who is the music director of RRR?"
context=generate_context(query)

In [59]:
context

'Monika Willi got nominated under the category, film editing, for the film Tár but did not win\nMary Zophres got nominated under the category, costume design, for the film Babylon but did not win\nMusic and Lyric by Lady Gaga and BloodPop got nominated under the category, music (original song), for the film Top Gun: Maverick but did not win\nMark Coulier, Jason Baird and Aldo Signoretti got nominated under the category, makeup and hairstyling, for the film Elvis but did not win\nSara Gunnarsdóttir and Pamela Ribon got nominated under the category, short film (animated), for the film My Year of Dicks but did not win\nDede Gardner, Jeremy Kleiner and Frances McDormand, Producers got nominated under the category, best picture, for the film Women Talking but did not win\nBaz Luhrmann, Catherine Martin, Gail Berman, Patrick McCormick and Schuyler Weiss, Producers got nominated under the category, best picture, for the film Elvis but did not win\nLaura Poitras, Howard Gertler, John Lyons, Na

In [60]:
system_prompt="""\
You are a helpful AI assistant that can answer questions on Oscar 2023 awards. Answer based on the context provided. If you cannot find the correct answerm, say I don't know. Be concise and just include the response.
"""

In [61]:
user_prompt=f"""
Based on the context:
{context}
Answer the below query:
{query}
"""

In [62]:
user_prompt

'\nBased on the context:\nMonika Willi got nominated under the category, film editing, for the film Tár but did not win\nMary Zophres got nominated under the category, costume design, for the film Babylon but did not win\nMusic and Lyric by Lady Gaga and BloodPop got nominated under the category, music (original song), for the film Top Gun: Maverick but did not win\nMark Coulier, Jason Baird and Aldo Signoretti got nominated under the category, makeup and hairstyling, for the film Elvis but did not win\nSara Gunnarsdóttir and Pamela Ribon got nominated under the category, short film (animated), for the film My Year of Dicks but did not win\nDede Gardner, Jeremy Kleiner and Frances McDormand, Producers got nominated under the category, best picture, for the film Women Talking but did not win\nBaz Luhrmann, Catherine Martin, Gail Berman, Patrick McCormick and Schuyler Weiss, Producers got nominated under the category, best picture, for the film Elvis but did not win\nLaura Poitras, Howar

In [63]:
chat_completion(system_prompt,user_prompt)

'No, Lady Gaga did not win an award at the Oscars 2023.'