In [None]:
import os

import pandas as pd
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import init_chat_model
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.vectorstores import FAISS

In [25]:
# Pull variables from a local .env file into the process environment, then
# read the OpenAI key from there so it never has to be hardcoded in the notebook.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

In [7]:
# Load the cocktail dataset from the CSV next to the notebook and show
# which columns it provides.
df = pd.read_csv("final_cocktails.csv")
df.columns

Index(['Unnamed: 0', 'id', 'name', 'alcoholic', 'category', 'glassType',
       'instructions', 'drinkThumbnail', 'ingredients', 'ingredientMeasures',
       'text'],
      dtype='object')

In [8]:
# Preview the first five rows to sanity-check the loaded data.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,id,name,alcoholic,category,glassType,instructions,drinkThumbnail,ingredients,ingredientMeasures,text
0,0,0,A1,Alcoholic,Cocktail,Cocktail glass,"Pour all ingredients into a cocktail shaker, m...",https://www.thecocktaildb.com/images/media/dri...,"['Gin', 'Grand Marnier', 'Lemon Juice', 'Grena...","['1 3/4 shot ', '1 Shot ', '1/4 Shot', '1/8 Sh...",question Generate a cocktail with Gin Grand Ma...
1,1,1,ABC,Alcoholic,Shot,Shot glass,Layered in a shot glass.,https://www.thecocktaildb.com/images/media/dri...,"['Amaretto', 'Baileys irish cream', 'Cognac']","['1/3 ', '1/3 ', '1/3 ']",question Generate a cocktail with Amaretto Bai...
2,2,2,Ace,Alcoholic,Cocktail,Martini Glass,Shake all the ingredients in a cocktail shaker...,https://www.thecocktaildb.com/images/media/dri...,"['Gin', 'Grenadine', 'Heavy cream', 'Milk', 'E...","['2 shots ', '1/2 shot ', '1/2 shot ', '1/2 sh...",question Generate a cocktail with Gin Grenadin...
3,3,3,ACID,Alcoholic,Shot,Shot glass,Poor in the 151 first followed by the 101 serv...,https://www.thecocktaildb.com/images/media/dri...,"['151 proof rum', 'Wild Turkey']","['1 oz Bacardi ', '1 oz ']",question Generate a cocktail with 151 proof ru...
4,4,4,Adam,Alcoholic,Ordinary Drink,Cocktail glass,"In a shaker half-filled with ice cubes, combin...",https://www.thecocktaildb.com/images/media/dri...,"['Dark rum', 'Lemon juice', 'Grenadine']","['2 oz ', '1 oz ', '1 tsp ']",question Generate a cocktail with Dark rum Lem...


In [20]:
# Build one "key: value key: value ..." string per cocktail; this is the text
# that later gets embedded.
#
# The column list is captured once, up front, and leaves out both "id" and
# "combined_text" itself. Reading df.columns lazily inside the lambda made the
# cell non-idempotent: on a second run the previously generated combined_text
# column was folded into the new text, so every re-run grew the string.
text_columns = [col for col in df.columns if col not in ("id", "combined_text")]

df["combined_text"] = df.apply(
    lambda row: " ".join(f"{key}: {row[key]}" for key in text_columns),
    axis=1,
)

In [27]:
# Embedding model used to vectorise the cocktail texts.
# NOTE(review): the recorded output below echoes this line, which looks like a
# warning raised by the constructor — confirm and address if it is a deprecation.
embedding_model = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

  embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002")


In [28]:
# Wrap every DataFrame row in a Document: the combined text becomes the
# page content and the complete row (as a dict) is attached as metadata.
documents = [
    Document(metadata=cocktail.to_dict(), page_content=cocktail["combined_text"])
    for _index, cocktail in df.iterrows()
]


In [29]:
# Build the FAISS vector index from the documents using the embedding model.
# NOTE(review): presumably this embeds every document through the OpenAI API,
# so it is the expensive step of the notebook — confirm before re-running often.
faiss_index = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
)


In [31]:
# Write the index to local storage under "faiss_index".
faiss_index.save_local(folder_path="faiss_index")

In [32]:
# Expose the index as a retriever that returns the top 5 documents per query.
retriever = faiss_index.as_retriever(
    search_kwargs=dict(k=5),
)


In [34]:
# Chat model that writes the final answer. The OpenAI key read from the
# environment earlier in the notebook is passed in explicitly.
# (init_chat_model is imported in the notebook's import cell so that all
# dependencies are visible in one place.)
model = init_chat_model(
    "gpt-4o-mini",
    model_provider="openai",
    api_key=api_key,
)


In [58]:
# Prompt text for the answering step. It has two placeholders, {context} and
# {question}, which are declared as the template's input variables below.
template = """Answer the question based only on the following context and without bullet points containing only short answers:
{context}

Question: {question}
Answer:"""

prompt = PromptTemplate(input_variables=["context", "question"], template=template)

In [59]:
# Assemble the retrieval + answering chain: the retriever supplies documents,
# the chat model answers using the custom prompt, and the source documents
# are returned alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [60]:
def query_rag(question: str) -> dict:
    """Run a question through the module-level ``rag_chain`` and summarise the result.

    Args:
        question: The question text; it is sent to the chain under the
            ``"query"`` key.

    Returns:
        A dict with two entries:
          * ``"answer"`` - the chain's ``"result"`` value.
          * ``"source_documents"`` - a list of ``(page_content, metadata)``
            tuples, one per document in the chain's ``"source_documents"``.
    """
    # Use .invoke() rather than calling the chain object directly; the direct
    # call form is deprecated in LangChain.
    result = rag_chain.invoke({"query": question})

    sources = [(doc.page_content, doc.metadata) for doc in result["source_documents"]]
    return {
        "answer": result["result"],
        "source_documents": sources,
    }

In [61]:
# Example query against the pipeline; only the answer text is printed.
response = query_rag(
    question="What are the 5 non-alcoholic cocktails containing sugar?"
)
print(response["answer"])

The five non-alcoholic cocktails containing sugar are Orangeade, Iced Coffee, Fruit Cooler, Limeade, and Sweet Sangria.
