<a href="https://colab.research.google.com/github/beyg1/Q3/blob/main/Rag_CSV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU langchain chromadb langchain_community langchain-google-genai

In [2]:
# Step 2: Import Required Modules
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Chroma
from langchain.document_loaders import CSVLoader
from langchain.prompts import PromptTemplate

In [3]:
# Step 3: Load the dataset from the local filesystem.
# Nothing is downloaded in this cell: the CSV must already exist at
# `file_path` before the cell runs.
# NOTE(review): `requests` is imported but not used in the visible cells.
import requests


file_path = '/content/travel_destination_guide.csv'

# Reuse `file_path` so the dataset location is defined in exactly one place.
loader = CSVLoader(file_path=file_path)
documents = loader.load()

In [None]:
# Preview only the first few loaded documents instead of dumping the whole list.
documents[:3]

In [10]:
from langchain.text_splitter import CharacterTextSplitter

# Collect the raw text of every loaded document.
pages = [document.page_content for document in documents]

# Character-based splitter: at most 500 characters per chunk, no overlap.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)

# Only the text is handed to the splitter, so the chunks are built from
# `pages` rather than from the original Document objects.
chunks = text_splitter.create_documents(pages)
print(f"Number of Chunks: {len(chunks)}")

Number of Chunks: 1000


In [11]:
# Inspect the first chunk produced by the splitter as a quick sanity check.
chunks[0]

Document(metadata={}, page_content='Destination: Destination 1\nActivities: Activity 1, Activity 2, Activity 3\nClimate: Warm\nDescription: Description for Destination 1')

In [12]:
# Step 4: Initialize Embeddings
from google.colab import userdata

# Fetch the API key stored under 'GOOGLE_API_KEY' via google.colab.userdata,
# so the key itself never appears in the notebook.
key = userdata.get('GOOGLE_API_KEY')

# Embedding model used to vectorize the documents.
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=key,
)

In [13]:
# Step 5: Create a Chroma Vector Store
# NOTE(review): the store is built from `documents` (the loader output), not
# from the `chunks` produced by the text splitter - confirm this is intended.
vector_store = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
)

In [15]:
# Chat model used to generate answers; it is given the same API key as the
# embedding model.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=key,
)

In [16]:
# Expose the vector store as a retriever configured with k=5.
retriever = vector_store.as_retriever(search_kwargs=dict(k=5))

# Show the concrete retriever class that was created.
print(type(retriever))

<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>


In [17]:
# Smoke-test the retriever with a one-word query and print what comes back.
# `invoke` replaces the deprecated `get_relevant_documents` method.
sample_docs = retriever.invoke("Climate")
for doc in sample_docs:
    print(doc.page_content)

  sample_docs = retriever.get_relevant_documents("Climate")


Destination: Destination 123
Activities: Activity 3, Activity 4, Activity 5
Climate: Cold
Description: Description for Destination 123
Destination: Destination 141
Activities: Activity 1, Activity 2, Activity 3
Climate: Cold
Description: Description for Destination 141
Destination: Destination 147
Activities: Activity 7, Activity 8, Activity 9
Climate: Cold
Description: Description for Destination 147
Destination: Destination 144
Activities: Activity 4, Activity 5, Activity 6
Climate: Cold
Description: Description for Destination 144
Destination: Destination 102
Activities: Activity 2, Activity 3, Activity 4
Climate: Cold
Description: Description for Destination 102


In [18]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# Two-message chat prompt for the RAG chain: a fixed system instruction plus a
# human message containing the {context} and {question} placeholders.
chat_template = ChatPromptTemplate.from_messages([
    # System Message Prompt Template
    # (a plain SystemMessage with fixed text; it has no placeholders)
    SystemMessage(content="""You are a Helpful Travel Agent.
                  Given a context and question from user,
                  you should answer based on the given context."""),
    # Human Message Prompt Template
    # (filled in with the `context` and `question` inputs of the chain)
    HumanMessagePromptTemplate.from_template("""Answer the question based on the given context.
    Context: {context}
    Question: {question}
    Answer: """)
])

# Final stage of the chain: parses the chat model's output into a string.
output_parser = StrOutputParser()

def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values, in input order, separated by a blank
        line. An empty iterable yields an empty string.
    """
    separator = "\n\n"
    texts = [doc.page_content for doc in docs]
    return separator.join(texts)


# RAG pipeline. The incoming question is routed two ways: through the
# retriever and `format_docs` to build `context`, and unchanged as `question`.
# The resulting mapping feeds the prompt, then the chat model, then the parser.
rag_chain = (
    dict(
        context=retriever | format_docs,
        question=RunnablePassthrough(),
    )
    | chat_template
    | llm
    | output_parser
)

In [19]:
# Irrelevant retrieval: ask about a topic outside the travel dataset to see
# how the chain answers when the retrieved context cannot help.
response = rag_chain.invoke("""Please summarize Leave No Context Behind:
                             Efficient Infinite Context Transformers with Infini-attention""")

# Display the answer as the cell output.
response

'I\'m sorry, but the provided text gives information about various travel destinations but contains no information about "Leave No Context Behind: Efficient Infinite Context Transformers with Infini-attention".  Therefore, I cannot answer your question.'

In [20]:
# Relevant query: a question the travel dataset can answer.
response = rag_chain.invoke(
    "What are top 3 destinations with warm climate and fun activities?"
)

# Display the answer as the cell output.
response

'Based on the provided text, the top 3 destinations with a warm climate and fun activities are Destination 391, Destination 331, and Destination 361.  They all share Activities 1, 2, and 3, and have a warm climate.'

In [21]:
# Step 7: Set Up Retrieval-Augmented Generation (RAG) Chain
from langchain.chains import RetrievalQA

# Built from the same `llm` and `retriever` as `rag_chain`, using the "stuff"
# chain type.
faq_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [24]:
# Step 9: Ask a Question
question = "Suggest Top destination with moderate climate"

# `Chain.run` is deprecated. `invoke` takes the question under RetrievalQA's
# "query" input key and returns a dict whose "result" entry is the answer text.
response = faq_chain.invoke({"query": question})["result"]

print("Response:", response)

Response: Based on the provided data, I cannot definitively say which is the "top" destination with a moderate climate, as there is no ranking or preference information given.  However, Destinations 2, 92, and 62 all have moderate climates.
