# NAIVE RAG Example

In [2]:
! source .venv/bin/activate
! pip3 install -r requirements.txt



In [4]:
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import DataFrameLoader
from langchain_google_vertexai import VertexAIEmbeddings, VertexAI


import pandas as pd

ModuleNotFoundError: No module named 'langchain_community'

## Donwloading some data

In [2]:
!gsutil cp gs://cloud-samples-data/langchain/common/first_five_netflix_titles.csv .

Copying gs://cloud-samples-data/langchain/common/first_five_netflix_titles.csv...
/ [1 files][  2.0 KiB/  2.0 KiB]                                                
Operation completed over 1 objects/2.0 KiB.                                      


In [2]:
df = pd.read_csv("first_five_netflix_titles.csv")
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [12]:
def merge_content(row):
    return f""" **Title**: {row["title"]}
        **director**: {row["director"]}
        **cast**: {row["cast"]}
        **description**: {row["description"]}    
    """

df["content"] = df.apply(merge_content, axis=1)
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,content
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",**Title**: Dick Johnson Is Dead\n **di...
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",**Title**: Blood & Water\n **director*...
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,**Title**: Ganglands\n **director**: J...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",**Title**: Jailbirds New Orleans\n **d...
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,**Title**: Kota Factory\n **director**...


## Load and split data

In [14]:
loader = DataFrameLoader(df, page_content_column="content", )
documents = loader.load()


In [15]:
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=20)
docs = text_splitter.split_documents(documents)

## Creating embeddings and store in vector store

In [20]:
embeddings_service = VertexAIEmbeddings(model_name="textembedding-gecko@003")


In [21]:
db = FAISS.from_documents(docs, embeddings_service)
print(db.index.ntotal)

5


## Creating RAG chain (Retriever + Generation)

In [33]:
retriever = db.as_retriever(search_kwargs={"k": 1})
prompt = ChatPromptTemplate.from_template("""Answer the question based only on the following context:
{context}

Question: {question}
""")

In [34]:
llm = VertexAI(model_name="gemini-1.0-pro-002")

In [37]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

## lets test it!

In [39]:
chain.invoke("Who is the director from Ganglands?")

'The director of Ganglands is Julien Leclercq. \n'

In [40]:
chain.invoke("Describe the filme that Mayur More was in")

'Based on the provided context, the film "Kota Factory" features Mayur More as a cast member. The film\'s description mentions that it follows the story of an earnest but unexceptional student and his friends as they navigate campus life in a city filled with coaching centers known for training India\'s brightest minds. However, the context does not provide any specific details about the character Mayur More portrays or the film\'s plot beyond the general premise. \n'