<a href="https://colab.research.google.com/github/arishp/srm-ap-genai-2024/blob/main/02_Intro_to_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Document Loaders

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
from langchain.document_loaders import TextLoader

loader = TextLoader('sample.txt')
documents = loader.load()
len(documents)

1

# Document Transformers

In [3]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(separator=" ", chunk_size=300, chunk_overlap=50)
texts = text_splitter.split_documents(documents)
len(texts)

129

In [4]:
texts[:2]

[Document(metadata={'source': 'sample.txt'}, page_content='Dhoni" redirects here. For other uses, see Dhoni (disambiguation).\nLieutenant colonel\nMahendra Singh Dhoni\n\nDhoni in 2023\nPersonal details\nBorn\t7 July 1981 (age 43)\nRanchi, Bihar (present-day Jharkhand), India\nHeight\t5 ft 9 in (175 cm)[1]\nSpouse\tSakshi Dhoni\nAwards\t\n Padma Bhushan (2018)\n Padma Shri'),
 Document(metadata={'source': 'sample.txt'}, page_content='Dhoni\nAwards\t\n Padma Bhushan (2018)\n Padma Shri (2009)\nMajor Dhyan Chand Khel Ratna Award (2008)\nNickname(s)\tMahi, Thala, Captain Cool[2]\nMilitary service\nAllegiance\t India\nBranch/service\t Indian Army\nYears of service\t2011–present\nRank\t Lieutenant colonel\nUnit\t Territorial Army\nPersonal')]

# Text Embedding Models

In [5]:
# from langchain_huggingface import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [7]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings=GoogleGenerativeAIEmbeddings(model='models/embedding-001')

In [8]:
from langchain_community.vectorstores import Chroma
db = Chroma.from_documents(texts, embeddings)

# Vector Stores

In [15]:
from langchain.vectorstores import Chroma
db = Chroma.from_documents(texts, embeddings)

In [10]:
# db._collection.get(include=['embeddings'])

# Retrievers

In [11]:
retriever = db.as_retriever(search_kwargs={"k":4})

In [12]:
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x138ad3f40>, search_kwargs={'k': 4})

# Question 1

In [13]:
question = input("enter your question?")

In [14]:
question

'who is dhoni married to?'

In [15]:
# docs = retriever.get_relevant_documents(question)
docs = retriever.invoke(question)

In [16]:
docs

[Document(metadata={'source': 'sample.txt'}, page_content='of being a successful leader.[176][177] Dhoni is also known for his cool-headed demeanor on the field which has earned him the monicker "Captain cool".[178]\n\nPersonal life\nDhoni married Sakshi Singh Rawat on 4 July 2010 in Dehradun.[179][180] Dhoni and his wife have a daughter, Ziva Dhoni who was'),
 Document(metadata={'source': 'sample.txt'}, page_content='(now in Jharkhand) in a Hindu Rajput family to Pan Singh and Devaki Devi.[3] His parents hailed from Lwali village in Uttar Pradesh (now Uttarakhand) and he was the youngest of three children.[4][5][6] His family spells the surname as "Dhauni".[7] The spelling "Dhoni" emerged due to a spelling'),
 Document(metadata={'source': 'sample.txt'}, page_content='in India Cements Ltd, the company owned by former BCCI president N. Srinivasan.[145][146] Dhoni is a co-owner of Chennai-based football club Chennaiyin FC, a franchise of the Indian Super League.[147] He also co-owns Ranc

In [17]:
docs[1].page_content

'(now in Jharkhand) in a Hindu Rajput family to Pan Singh and Devaki Devi.[3] His parents hailed from Lwali village in Uttar Pradesh (now Uttarakhand) and he was the youngest of three children.[4][5][6] His family spells the surname as "Dhauni".[7] The spelling "Dhoni" emerged due to a spelling'

In [18]:
relevant_text = ''
for doc in docs:
  relevant_text += doc.page_content
print(relevant_text)

of being a successful leader.[176][177] Dhoni is also known for his cool-headed demeanor on the field which has earned him the monicker "Captain cool".[178]

Personal life
Dhoni married Sakshi Singh Rawat on 4 July 2010 in Dehradun.[179][180] Dhoni and his wife have a daughter, Ziva Dhoni who was(now in Jharkhand) in a Hindu Rajput family to Pan Singh and Devaki Devi.[3] His parents hailed from Lwali village in Uttar Pradesh (now Uttarakhand) and he was the youngest of three children.[4][5][6] His family spells the surname as "Dhauni".[7] The spelling "Dhoni" emerged due to a spellingin India Cements Ltd, the company owned by former BCCI president N. Srinivasan.[145][146] Dhoni is a co-owner of Chennai-based football club Chennaiyin FC, a franchise of the Indian Super League.[147] He also co-owns Ranchi-based hockey club Ranchi Rays, a franchise of the Hockey India League.[148]and his wife have a daughter, Ziva Dhoni who was born on 6 February 2015.[181][182] He lives in his farmhouse 

In [19]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [20]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [21]:
from langchain.prompts import PromptTemplate

template = """Answer the question in one line using the following information:

```{information}```.


*** Question ***

{question}

*** Answer ***"""

prompt = PromptTemplate.from_template(template=template)

In [22]:
question

'who is dhoni married to?'

In [23]:
relevant_text

'of being a successful leader.[176][177] Dhoni is also known for his cool-headed demeanor on the field which has earned him the monicker "Captain cool".[178]\n\nPersonal life\nDhoni married Sakshi Singh Rawat on 4 July 2010 in Dehradun.[179][180] Dhoni and his wife have a daughter, Ziva Dhoni who was(now in Jharkhand) in a Hindu Rajput family to Pan Singh and Devaki Devi.[3] His parents hailed from Lwali village in Uttar Pradesh (now Uttarakhand) and he was the youngest of three children.[4][5][6] His family spells the surname as "Dhauni".[7] The spelling "Dhoni" emerged due to a spellingin India Cements Ltd, the company owned by former BCCI president N. Srinivasan.[145][146] Dhoni is a co-owner of Chennai-based football club Chennaiyin FC, a franchise of the Indian Super League.[147] He also co-owns Ranchi-based hockey club Ranchi Rays, a franchise of the Hockey India League.[148]and his wife have a daughter, Ziva Dhoni who was born on 6 February 2015.[181][182] He lives in his farmho

In [24]:
prompt_formatted_str: str = prompt.format(question=question, information=relevant_text)

In [25]:
prompt_formatted_str

'Answer the question in one line using the following information:\n\n```of being a successful leader.[176][177] Dhoni is also known for his cool-headed demeanor on the field which has earned him the monicker "Captain cool".[178]\n\nPersonal life\nDhoni married Sakshi Singh Rawat on 4 July 2010 in Dehradun.[179][180] Dhoni and his wife have a daughter, Ziva Dhoni who was(now in Jharkhand) in a Hindu Rajput family to Pan Singh and Devaki Devi.[3] His parents hailed from Lwali village in Uttar Pradesh (now Uttarakhand) and he was the youngest of three children.[4][5][6] His family spells the surname as "Dhauni".[7] The spelling "Dhoni" emerged due to a spellingin India Cements Ltd, the company owned by former BCCI president N. Srinivasan.[145][146] Dhoni is a co-owner of Chennai-based football club Chennaiyin FC, a franchise of the Indian Super League.[147] He also co-owns Ranchi-based hockey club Ranchi Rays, a franchise of the Hockey India League.[148]and his wife have a daughter, Ziva 

In [27]:
response = llm.invoke(prompt_formatted_str)

In [28]:
response.content

'Dhoni is married to Sakshi Singh Rawat.'