<a href="https://colab.research.google.com/github/arishp/srm-ap-genai-2024/blob/main/02_Intro_to_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from langchain_huggingface import HuggingFaceEmbeddings

# Document Loaders

In [3]:
loader = TextLoader('sample.txt')
documents = loader.load()
len(documents)

1

# Document Transformers

In [4]:
text_splitter = CharacterTextSplitter(separator=" ", chunk_size=200, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
len(texts)

Created a chunk of size 259, which is longer than the specified 200


152

In [5]:
texts[:2]

[Document(metadata={'source': 'sample.txt'}, page_content='Mahendra Singh Dhoni (/məˈheɪndrə ˈsɪŋ dhæˈnɪ/ ⓘ; born 7 July 1981) is an Indian professional cricketer who plays as a right-handed batter and a wicket-keeper. Widely regarded as one of the most'),
 Document(metadata={'source': 'sample.txt'}, page_content='prolific wicket-keeper batsmen and captains, he represented the Indian cricket team and was the captain of the side in limited overs formats from 2007 to 2017 and in test cricket from 2008 to 2014.')]

# Text Embedding Models

In [8]:
# embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Vector Stores

In [9]:
db = Chroma.from_documents(texts, embeddings)

In [10]:
db._collection.get(include=['embeddings'])

{'ids': ['c5be1219-49a0-487a-8358-811437da2f8e',
  'b7ac003e-657b-4d78-aae2-af28d020f713',
  'a06ef9b6-5e56-4a96-9362-2cfb6d21dff5',
  'c587ff4d-8c49-4097-9e46-40456bfb914c',
  '58356f8d-fbce-4841-883b-7b534f375217',
  '6cd29e14-1d7d-4dfa-9904-6a7e22228fea',
  'e8898176-7625-480b-bc95-e46dc2736c7c',
  '2deac7f3-63f0-4aec-9052-baabb40845e7',
  '81b65161-0376-46c3-a7f5-a49887d57cb7',
  '33c2d11e-1871-4ea5-befd-edb7c24f47c5',
  '63434b85-3ff8-4599-b4c6-37d5df4ff314',
  '8872a67d-c3ff-4fee-a996-20e4f1c8c65a',
  'ae0d2a3f-05e4-4e2b-8cac-3e328535f297',
  '93e66af3-a9ee-451a-8787-dd922166ebdc',
  '5d77b261-84d2-4fa8-96f2-6f30ecb89520',
  '8c094775-9ee9-4dd7-b893-7eb8d99fd5ec',
  'dca4c860-9c31-4f6e-be75-b90f4b88bde2',
  'ef0d2e68-027b-4cdb-a6a4-82914624d7fe',
  '5f38384b-574d-47f0-b32a-d2ddd3bf0746',
  '185c8c70-c6f8-440f-913e-223407c66867',
  '3f4943ef-f228-4823-b03b-bf31cc5f6daf',
  '60d420e8-5c36-45b5-b995-21c81d35d67f',
  '2c6e8961-d3ee-460c-a484-b242f7ed17e2',
  'c0c437d8-5af2-4ea6-a548-

# Retrievers

In [11]:
retriever = db.as_retriever(search_kwargs={"k":3})

In [12]:
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x170f81880>, search_kwargs={'k': 3})

# Question 1

In [13]:
question = input("enter your question?")

In [14]:
# docs = retriever.get_relevant_documents(question)
docs = retriever.invoke(question)

In [15]:
docs

[Document(metadata={'source': 'sample.txt'}, page_content='Dehradun.[179][180] Dhoni and his wife have a daughter, Ziva Dhoni who was born on 6 February 2015.[181][182] He lives in his farmhouse outside Ranchi.[183] Dhoni is an automotive enthusiast and owns'),
 Document(metadata={'source': 'sample.txt'}, page_content='Dhoni is also known for his cool-headed demeanor on the field which has earned him the monicker "Captain cool".[178]\n\nPersonal life\nDhoni married Sakshi Singh Rawat on 4 July 2010 in'),
 Document(metadata={'source': 'sample.txt'}, page_content='wicket-keeper.[119] Dhoni is an unorthodox batsman who deviates from conventional coaching manuals to showcase his batting technique.[167] Dhoni displays a proclivity for hitting full-length')]

In [16]:
relevant_text = ''
for doc in docs:
  relevant_text += doc.page_content
print(relevant_text)

Dehradun.[179][180] Dhoni and his wife have a daughter, Ziva Dhoni who was born on 6 February 2015.[181][182] He lives in his farmhouse outside Ranchi.[183] Dhoni is an automotive enthusiast and ownsDhoni is also known for his cool-headed demeanor on the field which has earned him the monicker "Captain cool".[178]

Personal life
Dhoni married Sakshi Singh Rawat on 4 July 2010 inwicket-keeper.[119] Dhoni is an unorthodox batsman who deviates from conventional coaching manuals to showcase his batting technique.[167] Dhoni displays a proclivity for hitting full-length


In [21]:
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
import os

load_dotenv()

# hf_token = userdata.get('HUGGINGFACE_HUB_API_KEY')

# Define the repository ID for the Gemma 2b model
repo_id = "google/gemma-2b"

# Set up a Hugging Face Endpoint for Gemma 2b model
llm = HuggingFaceEndpoint(
    repo_id=repo_id, temperature=0.1, huggingfacehub_api_token=os.environ['HUGGINGFACE_HUB_API_KEY']
)

In [22]:
template = """Answer the question in one line using the following information:

```{information}```.


*** Question ***

{question}

*** Answer ***"""

prompt = PromptTemplate.from_template(template=template)

In [23]:
prompt_formatted_str: str = prompt.format(question=question, information=relevant_text)
response = llm.invoke(prompt_formatted_str)
response.strip()

"Dhoni's wife is Sakshi Singh Rawat."