<a href="https://colab.research.google.com/github/arishp/veltech_genai/blob/main/langchain_data_connection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Third-party dependencies for this notebook. The installs are left commented
# out; uncomment and run them once on a fresh runtime (e.g. Colab).
# !pip install langchain
# !pip install sentence-transformers
# !pip install chromadb

In [4]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# Document Loaders

In [5]:
# Load the text file; the result is a list with a single Document.
# The encoding is pinned to UTF-8 because the source text contains non-ASCII
# characters (e.g. IPA pronunciation symbols). Without it the platform's
# default encoding is used, which is not UTF-8 on every system.
loader = TextLoader('sample.txt', encoding='utf-8')
documents = loader.load()
len(documents)

1

# Document Transformers

In [6]:
# Cut the loaded document into small chunks: split on single spaces, target
# a chunk size of 200 characters, and share no text between adjacent chunks.
text_splitter = CharacterTextSplitter(
    separator=" ",
    chunk_size=200,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(documents)
len(texts)

150

In [8]:
# Preview the first two chunks to sanity-check the split.
texts[:2]

[Document(page_content='Mahendra Singh Dhoni (/məˈheɪndrə ˈsɪŋ dhæˈnɪ/ ⓘ; born 7 July 1981) is an Indian professional cricketer. He is a right handed batter and a wicket-keeper. Widely regarded as one of the most prolific', metadata={'source': 'sample.txt'}),
 Document(page_content='wicket-keeper-batsmen and captains, he represented the Indian cricket team and was the captain of the side in limited-overs formats from 2007 to 2017 and in test cricket from 2008 to 2014. Dhoni has', metadata={'source': 'sample.txt'})]

# Text Embedding Models

In [None]:
# Embedding model used to vectorise the chunks: the sentence-transformers
# model selected by name ("all-MiniLM-L6-v2") via LangChain's wrapper class.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Vector Stores

In [10]:
# Embed every chunk in `texts` with `embeddings` and index them in a Chroma vector store.
# NOTE(review): no persist directory is passed, so the store presumably lives only
# in this session; re-running this cell may add the same chunks again — confirm.
db = Chroma.from_documents(texts, embeddings)

In [None]:
# Peek at the stored vectors through Chroma's underlying collection
# (`_collection` is a private attribute, used here for inspection only).
# Only the first two records are fetched so the cell output stays readable
# instead of printing one embedding vector per stored chunk.
db._collection.get(include=['embeddings'], limit=2)

# Retrievers

In [12]:
# Wrap the vector store in a retriever; k=5 is passed through as the search
# setting, i.e. five results are requested per query.
retriever = db.as_retriever(
    search_kwargs={"k": 5},
)

In [13]:
# Display the retriever's repr to confirm its configuration (tags, search_kwargs).
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7947ebd41cf0>, search_kwargs={'k': 5})

# Question 1

In [14]:
# Question 1: fetch the chunks most similar to the query text.
# `invoke` is the current retriever entry point; `get_relevant_documents`
# is deprecated in recent LangChain releases and returns the same list of
# Document objects.
docs = retriever.invoke("who married dhoni")

In [15]:
# Show the Documents currently held in `docs`.
docs

[Document(page_content='Dhoni is also known for his cool-headed demeanor on the field which has earned him the monicker "Captain cool".[173]\nPersonal life\n\nDhoni married Sakshi Singh Rawat on 4 July 2010 in', metadata={'source': 'sample.txt'}),
 Document(page_content='Dehradun.[174][175] Dhoni and his wife have a daughter, Zeeva born on 6 February 2015.[176][177] He lives in his farmhouse outside Ranchi.[178] Dhoni owns a number of bikes and cars in his', metadata={'source': 'sample.txt'}),
 Document(page_content='On 9 July 2019, Dhoni played in his 350th and final ODI in the semi-final loss against New Zealand.[114] Dhoni announced his retirement from international cricket on 15 August 2020 as he had not', metadata={'source': 'sample.txt'}),
 Document(page_content='three children.[5][6][7] His family spells the surname as "Dhauni".[8]\n\nDhoni did his schooling at DAV Jawahar Vidya Mandir where he started playing football as a goal keeper but later moved to play', metadata={'source