<a href="https://colab.research.google.com/github/arishp/veltech_genai/blob/main/langchain_data_connection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install langchain
# !pip install sentence-transformers
# !pip install chromadb

In [5]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

# Document Loaders

In [7]:
loader = TextLoader('sample.txt')
documents = loader.load()
len(documents)

1

# Document Transformers

In [27]:
text_splitter = CharacterTextSplitter(separator=" ", chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
len(texts)

169

In [28]:
texts[:3]

[Document(page_content="India, officially the Republic of India (ISO: Bhārat Gaṇarājya),[22] is a country in South Asia. It is the seventh-largest country by area; the most populous country as of June 2023;[23][24] and from the time of its independence in 1947, the world's most populous democracy.[25][26][27] Bounded by the Indian Ocean on the south, the Arabian Sea on the southwest, and the Bay of Bengal on the southeast, it shares land borders with Pakistan to the west;[j] China, Nepal, and Bhutan to the north; and", metadata={'source': 'sample.txt'}),
 Document(page_content='Bangladesh and Myanmar[k] to the east. In the Indian Ocean, India is in the vicinity of Sri Lanka and the Maldives; its Andaman and Nicobar Islands share a maritime border with Thailand, Myanmar, and Indonesia.\n\nModern humans arrived on the Indian subcontinent from Africa no later than 55,000 years ago.[28][29][30] Their long occupation, initially in varying forms of isolation as hunter-gatherers, has made the

# Text Embedding Models

In [29]:
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Vector Stores

In [30]:
db = Chroma.from_documents(texts, embeddings)

In [None]:
db._collection.get(include=['embeddings'])

# Retrievers

In [32]:
retriever = db.as_retriever(search_kwargs={"k":3})

In [33]:
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7b0cd5d0b760>, search_kwargs={'k': 3})

# Question 1

In [34]:
docs = retriever.get_relevant_documents("what is the capital of india?")

In [35]:
docs

[Document(page_content='India, officially the Republic of India (ISO: Bhārat Gaṇarājya),[22] is a country in South Asia. It is the seventh-largest country by area; the most populous country as of June 2023;[23][24] and from', metadata={'source': 'sample.txt'}),
 Document(page_content='Nagar Haveli and Daman and Diu\n Jammu and Kashmir\n Ladakh\n Lakshadweep\n National Capital Territory of Delhi\n Puducherry\n\nForeign, economic and strategic relations\nMain articles: Foreign relations of', metadata={'source': 'sample.txt'}),
 Document(page_content="India, officially the Republic of India (ISO: Bhārat Gaṇarājya),[22] is a country in South Asia. It is the seventh-largest country by area; the most populous country as of June 2023;[23][24] and from the time of its independence in 1947, the world's most populous democracy.[25][26][27] Bounded by the Indian Ocean on the south, the Arabian Sea on the southwest, and the Bay of Bengal on the southeast, it shares land borders with Pakistan to th

In [36]:
docs = retriever.get_relevant_documents("what is the currency of india?")

In [37]:
docs

[Document(page_content='result of political, economic, and social changes.[247][248]\nNational symbols[1]Emblem\tSarnath Lion Capital\nAnthem\tJana Gana Mana\nSong\t"Vande Mataram"\nLanguage\tNone[249][250][251]\nCurrency\t₹ (Indian', metadata={'source': 'sample.txt'}),
 Document(page_content="(Indian rupees) to ₹235.24 billion (US$3.94 billion at June 2013 exchange rates).[342]\nEnergy\nMain articles: Energy in India and Energy policy of India\n\nIndia's capacity to generate electrical power", metadata={'source': 'sample.txt'}),
 Document(page_content="economy;[310] since then, it has moved increasingly towards a free-market system[311][312] by emphasising both foreign trade and direct investment inflows.[313] India has been a member of World Trade Organization since 1 January 1995.[314]\n\nThe 522-million-worker Indian labour force is the world's second-largest, as of 2017.[295] The service sector makes up 55.6% of GDP, the industrial sector 26.3% and the agricultural sector 18.1%. I