In [55]:
import os
import time
from pprint import pprint

from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader, UnstructuredURLLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
# Load key/value pairs from a local .env file into the process environment,
# so the API key read in the next cell does not have to be written in the notebook.
load_dotenv()

True

In [3]:
# Read the Groq API key from the environment; a missing variable raises KeyError here.
groq_api_key = os.environ["GROQ_API_KEY"]

In [57]:
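# Disabled alternative: load the same page with WebBaseLoader (the next cell uses UnstructuredURLLoader instead).
# loader = WebBaseLoader("https://en.wikipedia.org/wiki/Diljit_Dosanjh")
# docs = loader.load()
# docs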

In [83]:
# Fetch the Wikipedia article and turn it into LangChain Document objects.
# NOTE(review): the extracted text also carries page chrome (language list,
# tool menus), which ends up in the chunks built later.
article_urls = ["https://en.wikipedia.org/wiki/Diljit_Dosanjh"]
loader = UnstructuredURLLoader(urls=article_urls)
docs = loader.load()
docs

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Diljit_Dosanjh'}, page_content='Toggle the table of contents\n\nDiljit Dosanjh\n\n19 languages\n\nتۆرکجه\n\nবাংলা\n\nEspañol\n\nفارسی\n\nहिन्दी\n\nBahasa Indonesia\n\nItaliano\n\nमैथिली\n\nमराठी\n\nꯃꯤꯇꯩ ꯂꯣꯟ\n\nनेपाली\n\nਪੰਜਾਬੀ\n\nپنجابی\n\nPortuguês\n\nРусский\n\nSuomi\n\nతెలుగు\n\nTürkçe\n\nاردو\n\nEdit links\n\nArticle\n\nTalk\n\nEnglish\n\nRead\n\nEdit\n\nView history\n\nTools\n\nTools\n\nActions\n\nRead\n\nEdit\n\nView history\n\nGeneral\n\nWhat links here\n\nRelated changes\n\nUpload file\n\nSpecial pages\n\nPermanent link\n\nPage information\n\nCite this page\n\nGet shortened URL\n\nDownload QR code\n\nWikidata item\n\nPrint/export\n\nDownload as PDF\n\nPrintable version\n\nIn other projects\n\nWikimedia Commons\n\nAppearance\n\nFrom Wikipedia, the free encyclopedia\n\nIndian singer and actor (born 1984)\n\nDiljit Dosanjh\n\nDosanjh at the media meet of Phillauri\n\n(1984-01-06) 6 January 1984 (age\xa040)Dosanjh Kalan, 

In [84]:
# Length of the loaded article text, in characters.
len(docs[0].page_content)

31670

In [94]:
# Split the article into overlapping pieces: up to 500 characters per chunk,
# with 100 characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
)
chunks = text_splitter.split_documents(docs)
chunks

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Diljit_Dosanjh'}, page_content='Toggle the table of contents\n\nDiljit Dosanjh\n\n19 languages\n\nتۆرکجه\n\nবাংলা\n\nEspañol\n\nفارسی\n\nहिन्दी\n\nBahasa Indonesia\n\nItaliano\n\nमैथिली\n\nमराठी\n\nꯃꯤꯇꯩ ꯂꯣꯟ\n\nनेपाली\n\nਪੰਜਾਬੀ\n\nپنجابی\n\nPortuguês\n\nРусский\n\nSuomi\n\nతెలుగు\n\nTürkçe\n\nاردو\n\nEdit links\n\nArticle\n\nTalk\n\nEnglish\n\nRead\n\nEdit\n\nView history\n\nTools\n\nTools\n\nActions\n\nRead\n\nEdit\n\nView history\n\nGeneral\n\nWhat links here\n\nRelated changes\n\nUpload file\n\nSpecial pages\n\nPermanent link\n\nPage information\n\nCite this page\n\nGet shortened URL\n\nDownload QR code'),
 Document(metadata={'source': 'https://en.wikipedia.org/wiki/Diljit_Dosanjh'}, page_content='Permanent link\n\nPage information\n\nCite this page\n\nGet shortened URL\n\nDownload QR code\n\nWikidata item\n\nPrint/export\n\nDownload as PDF\n\nPrintable version\n\nIn other projects\n\nWikimedia Commons\n\nAppearance\n\nFrom

In [95]:
# Number of chunks produced by the splitter.
len(chunks)

83

In [110]:
# Embedding model used to turn each chunk into a vector.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Alternative models (uncomment one to swap it in):
# embeddings = HuggingFaceEmbeddings(model_name="LaBSE")
# embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")

# Embed every chunk and build a FAISS vector store over the results.
vectors = FAISS.from_documents(chunks, embeddings)



In [111]:
# Groq-hosted chat model; a low temperature is used for less random output.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama3-8b-8192", temperature=0.2)

# Prompt for the answering step: {context} receives the retrieved chunks and
# {input} the user's question. The context block is delimited by a matching
# <context> ... </context> pair so its end is unambiguous.
template = """
Answer the question based on given context only. Please provide the most accurate response based on given question
<context>
{context}
</context>
Question:{input}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chain that places the retrieved documents into the prompt's {context} slot and calls the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)
# Retriever over the FAISS store; returns the 6 nearest chunks per query.
retriever = vectors.as_retriever(search_kwargs={"k":6})
# End-to-end pipeline: retrieve chunks for the input, then answer from them.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [112]:
# The question gets its own name so the ChatPromptTemplate bound to `prompt`
# is not overwritten by a plain string when this cell runs.
question = "how was diljit's childhood"
response = retrieval_chain.invoke({"input": question})
response

{'input': "how was diljit's childhood",
 'context': [Document(metadata={'source': 'https://en.wikipedia.org/wiki/Diljit_Dosanjh'}, page_content='^ Kaur, Kiran (3 August 2015). "Diljit started an NGO, Sanjh Foundation, in 2013". Bollywoodlife. Retrieved 23 August 2015.\n\nExternal links[edit]\n\nWikimedia Commons has media related to \n\nDiljit Dosanjh.\n\nOfficial website\n\nDiljit Dosanjh at IMDb\n\nv\n\nt\n\ne\n\nFilmfare Award for\n\nBest Male Debut'),
  Document(metadata={'source': 'https://en.wikipedia.org/wiki/Diljit_Dosanjh'}, page_content='Diljit released his first album Ishq Da Uda Ada in 2003 with Finetone Cassettes, a division of T-Series. Rajinder Singh of Finetone, who helped Dosanjh make a debut in the Punjabi music industry, suggested to him to spell his first name as Diljit instead of Daljit.[9] The music was composed by Bablu Mahindra and the lyrics were written by Balvir Boparai. Dosanjh gave vocals to all the eight tracks and the producers made a music video for the 

In [109]:
# Show only the generated answer; the full response dict also holds the input and the retrieved context.
pprint(response['answer'])

("There is no information provided in the given context about Diljit Dosanjh's "
 'childhood. The context only provides information about his birthday, awards, '
 'interviews, and performances, but does not mention his childhood.')
