In [1]:
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

In [4]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map = "auto",
    dtype = 'auto'
)

In [5]:
pipe = pipeline(
    model = model,
    task = 'text-generation',
    tokenizer = tokenizer,
    max_new_tokens = 256,
    temperature = 0.5
)

Device set to use cuda:0


In [6]:
llm = HuggingFacePipeline(pipeline = pipe)

In [7]:
chat_model = ChatHuggingFace(llm = llm)

In [10]:
from langchain_community.retrievers import WikipediaRetriever

In [11]:
retriever = WikipediaRetriever(load_max_docs = 2, lang = 'en')

In [12]:
query = "Give me a short summary about Shivaji Maharaj of Marathas"

In [13]:
docs = retriever.invoke(query)

In [14]:
docs

[Document(metadata={'title': 'List of 2020s films based on actual events', 'summary': 'This is a list of films and miniseries that are based on actual events. All films on this list are from American production unless indicated otherwise.\n\n', 'source': 'https://en.wikipedia.org/wiki/List_of_2020s_films_based_on_actual_events'}, page_content='This is a list of films and miniseries that are based on actual events. All films on this list are from American production unless indicated otherwise.\n\n\n== 2020 ==\n2 Hearts (2020) – romantic drama film based on the true story of Leslie and Jorge Bacardi and Christopher Gregory\n18 Presents (Italian: 18 regali) (2020) – Italian drama film based on an actual Italian woman, Elisa Girotto, who had planned and allocated 17 years of birthday gifts for her daughter Anna before her death in September 2017 due to a terminal breast cancer.\nAdam (2020) – biographical drama film about a hard-living salesman who becomes a quadriplegic after an accident,

In [15]:
for i, doc in enumerate(docs):
  print(f"\n --- Result{i+1}---")
  print(f"content: \n {doc.page_content}---")


 --- Result1---
content: 
 This is a list of films and miniseries that are based on actual events. All films on this list are from American production unless indicated otherwise.


== 2020 ==
2 Hearts (2020) – romantic drama film based on the true story of Leslie and Jorge Bacardi and Christopher Gregory
18 Presents (Italian: 18 regali) (2020) – Italian drama film based on an actual Italian woman, Elisa Girotto, who had planned and allocated 17 years of birthday gifts for her daughter Anna before her death in September 2017 due to a terminal breast cancer.
Adam (2020) – biographical drama film about a hard-living salesman who becomes a quadriplegic after an accident, based on a true story
Admitted (Hindi: स्वीकार किया) (2020) – Indian Hindi-language biographical drama film about Dhananjay Chauhan, the first transgender student at Panjab University
AK-47 (Russian: Калашников) (2020) – Russian biographical film about the experiences of Mikhail Kalashnikov, inventor of the AK-47 assault 

In [16]:
# Vector Store Retriever

In [27]:
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

In [28]:
documents=[
    Document(page_content = "Virat Kolhi,  master of chase with nerves of steel, His bat writes records that the world can feel.",
                metadata = {"team":"RCB"}),

    Document(page_content = "Rohit Sharma, The Hitman rises with elegance and might, Turning every loose ball into a dazzling sight.",
                metadata = {"team":"MI"}),

    Document(page_content = "Jasprit Bhumra, With fire in his run-up and magic in his seam, He breaks stumps and hopes with precision supreme.",
                metadata = {"team":"MI"}),

    Document(page_content = "Ravindra Jadeja, A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhere.",
                metadata = {"team":"CSK"}),

    Document(page_content = "MS Dhoni, Calm as the moon in the fiercest fight, He finishes games with thunder and quiet might.",
                metadata = {"team":"CSK"})
    ]

In [29]:
from langchain_huggingface import HuggingFaceEmbeddings

In [30]:
embedding_model = HuggingFaceEmbeddings()

In [31]:
vectorstore = Chroma.from_documents(documents=documents, embedding = embedding_model,
                                    collection_name = 'my_collection')

In [32]:
retriever = vectorstore.as_retriever(search_kargs = {"k": 2})

In [33]:
query = "Who can bowl?"

In [34]:
result = retriever.invoke(query)

In [35]:
for i , doc in enumerate(result):
  print(f"\n---{i+1}---")
  print(doc.page_content)


---1---
Jasprit Bhumra, With fire in his run-up and magic in his seam, He breaks stumps and hopes with precision supreme.

---2---
Ravindra Jadeja, A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhere.

---3---
Rohit Sharma, The Hitman rises with elegance and might, Turning every loose ball into a dazzling sight.

---4---
MS Dhoni, Calm as the moon in the fiercest fight, He finishes games with thunder and quiet might.


In [36]:
result = vectorstore.similarity_search(query ,k=2)

In [37]:
for i, doc in enumerate(result):
  print(f"{i+1}")
  print(doc.page_content)

1
Jasprit Bhumra, With fire in his run-up and magic in his seam, He breaks stumps and hopes with precision supreme.
2
Ravindra Jadeja, A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhere.


In [38]:
# MMR(Maximum Mariginal Relevance)

In [39]:
docs = [
    Document(page_content="Aviation Crisis in India: Following widespread flight cancellations and disruptions at IndiGo, the government has ordered the airline to implement a 10% cut in its flight schedules to help stabilize operations.",
             metadata ={"type":"aviation news"}),
    Document(page_content="Supreme Court Slams Election Commission: The Supreme Court strongly criticized the Election Commission for providing mechanical and cyclostyled responses to practical issues faced by voters during the ongoing Special Intensive Revision (SIR) of electoral rolls.",
             metadata ={"type":"election news"}),
    Document(page_content="Australia Enforces Social Media Ban: Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.",
             metadata ={"type":"international news"}),
    Document(page_content="Major Microsoft Investment in India: Microsoft CEO Satya Nadella announced plans for a significant investment of $17.5 billion in India to help build infrastructure and capabilities for an AI-first future.",
             metadata ={"type":"international news"}),
    Document(page_content="Goa Nightclub Fire Investigation: Authorities have ordered the demolition of a second club owned by the Luthra brothers, who are the fugitive co-owners of the Goa nightclub where a recent fire claimed 25 lives.",
             metadata ={"type":"Goa news"}),
]

In [43]:
from langchain_community.vectorstores import FAISS

In [44]:
embedding_model = HuggingFaceEmbeddings()

In [45]:
vectorstore = FAISS.from_documents(documents=docs, embedding = embedding_model)

In [46]:
retriever = vectorstore.as_retriever(search_type = 'mmr', search_kwargs= {"k":2,'lambda_mult':0.5})

In [47]:
query = "What is todays international news?"

In [48]:
result = retriever.invoke(query)

In [49]:
for i, doc in enumerate(docs):
    print(f"---{i+1}---")
    print(doc.page_content)

---1---
Aviation Crisis in India: Following widespread flight cancellations and disruptions at IndiGo, the government has ordered the airline to implement a 10% cut in its flight schedules to help stabilize operations.
---2---
Supreme Court Slams Election Commission: The Supreme Court strongly criticized the Election Commission for providing mechanical and cyclostyled responses to practical issues faced by voters during the ongoing Special Intensive Revision (SIR) of electoral rolls.
---3---
Australia Enforces Social Media Ban: Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.
---4---
Major Microsoft Investment in India: Microsoft CEO Satya Nadella announced plans for a significant investment of $17.5 billion in India to help build infrastructure and capabilities for an AI-first future.
---5---
Goa Nightclub Fire Investigation: Authorities have ordered the demolition o

In [50]:
# Multi Query Retriever

In [56]:
from langchain_classic.retrievers.multi_query import MultiQueryRetriever

In [57]:
docs = [
    Document(page_content="Aviation Crisis in India: Following widespread flight cancellations and disruptions at IndiGo, the government has ordered the airline to implement a 10% cut in its flight schedules to help stabilize operations.",
             metadata ={"type":"aviation news"}),
    Document(page_content="Supreme Court Slams Election Commission: The Supreme Court strongly criticized the Election Commission for providing mechanical and cyclostyled responses to practical issues faced by voters during the ongoing Special Intensive Revision (SIR) of electoral rolls.",
             metadata ={"type":"election news"}),
    Document(page_content="Australia Enforces Social Media Ban: Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.",
             metadata ={"type":"international news"}),
    Document(page_content="Major Microsoft Investment in India: Microsoft CEO Satya Nadella announced plans for a significant investment of $17.5 billion in India to help build infrastructure and capabilities for an AI-first future.",
             metadata ={"type":"international news"}),
    Document(page_content="Goa Nightclub Fire Investigation: Authorities have ordered the demolition of a second club owned by the Luthra brothers, who are the fugitive co-owners of the Goa nightclub where a recent fire claimed 25 lives.",
             metadata ={"type":"Goa news"}),
]

In [58]:
embedding_model = HuggingFaceEmbeddings()

In [59]:
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)

In [60]:
similarity_retrievers = vectorstore.as_retriever(search_type = "similarity",
                                                 search_kwargs={"k":3})

In [63]:
multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(search_kwargs={"k":3}),
    llm = chat_model
)

In [69]:
query = "can you tell me about international news?"

In [70]:
similarity_result = similarity_retrievers.invoke(query)

In [71]:
multi_query_result = multi_query_retriever.invoke(query)

In [72]:
for i, doc in enumerate(similarity_result):
    print(f"---{i+1}---")
    print(doc.page_content)

---1---
Major Microsoft Investment in India: Microsoft CEO Satya Nadella announced plans for a significant investment of $17.5 billion in India to help build infrastructure and capabilities for an AI-first future.
---2---
Australia Enforces Social Media Ban: Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.
---3---
Aviation Crisis in India: Following widespread flight cancellations and disruptions at IndiGo, the government has ordered the airline to implement a 10% cut in its flight schedules to help stabilize operations.


In [73]:
for i, doc in enumerate(multi_query_result):
    print(f"---{i+1}---")
    print(doc.page_content)

---1---
Australia Enforces Social Media Ban: Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.
---2---
Major Microsoft Investment in India: Microsoft CEO Satya Nadella announced plans for a significant investment of $17.5 billion in India to help build infrastructure and capabilities for an AI-first future.
---3---
Supreme Court Slams Election Commission: The Supreme Court strongly criticized the Election Commission for providing mechanical and cyclostyled responses to practical issues faced by voters during the ongoing Special Intensive Revision (SIR) of electoral rolls.
---4---
Aviation Crisis in India: Following widespread flight cancellations and disruptions at IndiGo, the government has ordered the airline to implement a 10% cut in its flight schedules to help stabilize operations.
---5---
Goa Nightclub Fire Investigation: Authorities have ordered the demolition o

In [74]:
# Contextual Compression Retriever

In [82]:
from langchain_classic.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import LLMChainExtractor

In [77]:
docs = [
    Document(page_content="Aviation Crisis in India: Following widespread flight cancellations and disruptions at IndiGo, the government has ordered the airline to implement a 10% cut in its flight schedules to help stabilize operations.",
             metadata ={"type":"national news"}),
    Document(page_content="Supreme Court Slams Election Commission: The Supreme Court strongly criticized the Election Commission for providing mechanical and cyclostyled responses to practical issues faced by voters during the ongoing Special Intensive Revision (SIR) of electoral rolls.",
             metadata ={"type":"national news"}),
    Document(page_content="Australia Enforces Social Media Ban: Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.",
             metadata ={"type":"international news"}),
    Document(page_content="Major Microsoft Investment in India: Microsoft CEO Satya Nadella announced plans for a significant investment of $17.5 billion in India to help build infrastructure and capabilities for an AI-first future.",
             metadata ={"type":"international news"}),
    Document(page_content="Goa Nightclub Fire Investigation: Authorities have ordered the demolition of a second club owned by the Luthra brothers, who are the fugitive co-owners of the Goa nightclub where a recent fire claimed 25 lives.",
             metadata ={"type":"Goa news"}),
]

In [78]:
embeddings = HuggingFaceEmbeddings()

In [79]:
vectorstore =FAISS.from_documents(docs,embedding_model)

In [80]:
base_retriever = vectorstore.as_retriever(search_kwargs={"k":2})

In [83]:
compressor = LLMChainExtractor.from_llm(chat_model)

In [84]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever
)

In [85]:
query = "What is the national news?"

In [86]:
commpressed_result = compression_retriever.invoke(query)

In [87]:
for i, doc in enumerate(commpressed_result):
    print(f"---{i+1}---")
    print(doc.page_content)

---1---
<|user|>
Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return NO_OUTPUT.

Remember, *DO NOT* edit the extracted parts of the context.

> Question: What is the national news?
> Context:
>>>
Australia Enforces Social Media Ban: Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.
>>>
Extracted relevant parts:</s>
<|assistant|>
Part of the context "Australia Enforces Social Media Ban" that is relevant to answer the question is:

> Australia has begun enforcing a world-first law that requires social media platforms to take reasonable steps to prevent users under the age of 16 from having accounts.
---2---
<|user|>
Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context 