## Imports

In [245]:
# %pip install -q langchain langchain-nvidia-ai-endpoints gradio

In [244]:
from functools import partial
from rich.console import Console
from rich.style import Style
from rich.theme import Theme

console = Console()
base_style = Style(color="#76B900", bold=True)
pprint = partial(console.print, style=base_style)

In [243]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
import os

api_key = os.getenv("NVIDIA_API_KEY")
os.environ["NVIDIA_API_KEY"] = api_key
print(api_key)
# ChatNVIDIA.get_available_models()

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


In [2]:
# Useful utility method for printing intermediate states
from langchain_core.runnables import RunnableLambda
from functools import partial


def RPrint(preface="State: "):
    def print_and_return(x, preface=""):
        print(f"{preface}{x}")
        return x
    return RunnableLambda(partial(print_and_return, preface=preface))


def PPrint(preface="State: "):
    def print_and_return(x, preface=""):
        pprint(preface, x)
        return x
    return RunnableLambda(partial(print_and_return, preface=preface))

# Cricket Chatbot

### Wikipedia Data Generation

In [7]:
%pip install --upgrade --quiet  wikipedia

Note: you may need to restart the kernel to use updated packages.


In [None]:
from langchain_community.document_loaders import WikipediaLoader
from datetime import datetime, timedelta
import os
import json

# Load documents from Wikipedia
docs = WikipediaLoader(query="Cricket and everything related to cricket",
                       load_max_docs=1000).load()

# Function to split text into chunks with headers


def create_chunks_with_headers(doc, doc_index):
    chunk_size = 1536
    chunk_overlap = 200
    chunks = []
    start = 0
    doc_content = doc.page_content
    doc_length = len(doc.page_content)

    while start < doc_length:
        end = min(start + chunk_size, doc_length)
        chunk = doc_content[start:end]

        if start != 0:
            chunk = doc_content[max(start - chunk_overlap, 0):end]

        chunk_json = {
            "meta_data": {
                "title": doc.metadata["title"],
                "summary": doc.metadata['summary'],
                "source_url": doc.metadata['source'],
            },
            "chunk_index": len(chunks) + 1,
            "content": chunk
        }
        chunks.append(chunk_json)

        start += chunk_size

    return chunks


# Create an array to store all document chunks as JSON objects
all_chunks = []

# Create JSON objects for each document with chunks
for i, doc in enumerate(docs):
    chunks = create_chunks_with_headers(doc, i + 1)
    all_chunks.extend(chunks)
    print(f"Data for document {i + 1} has been processed.")

# Output the array of JSON objects (for demonstration purposes)


# If you want to write the array to a file:
# with open('wikipedia_docs_chunks.json', 'w', encoding='utf-8') as file:
#     json.dump(all_chunks, file, ensure_ascii=False, indent=4)

print("All data has been processed.")

In [9]:
len(all_chunks)

1377

Text splitting done into chunks with meta data for context

# Self Query Retriever

In [10]:
%pip install --upgrade --quiet  lark langchain-chroma

Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install langchain-cohere

In [251]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_cohere import CohereEmbeddings


embeddings_model = CohereEmbeddings(
    cohere_api_key="")

documents=[]

for chunk in all_chunks:
    doc = Document(
            page_content=chunk["content"],
        metadata=chunk["meta_data"],
        )
    documents.append(doc)

print(documents[0].metadata)

vectorstore = Chroma.from_documents(documents, embeddings_model)

{'title': 'Board of Control for Cricket in India', 'summary': "Board of Control for Cricket in India (BCCI) is the national governing body of cricket in India. Its headquarters is situated at the cricket centre in Churchgate, Mumbai. The BCCI is the wealthiest governing body of cricket in the world.\nThe BCCI was established in 1 December 1928 at Madras (currently Chennai) under Act XXI of 1860 of Madras and was subsequently reregistered under the Tamil Nadu Societies Registration Act, 1975. It is a consortium of state cricket associations that select their own representatives who elect the BCCI president. It joined the Imperial Cricket Conference in 1926 which later became the International Cricket Council. The BCCI is an autonomous, private organization that does not fall under the purview of the National Sports Federation of India of Government of India and does not receive any grants from the Ministry of Youth Affairs and Sports. The BCCI is influential in international cricket. Th

### Creating our self-querying retriever
Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents.

In [13]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

metadata_field_info = [
    AttributeInfo(
        name="title",
        description="The name of the article",
        type="string",
    ),
    AttributeInfo(
        name="summary",
        description="The short summary of the article contents",
        type="integer",
    ),
    AttributeInfo(
        name="source_url",
        description="The web uri link to the article webpage",
        type="string",
    ),
]
document_content_description = "Data about cricket"
llm = ChatNVIDIA(model="mistralai/mistral-7b-instruct-v0.2")
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
)

In [234]:
from collections import deque

# Initialize memory as a deque with a maximum length of 5
memory = deque(maxlen=5)


def update_memory(user_question, response):

    memory.append({
        "question": user_question,
        "response": response,
    })
    pprint(memory)

### Creating a pipeline for the retriever

In [3]:
from langchain_core.runnables import RunnableLambda, RunnableAssign, RunnablePassthrough
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


sys_msg = """
You are an intelligent assistant that answers all questions about cricket using contextual information from Wikipedia. Your responses should be conversational and informative, providing clear and concise explanations. When relevant, include the source URL of the articles to give users additional reading material.

Always aim to:
1. Answer the question directly and clearly.
2. Provide context and background information when useful but do not give irrelevant information and answer to the point.
3. Suggest related topics or additional points of interest.
4. Be polite and engaging in your responses.
5. Remove the unnecessary context from the context provided if irrelevant to the question

Now, let's get started!
"""

# Initialize the chat model
instruct_chat = ChatNVIDIA(model="meta/llama3-70b-instruct")
llm = instruct_chat | StrOutputParser()


def generate_embeddings(input_data):
    embeddings = retriever.invoke(input_data)
    if embeddings:
        return embeddings
    else:
        return "No data available"


def generate_embeddings_query(input_data):
    prompt = ChatPromptTemplate.from_template(
        f"""
User's Question: {{input}}
Previous conversation memory {{memory}}
Generate only a query sentence and nothing else from the user's question to fetch from the data from embeddings. If the user's question does not have enough context then create a query based on the Knowledge Base.
"""
    )
    embedding_chain = prompt | llm
    embeddings_query = embedding_chain.invoke(input_data)
    if embeddings_query:
        return embeddings_query
    else:
        return "Process failed"


generate_embeddings_runnable = RunnableLambda(generate_embeddings)
generate_embeddings_query_runnable = RunnableLambda(generate_embeddings_query)



def get_response(prompt):
    return llm.invoke(prompt)


# Create the Runnable chain with memory integration
Runnable = (
    {"input": RunnablePassthrough(), "memory": RunnablePassthrough()}
    | RunnableAssign({"embedding_query": generate_embeddings_query_runnable})
    | RunnableAssign({"context": generate_embeddings_runnable})
    | RunnableAssign({"prompt": lambda x: ChatPromptTemplate.from_template(
        f"""
{sys_msg}

User's Question: {{input}}

Context Information: {{context}}

Previous Conversation memory: {{memory}}

Your Response:
"""
    )})
    | RunnableAssign({"response": lambda x: get_response(x["prompt"])})
    | PPrint()
    | RunnableAssign({"memory": lambda x: update_memory(x["input"]["input"], x["response"])})
)

# Get user input and invoke the chain
user_input = "Tell me more about his batting records"
response = Runnable.invoke({"input": user_input, "memory": memory})

pprint(response["response"])



NameError: name 'memory' is not defined