In [1]:
from tqdm import tqdm
from langchain_community.vectorstores import FAISS

In [2]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI, HarmBlockThreshold, HarmCategory
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain.retrievers import RePhraseQueryRetriever

In [3]:
import os
os.environ["GOOGLE_API_KEY"] = "AIzaSyBWt4xrbfIcs1sNz6lhwhl7vW1adeQ8d5U"
os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import re
import fitz
from langchain_core.documents import Document

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=2000,
    chunk_overlap=200,
    length_function=len,
)
doc = fitz.open("../../Reports/jio.pdf")
pages=[]
for page_no in range(doc.page_count):
        text = doc[page_no].get_text()
        text = re.sub(r"\n", " ", text)
        text = text_splitter.split_text(text=text)
        for chunk in text:
            page = Document(page_content=chunk, metadata = {"page":page_no+1})
            pages.append(page)

In [5]:
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

In [6]:
vectorstore = FAISS.from_documents(pages, embedding=embeddings)

In [7]:
llm = ChatGoogleGenerativeAI(model="gemini-pro",temperature=0)

In [8]:
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an assistant tasked with taking a natural languge query from a user
    and converting it into a query for a vectorstore. In the process, strip out all 
    information that is not relevant for the retrieval task and return a new, simplified
    question for vectorstore retrieval. Only return the new query text and dont format it.
    Also there might be some short notation and long notation in the query like CEO and Chief executive officer
    so for that it short or long notation should also present in new query vice versa.
    So in query is "CEO of copany" then in new query both Chief executive officer and CEO should present and vice versa/
    EX. query CEO of company new query Chief executive officer (CEO) of company
    Here is the user 
    query:{question}""",
)
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT).pick("text")

  llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT).pick("text")


In [29]:
retriever_from_llm_chain = RePhraseQueryRetriever(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 4}), llm_chain=llm_chain
)

In [30]:
retriever_from_llm_chain.invoke(
    "Ceo of company"
)

[Document(metadata={'page': 58}, page_content='Annual Report 2022-23 57 Corporate Governance Report CERTIFICATE ON COMPLIANCE WITH CODE OF CONDUCT I hereby confirm that the Company has obtained from all the members of the Board and Senior Management Personnel,  the affirmation that they have complied with the ‘Code of Conduct’ in respect of the financial year 2022-23. Pankaj M. Pawar Managing Director DIN: 00085077 Date: April 21, 2023'),
 Document(metadata={'page': 24}, page_content='Annual Report 2022-23 23 Corporate Governance Report RJIL Governance Structure  Role and responsibilities of constituents of Governance Structure  Board of Directors:  The Board of Directors is the apex body constituted by shareholders for overseeing the Company’s overall functioning. The  Board provides strategic direction and leadership and oversees the management policies and their effectiveness looking  at long-term interests of shareholders and other stakeholders. The Board, inter-alia, reviews and g

In [11]:
docs = retriever_from_llm_chain.invoke(
    "Name the Boadr od directors of company"
)

In [12]:
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",temperature=0, stream=True)

In [24]:
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an assistant tasked with taking a natural languge query from a user
    and converting it into a query for a vectorstore. In the process, strip out all 
    information that is not relevant for the retrieval task and return a new, simplified
    question for vectorstore retrieval. Only return the new query text and dont format it.
    Also there might be some short notation and long notation in the query like CEO and Chief executive officer
    so for that it short or long notation should also present in new query vice versa.
    So in query is "CEO of copany" then in new query both Chief executive officer and CEO should present and vice versa/
    EX. query CEO of company new query Chief executive officer (CEO) of company
    Here is the user 
    query:{question}""",
)
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT).pick("text")

In [64]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


In [31]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate

### Contextualize question ###
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
contextualize_q_llm = llm.with_config(tags=["contextualize_q_llm"])
history_aware_retriever = create_history_aware_retriever(
    contextualize_q_llm, retriever_from_llm_chain, contextualize_q_prompt
)

In [32]:

system_prompt = (
    """You are a financial expert with access to the annual report of the company.
       When answering questions about the company's financial performance, prioritize information from the Financial Statements section.Considering the user's question, provide clear and concise answers from given context.
       {context}"""
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [33]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(history_aware_retriever,question_answer_chain)

'xbc'

In [65]:
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)


In [68]:
conversational_rag_chain.invoke(
    {"input": "Who is chairman of jio?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'Akash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJIL). \n'

{'abc123': InMemoryChatMessageHistory(messages=[HumanMessage(content='Who is chairman of jio?'), AIMessage(content='Akash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJIL). \n'), HumanMessage(content='What he did in company?'), AIMessage(content="Akash M. Ambani has been Chairman of Reliance Jio Infocomm Limited (RJIL) since June 2022. Prior to that, he served as a Non-Executive Director on the RJIL board since October 2014. He also serves on the Board of Jio Platforms Limited, Reliance Industries' digital services business. \n\nAt Jio, he spearheads the creation of products and services that leverage new-age technologies like 5G, Artificial Intelligence, Blockchain, and the Internet of Things. He is part of the RJIL Executive Committee and a member of the Product Leadership Group, closely involved in the development of products and digital services applications. \n\nUnder his leadership, Jio crossed the 100 million subscriber mark in less than six months of its launch 

In [67]:
conversational_rag_chain.invoke(
    {"input": "What he did in company?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

"Akash M. Ambani has been Chairman of Reliance Jio Infocomm Limited (RJIL) since June 2022. Prior to that, he served as a Non-Executive Director on the RJIL board since October 2014. He also serves on the Board of Jio Platforms Limited, Reliance Industries' digital services business. \n\nAt Jio, he spearheads the creation of products and services that leverage new-age technologies like 5G, Artificial Intelligence, Blockchain, and the Internet of Things. He is part of the RJIL Executive Committee and a member of the Product Leadership Group, closely involved in the development of products and digital services applications. \n\nUnder his leadership, Jio crossed the 100 million subscriber mark in less than six months of its launch in 2016 and today serves over 450 million customers. \n"

In [59]:
from langchain_core.messages import AIMessage, HumanMessage

chat_history = []

question = "Who is chairman of jio?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)


In [70]:
for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: Who is chairman of jio?

AI: Akash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJIL). 


User: What he did in company?

AI: Akash M. Ambani has been Chairman of Reliance Jio Infocomm Limited (RJIL) since June 2022. Prior to that, he served as a Non-Executive Director on the RJIL board since October 2014. He also serves on the Board of Jio Platforms Limited, Reliance Industries' digital services business. 

At Jio, he spearheads the creation of products and services that leverage new-age technologies like 5G, Artificial Intelligence, Blockchain, and the Internet of Things. He is part of the RJIL Executive Committee and a member of the Product Leadership Group, closely involved in the development of products and digital services applications. 

Under his leadership, Jio crossed the 100 million subscriber mark in less than six months of its launch in 2016 and today serves over 450 million customers. 


User: Who is chairman of jio?

AI: Akash M. Ambani is the Chairma

In [49]:
question = "Wht is profit of previous year?"
ai_msg_1 = rag_chain.invoke({"input": question, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)

In [60]:
chat_history

[HumanMessage(content='Who is chairman of jio?'),
 AIMessage(content='Akash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJIL). \n')]

In [61]:
question = "Give brief info about him"

In [62]:
output = {}
curr_key = None
for chunk in rag_chain.stream({"input": question, "chat_history": chat_history}):
    for key in chunk:
        if key not in output:
            output[key] = chunk[key]
        else:
            output[key] += chunk[key]
        if key != curr_key:
            print(f"\n\n{key}: {chunk[key]}", end="", flush=True)
        else:
            print(chunk[key], end="", flush=True)
        curr_key = key



input: Give brief info about him

chat_history: [HumanMessage(content='Who is chairman of jio?'), AIMessage(content='Akash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJIL). \n')]

context: [Document(metadata={'page': 27}, page_content='engagement initiatives. He is an  avid sportsperson and has played a key role in the management of Mumbai Indians, contributing  immensely in their winning the IPL 2013, 2015, 2017, 2019 and 2020 as well as CLT20 2011 and  CLT20 2013 Championships. Mr. Akash M. Ambani received a Bachelor’s in Economics from Brown University, USA. He is also  a member of the President’s Leadership Council of the Brown University. He has been named in  TIME magazine’s TIME100 Next list of rising stars from across industries around the world, and  Fortune’s 40 Under-40 young leaders who stood out for their impact on business Appointed 11.10.2014 Areas of  expertise • Leadership  • Strategic Planning  • Industry Experience • Financial, Regulatory / Legal &

In [58]:
output

{'input': 'Who is chairman of jio?',
 'chat_history': [HumanMessage(content='Who is chairman of jio?'),
  AIMessage(content='Akash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJIL). \n')],
 'context': [Document(metadata={'page': 3}, page_content='Board of Directors Non-Executive Director Akash M. Ambani (Chairman)  Isha M. Ambani  Mahendra Nahata Kiran M. Thomas  Mathew Oommen Managing Director  Sanjay Mashruwala Pankaj M. Pawar Independent Director Adil Zainulbhai Dipak C. Jain Mohanbir S. Sawhney Ranjit V. Pandit Shumeet Banerji Raminder S. Gujral  Veerayya Chowdary Kosaraju Committees Audit Committee Adil Zainulbhai (Chairman)  Dipak C. Jain Mohanbir S. Sawhney Ranjit V. Pandit Pankaj M. Pawar Nomination and Remuneration  Committee Ranjit V. Pandit (Chairman)  Adil Zainulbhai Dipak C. Jain Risk Management Committee Shumeet Banerji (Chairman)  Pankaj M. Pawar Kiran M. Thomas Rajneesh Jain Corporate Social Responsibility  Committee Adil Zainulbhai (Chairman)  Sanjay Ma

In [51]:
ai_msg_1

{'input': 'Wht is profit of previous year?',
 'chat_history': [HumanMessage(content='Who is chairman of jio?'),
  AIMessage(content='Akash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJIL). \n'),
  HumanMessage(content='Give about him?'),
  AIMessage(content="Akash M. Ambani is an Indian business leader serving as the Chairman of Reliance Jio Infocomm Limited (RJIL) since June 2022. He was previously a Non-Executive Director on the RJIL board from October 2014. He also serves on the Board of Jio Platforms Limited, Reliance Industries' digital services business. \n\nAt Jio, he leads the development of products and services that utilize new technologies like 5G, Artificial Intelligence, Blockchain, and the Internet of Things to transform Commerce, Education, and Healthcare in India. Under his leadership, Jio achieved 100 million subscribers within six months of its launch in 2016 and currently serves over 450 million customers. \n\nHe is a member of the RJIL Executive Com

In [52]:
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg_1["answer"]),
    ]
)
follow_up_question="Give brief Akash M. Ambani."
async for event in rag_chain.astream_events(
    {
        "input": follow_up_question,
        "chat_history": chat_history,
    },
    version="v2",
):
    if (
        event["event"] == "on_chat_model_stream"
        and "contextualize_q_llm" in event["tags"]
    ):
        ai_message_chunk = event["data"]["chunk"]
        print(f"{ai_message_chunk.content}|", end="")

Ak|ash M. Ambani is the Chairman of Reliance Jio Infocomm Limited (RJ|IL), the telecom arm of Reliance Industries. He is a young business leader known| for his role in driving Jio's growth and expansion. He is also involved in Reliance Industries' retail businesses. Akash holds a degree in Economics from Brown| University and is recognized for his leadership in the digital space. 
|

NotImplementedError: 

In [20]:
async for event in rag_chain.astream_events(
    {
        "input": follow_up_question,
        "chat_history": chat_history,
    },
    version="v1",
):
    if event["name"] == "Retriever":
        print(event)
        print()

{'event': 'on_retriever_start', 'name': 'Retriever', 'run_id': '5b9f8dbc-8511-42bd-91c2-3768185892ec', 'tags': ['seq:step:4'], 'metadata': {'ls_retriever_name': 'rephrasequery'}, 'data': {'input': {'query': 'What is Jio? \n'}}, 'parent_ids': []}

{'event': 'on_retriever_end', 'name': 'Retriever', 'run_id': '5b9f8dbc-8511-42bd-91c2-3768185892ec', 'tags': ['seq:step:4'], 'metadata': {'ls_retriever_name': 'rephrasequery'}, 'data': {'input': {'query': 'What is Jio? \n'}, 'output': None}, 'parent_ids': []}



NotImplementedError: 

In [81]:
retriever_from_llm_chain.invoke("The| Chief Financial Officer (CFO) of Jio is **Rajneesh Jain**. |")

[Document(metadata={'page': 60}, page_content='Code of Conduct received by ECTF are reviewed and reported to the Audit Committee on a quarterly basis. Jio operates in a regulated business environment. Jio’s position on key industry issues like customer welfare, Data privacy,  Promotion of broadband, services, Network and telecom equipment manufacturing in India, AI and Big data and Ease of  Doing Business are transparently disclosed through the regulator. Jio also actively participates in national and international  industry bodies who may also engage with regulators. Jio’s activities are subject to robust internal controls framework set  forth in its policies including the Code of Conduct and the Anti-Bribery and Anti-Corruption Policy. Materiality Assessment: Jio has conducted in-depth materiality assessment to identify the topics that are pertinent to its  business as well as to its internal and external stakeholders. The approach followed for conducting materiality assessment  incl

In [78]:
rag_chain.invoke({"input": "hi i m mukesh. Can u tell me financial profit of jio in current year"})

{'input': 'hi i m mukesh. Can u tell me financial profit of jio in current year',
 'context': [Document(metadata={'page': 5}, page_content='4 Board’s Report  Reliance Jio Infocomm Limited BOARD’S REPORT Dear Members, The Board of Directors present the Company’s Sixteenth Annual Report (“Report”) and the Company’s audited financial  statements for the financial year ended March 31, 2023. FINANCIAL RESULTS The Company’s financial performance (standalone and consolidated) for the financial year ended March 31, 2023 is  summarised below: (₹ in crore) Particulars Standalone Consolidated 2022-23 2021-22 2022-23 2021-22 Revenue from Operations 90,786 76,977 91,373 77,356 Other Income 362 227 368 229 Total Income 91,148 77,204 91,741 77,585 Operating Expenses 44,114 39,347 44,476 39,525 EBITDA 47,034 37,857 47,265 38,060 Finance Cost   4,059 4,377 4,059 4,377 Depreciation and Amortisation Expense  18,546 13,615 18,641 13,702 Profit before Tax 24,429 19,865 24,565 19,981 Less: Current Tax - - 1

In [1]:
import pandas as pd