## Install Python libraries

In [None]:
!pip install -q torch transformers accelerate bitsandbytes transformers sentence-transformers faiss-gpu
!pip install -q langchain langchain-community pymupdf

## Set Locale if running in Google Colab

In [2]:
# If running in Google Colab, you may need to run this cell so that a UTF-8
# preferred encoding is reported (the original author needed it to install LangChain).
import locale

# Keep the real signature (`do_setlocale=True`) so that callers which pass the
# argument, e.g. `locale.getpreferredencoding(False)`, do not raise TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

## Download the NVIDIA Q1-2025 10-Q report and store it in document.pdf

In [4]:
# Download the 10-Q PDF over HTTP and save it locally; `doc` holds the saved file's path.

import requests

url = "https://s201.q4cdn.com/141608511/files/doc_financials/2025/q1/NVIDIA-10Q-20242905.pdf"
doc = "document.pdf"

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=60)
# Fail loudly on a 4xx/5xx reply instead of silently saving an error page as the PDF.
response.raise_for_status()

with open(doc, "wb") as f:
  f.write(response.content)


## Now load the 10-Q report using the PyMuPDF loader

In [None]:
# Load the downloaded PDF with LangChain's PyMuPDF loader.
# The loader is imported from `langchain_community` (installed in the first cell);
# the `langchain.document_loaders` path is a deprecated alias for it.

from langchain_community.document_loaders import PyMuPDFLoader

loader = PyMuPDFLoader("document.pdf")
docs = loader.load()

# Print the text of the first page
print(docs[0].page_content)


## Chunk the documents into 512-character pieces with a 30-character overlap

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters, in characters.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 30

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunked_docs = splitter.split_documents(docs)

## Now embed the chunks and load them into a FAISS vector store via LangChain

In [None]:
# Both classes are imported from `langchain_community` (installed in the first cell);
# the `langchain.vectorstores` / `langchain.embeddings` paths are deprecated aliases.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embed every chunk with the BGE model and index the resulting vectors in FAISS.
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
db = FAISS.from_documents(chunked_docs, embedding_model)

## Now create retriever from the vector DB

In [9]:
# Similarity search over the FAISS index, asking for the top 4 matches per query.
retrieval_kwargs = {"k": 4}
retriever = db.as_retriever(search_type="similarity", search_kwargs=retrieval_kwargs)

## Import Torch and Transformers, then load the Zephyr model with 4-bit quantization

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "HuggingFaceH4/zephyr-7b-beta"

# 4-bit NF4 quantization settings, with double quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The tokenizer and the quantized model are loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)

## Create an HF pipeline for this model and build the LLM chain

In [None]:
# `HuggingFacePipeline` and `PromptTemplate` are imported from `langchain_community`
# and `langchain_core`; the `langchain.llms` / `langchain.prompts` paths are
# deprecated aliases.
from langchain_community.llms import HuggingFacePipeline
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from transformers import pipeline

# Wrap the quantized model and its tokenizer in a text-generation pipeline.
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    # The prompt is echoed back in the generated text, so the answer has to be
    # cut out after the <|assistant|> marker before it is displayed.
    return_full_text=True,
    max_new_tokens=400,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Prompt with <|system|>, <|user|> and <|assistant|> sections; {context} and
# {question} are filled in when the chain is invoked.
prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# prompt -> LLM -> plain string
llm_chain = prompt | llm | StrOutputParser()

## Now create the RAG chain, using the retriever to pull context from the external data source

In [12]:
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
  """Joins the text of the retrieved documents into one context string.

  Args:
    docs: The documents returned by the retriever.

  Returns:
    The documents' `page_content` values separated by blank lines.
  """
  return "\n\n".join(doc.page_content for doc in docs)


# Reuse the `retriever` configured earlier instead of re-creating it here.
# The retrieved documents are reduced to their text before reaching the prompt;
# otherwise {context} would be filled with the repr of a list of Document
# objects (metadata included) rather than with the passages themselves.
rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | llm_chain

## Type your question here and get the output from the RAG chain

In [13]:
# The question is used both to retrieve context and as the user turn of the prompt,
# so it is spelled correctly ("Liabilities") to match the wording of the report.
question = "What's the EPS, Asset and Liabilities for this quarter for NVDA?"
output = rag_chain.invoke(question)

## Display the output, using a regex to keep only the text after `<|assistant|>`

In [14]:
# prompt: strip everything before <|assistant|> in output

import re

def strip_before_assistant(output):
  """Returns the part of `output` that follows the first <|assistant|> marker.

  Args:
    output: The text to trim.

  Returns:
    Everything after the first <|assistant|> marker, or `output` unchanged
    when the marker does not occur.
  """
  # Splitting at most once yields [before, after] when the marker is present and
  # [output] when it is not, so the last piece is the wanted text either way.
  pieces = re.split(r"<\|assistant\|>", output, maxsplit=1)
  return pieces[-1]

# Trim the echoed prompt from the chain's output, then show what is left.
stripped_output = strip_before_assistant(output)
print(stripped_output)




  Based on the provided context, here is the information you need:

- EPS (earnings per share): The condensed consolidated statements of income provided in the document show that NVIDIA reported net income of $14,881 million for the three months ended April 28, 2024. Assuming no new shares were issued during this time, dividing this net income by the weighted average number of common shares outstanding would give us the earnings per share (EPS). However, the document does not provide the weighted average number of common shares outstanding for this quarter. You may need to refer to previous filings or contact the company directly for this information.

- Assets: According to the condensed consolidated balance sheets provided, as of April 28, 2024, NVIDIA had total assets of $53,729 million.

- Liabilities: According to the notes to condensed consolidated financial statements, as of April 28, 2024, NVIDIA had accrued and other current liabilities of $11,565 million, which includes ite