![](https://miro.medium.com/v2/resize:fit:1400/format:webp/1*pt3GvRsCd12vtlolavMSZg.png)

In [1]:
import langchain
from langchain.embeddings import CacheBackedEmbeddings,HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.retrievers import BM25Retriever,EnsembleRetriever
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.llms import HuggingFacePipeline
from langchain.cache import InMemoryCache
from langchain.chains import RetrievalQA
from langchain.callbacks import StdOutCallbackHandler
from langchain import PromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, GPTQConfig
from langchain.docstore.document import Document
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader
from dotenv import load_dotenv
import nest_asyncio
nest_asyncio.apply()
load_dotenv()



True

In [2]:
# set up llama parser
parser = LlamaParse(
    result_type="markdown"  # "markdown" and "text" are available
)
# use SimpleDirectoryReader to parse our file
file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader(input_files=['files/report.pdf'], file_extractor=file_extractor).load_data()
print(documents)

Started parsing the file under job_id 88915480-6746-4f30-8ece-b658339fa8b3
[Document(id_='db3c53de-466a-4fe2-9c30-c848468ef1ae', embedding=None, metadata={'file_path': 'files\\report.pdf', 'file_name': 'report.pdf', 'file_type': 'application/pdf', 'file_size': 1073470, 'creation_date': '2025-01-13', 'last_modified_date': '2024-12-19'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='# UNITED STATES SECURITIES AND EXCHANGE COMMISSION\n\n# Washington, D.C. 20549\n\n# FORM 10-K\n\n# (Mark One)\n\n☒ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nFor the fiscal year ended December 31, 2023\n\nOR\n\n☐ TRANSITION REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934\n\nFor the transition period fr

In [3]:
# create vector db
docs = [Document(page_content=documents[i].text,metadata=documents[i].metadata) for i in range(len(documents))]
store = LocalFileStore("./cache/")
embed_model_id = 'BAAI/bge-small-en-v1.5'
core_embeddings_model = HuggingFaceEmbeddings(model_name=embed_model_id)
embedder = CacheBackedEmbeddings.from_bytes_store(core_embeddings_model,
                                                  store,
                                                  namespace=embed_model_id)
# Create VectorStore
vectorstore = FAISS.from_documents(docs,embedder)

  core_embeddings_model = HuggingFaceEmbeddings(model_name=embed_model_id)


In [4]:
# Setup Ensemble Retriever (Hybrid Search)
bm25_retriever = BM25Retriever.from_documents(docs)
bm25_retriever.k=5
faiss_retriever = vectorstore.as_retriever(search_kwargs={"k":5})
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever,faiss_retriever],
                                       weights=[0.5,0.5])

In [31]:
# Download the quantized GPTQ Model
# post training quantization - trained llm model - quantization 
# full trained LLM weights - fp16 16 bit - 7b -> 4x 28b memory 
# PTQ change the weight fp16 -> 4 bit, weights in memory -> 4bit, inference change back to fp16, 4 bit - > 16 loss, performance decrease
# llama.cpp - c++ - gguf https://github.com/ggerganov/llama.cpp
# microsort - bitnet -  1 bit  https://github.com/microsoft/BitNet
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
# To use a different branch, change revision
# For example: revision="gptq-8bit-32g-actorder_True"
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
# you can set you own dataset for GPTQ config
gptq_config = GPTQConfig(bits=4,tokenizer=tokenizer,dataset=documents)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=True,
                                             revision="gptq-4bit-32g-actorder_True",
                                             max_memory={0:"8GiB"},
                                             quantization_config=gptq_config)



In [32]:
langchain.llm_cache = InMemoryCache()
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.8,
    top_p=0.95,
    top_k=10,
    repetition_penalty=1.1
)

from langchain.llms import HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipe)

In [33]:
# prompt template
PROMPT_TEMPLATE = '''
You are my financial advisor. You are great at providing analyze on financial reports with your knowledge in finances.
With the information being provided try to answer the question. 
If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers
If you do not know the answer, please output N/A only. Do not make up answer

Context: {context}
Question: {question}
Do provide only helpful answers

Helpful answer:
'''
#
input_variables = ['context', 'question']
#
custom_prompt = PromptTemplate(template=PROMPT_TEMPLATE,
                            input_variables=input_variables)

In [34]:
# Retrieval chain — without Hybrid Search
handler = StdOutCallbackHandler()
qa_with_sources_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever = vectorstore.as_retriever(search_kwargs={"k":5}),
    verbose=True,
    callbacks=[handler],
    chain_type_kwargs={"prompt": custom_prompt},
    return_source_documents=True
)

In [35]:
def process_output(text):
    text = text.split('Helpful answer:')
    return text[-1].strip()

In [None]:
%%time
query = "what is the Revenue for 2021?"
response = qa_with_sources_chain({"query":query})
print(f"Response generated : \n {process_output(response['result'])}")
print('####################################################')
print(f"Source Documents : \n {response['source_documents'][0].page_content}")



[1m> Entering new RetrievalQA chain...[0m


In [17]:
%%time
query = "what is the Revenue on the financial year 2022 from UCAN?"
response = qa_with_sources_chain({"query":query})
print(f"Response generated : \n {process_output(response['result'])}")
print('####################################################')
print(f"Source Documents : \n {response['source_documents'][0].page_content}")



[1m> Entering new RetrievalQA chain...[0m


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



[1m> Finished chain.[0m
Response generated : 
 The revenue generated from UCAN (United States and Canada) for the year ended December 31, 2022 was $14,084,643.
####################################################
Source Documents : 
 # Table of Contents

# Marketing

|Year Ended December 31|2023|2022|2021|Change|
|---|---|---|---|---|
|(in thousands, except percentages)|$2,657,883|$2,530,502|$2,545,146|$127,381 (5%)|

As a percentage of revenues: 8% (2023), 8% (2022), 9% (2021)

The increase in marketing expenses for the year ended December 31, 2023 as compared to the year ended December 31, 2022 was primarily due to a $146 million increase in advertising expenses and a $21 million increase in personnel-related costs, partially offset by a $39 million decrease in payments to our marketing partners.

# Technology and Development

Technology and development expenses consist primarily of payroll, stock-based compensation, facilities, and other related expenses for technology personnel 

In [22]:
query = "Risks Related to Intellectual Property"
response = qa_with_sources_chain({"query":query})
print(f"Response generated : \n {process_output(response['result'])}")
print('####################################################')
print(f"Source Documents : \n {response['source_documents'][0].page_content}")



[1m> Entering new RetrievalQA chain...[0m


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



[1m> Finished chain.[0m
Response generated : 
 Yes, intellectual property rights are important to us and other companies. Our intellectual property rights extend to our technology, business processes, the content we produce and distribute through our service, and consumer products, experiences, and marketing assets based thereon. We use the intellectual property of third parties in creating some of our content, merchandising our products and marketing our service. From time to time, third parties allege that we have infringed or otherwise violated their intellectual property rights. If we are unable to obtain sufficient rights, successfully defend our use, or develop non-infringing technology or otherwise alter our business practices on a timely basis in response to claims against us for infringement, misappropriation, misuse or other violation of third-party intellectual property rights, our business and competitive position may be adversely affected. In addition, the use or adopti

In [12]:
#Setup Retrieval chain — with Hybrid Search
handler = StdOutCallbackHandler()
qa_with_sources_chain_hb = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever = ensemble_retriever,
    callbacks=[handler],
    chain_type_kwargs={"prompt": custom_prompt},
    return_source_documents=True
)

In [15]:
%%time
query = "what is the Revenue for 2022?"
response = qa_with_sources_chain_hb({"query":query})
print(f"Response generated : \n {process_output(response['result'])}")
print('####################################################')
print(f"Source Documents : \n {response['source_documents'][0].page_content}")

# aws dynamodb, aws redis, sqlite 
# key - value pair
# key - vectors , value - question + answer
# create some ground truth q/a pairs -> embedding -> insert to dynamodb 
# when user ask questions -> dynamodb to find similar q/a



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Response generated : 
 The revenue for 2022 was $11.0 billion.
####################################################
Source Documents : 
 # Table of Contents

# Revenue Recognition

presented net of the taxes that are collected from members and remitted to governmental authorities. The Company is the principal in all its relationships where partners, including consumer electronics ("CE") manufacturers, multichannel video programming distributors ("MVPDs"), mobile operators and internet service providers ("ISPs"), provide access to the service as the Company retains control over service delivery to its members. In circumstances in which the price that the member pays is established by a partner and there is no standalone price for the Netflix service (for instance, in a bundle), the net amount collected from the partner is recognized as revenue.

The Company also earns revenue from advertisements presented on its st

In [16]:
%%time
query = "what is the Revenue on the financial year 2022 from UCAN?"
response = qa_with_sources_chain_hb({"query":query})
print(f"Response generated : \n {process_output(response['result'])}")
print('####################################################')
print(f"Source Documents : \n {response['source_documents'][0].page_content}")



[1m> Entering new RetrievalQA chain...[0m


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



[1m> Finished chain.[0m
Response generated : 
 The table shows that the revenue generated from UCAN in 2022 was $1,456,985,200.
####################################################
Source Documents : 
 # Table of Contents

# Interest expense for the year ended December 31, 2023

Interest expense for the year ended December 31, 2023 consisted primarily of $698 million of interest on our Notes. Interest expense for the year ended December 31, 2023 as compared to the year ended December 31, 2022 remained relatively flat.

# Interest and Other Income (Expense)

Interest and other income (expense) consists primarily of foreign exchange gains and losses on foreign currency denominated balances and interest earned on cash, cash equivalents and short-term investments.

|Year Ended December 31,|2023|2022|2021|2023 vs. 2022|
|---|---|---|---|---|
|Interest and other income (expense)|$ (48,772)|$ 337,310|$ 411,214|$ (386,082) (114%)|
|As a percentage of revenues|— %|1 %|1 %| |

Interest and ot

In [21]:
query = "Risks Related to Intellectual Property"
response = qa_with_sources_chain_hb({"query":query})
print(f"Response generated : \n {process_output(response['result'])}")
print('####################################################')
print(f"Source Documents : \n {response['source_documents'][0].page_content}")



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Response generated : 
 Yes, the long-term and largely fixed cost nature of our content commitments may limit our operating flexibility and could adversely affect our liquidity and results of operations. In connection with licensing streaming content, we typically enter into multi-year commitments with studios and other content providers. We also enter into multi-year commitments for content that we produce, either directly or through third parties, including elements associated with these productions such as non-cancelable commitments under talent agreements. The payment terms of these agreements are not tied to member usage or the size of our membership base (“fixed cost”) but may be determined by costs of production or tied to such factors as titles licensed and/or theatrical exhibition receipts. Such commitments, to the extent estimable under accounting standards, are included in the Contractual Obligations sec

In [34]:
langchain.llm_cache.clear()