In [1]:
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Milvus
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

loader = PyPDFLoader("samsungreport.pdf") #wget https://images.samsung.com/is/content/samsung/assets/global/ir/docs/2023_con_quarter04_all.pdf
pages = loader.load_and_split()

In [2]:
len(pages)

91

In [3]:
local_embedding_model="all-MiniLM-L6-v2" #git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
embeddings = HuggingFaceEmbeddings(model_name=local_embedding_model)

In [4]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(pages)

In [5]:
vector_db = Milvus.from_documents(
    docs,
    embeddings,
    collection_name="samsungreport2023",
    connection_args={"host": "my-release-milvus.milvus.svc.cluster.local", "port": "19530"},
)

In [6]:
len(docs)

91

In [8]:
print(f"Default collection name - {vector_db.collection_name}")
print(f"Default search params - {vector_db.search_params}")
print(f"Default index params - {vector_db.index_params}")

Default collection name - samsungreport2023
Default search params - {'metric_type': 'L2', 'params': {'ef': 10}}
Default index params - {'metric_type': 'L2', 'index_type': 'HNSW', 'params': {'M': 8, 'efConstruction': 64}}


In [9]:
docs[1]

Document(page_content='Contents  \n \n \n Page  \n  \nIndependent Auditors’ Report  1 \n  \nConsolidated Statements of Financial Position  4 \n  \nConsolidated Statements of Profit or Loss  7 \n  \nConsolidated Statements of Comprehensive Income  8 \n  \nConsolidated Statements of Changes in Equity  9 \n  \nConsolidated Statements of Cash Flows  13 \n  \nNotes to the Consolidated Financial Statements  15', metadata={'source': 'samsungreport.pdf', 'page': 1})

In [None]:
vector_db.col

<Collection>:
-------------
<name>: samsungreport2023
<description>: 
<schema>: {'auto_id': True, 'description': '', 'fields': [{'name': 'source', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 65535}}, {'name': 'page', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 65535}}, {'name': 'pk', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}], 'enable_dynamic_field': False}

In [12]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

template = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer"""
prompt = PromptTemplate.from_template(template)
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

local_path = (
    "gpt4all-falcon-newbpe-q4_0.gguf"  #wget https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf
)
llm = LlamaCpp(
    model_path=local_path,
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    n_ctx=4096,
    callback_manager=callback_manager,
    verbose=True,
)

llama_model_loader: loaded meta data with 18 key-value pairs and 196 tensors from gpt4all-falcon-newbpe-q4_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = falcon
llama_model_loader: - kv   1:                               general.name str              = Falcon
llama_model_loader: - kv   2:                      falcon.context_length u32              = 2048
llama_model_loader: - kv   3:                  falcon.tensor_data_layout str              = jploski
llama_model_loader: - kv   4:                    falcon.embedding_length u32              = 4544
llama_model_loader: - kv   5:                 falcon.feed_forward_length u32              = 18176
llama_model_loader: - kv   6:                         falcon.block_count u32              = 32
llama_model_loader: - kv   7:                falcon.attention.head_count u32

In [13]:
question = """
Question: What is the actual Samsung's revenue in 2023?
"""
llm.invoke(question)

Answer: I am sorry, I do not have access to real-time financial data. However, according to Samsung's Q2 2023 earnings report, the company reported a total operating revenue of KRW 17.6 trillion (approximately $14.5 billion USD) for that period.


llama_print_timings:        load time =     577.02 ms
llama_print_timings:      sample time =      27.53 ms /    64 runs   (    0.43 ms per token,  2324.74 tokens per second)
llama_print_timings: prompt eval time =    1215.46 ms /    18 tokens (   67.53 ms per token,    14.81 tokens per second)
llama_print_timings:        eval time =    7692.15 ms /    63 runs   (  122.10 ms per token,     8.19 tokens per second)
llama_print_timings:       total time =    9347.06 ms /    81 tokens


"Answer: I am sorry, I do not have access to real-time financial data. However, according to Samsung's Q2 2023 earnings report, the company reported a total operating revenue of KRW 17.6 trillion (approximately $14.5 billion USD) for that period."

In [15]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

chain_type_kwargs={
        "refine_prompt": QA_CHAIN_PROMPT
        }

qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="refine",
    retriever=vector_db.as_retriever(search_type="mmr", search_kwargs={"k": 1}),
    return_source_documents=False,
    callbacks=None,
    chain_type_kwargs={"refine_prompt": QA_CHAIN_PROMPT,"verbose":True},
    verbose=True
)

question = "What is Samsung's revenue in 2023 in KRW?"
result = qa_chain({"query": question}) # must be query
result["result"]



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mContext information is below. 
------------
Samsung Electronics Co., Ltd. and its subsidiaries  
 
CONSOLIDATED STATEMENTS OF PROFIT OR LOSS  
 
 
The above consolidated statement s of profit or loss  should be read in conjunction with the accompanying notes.  
  
- 7 - (In millions of Korean w on, in thousands of US dollars (Note  2.18)) 
 
    For the year s ended December 31,  
  Notes  2023 2022 2023 2022 
  KRW  KRW  USD  USD  
      
Revenue  29 258,935,494  302,231,360  198,247,859  231,396,319  
Cost of sales  21 180,388,580  190,041,770  138,110,266  145,501,003  
Gross profit   78,546,914  112,189,590  60,137,593  85,895,316  
Selling and administrative expenses  21, 22  71,979,938  68,812,960  55,109,743  52,685,021  
Operating profit  29 6,566,976  43,376,630  5,027,850  33,210,295  
Other non -

Llama.generate: prefix-match hit


Samsung's revenue in 2023 in KRW is 29,258,935,494.


llama_print_timings:        load time =     577.02 ms
llama_print_timings:      sample time =      10.19 ms /    22 runs   (    0.46 ms per token,  2160.04 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    3439.45 ms /    22 runs   (  156.34 ms per token,     6.40 tokens per second)
llama_print_timings:       total time =    3626.21 ms /    23 tokens



[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


"Samsung's revenue in 2023 in KRW is 29,258,935,494."