In [1]:
import sys

# Import pysqlite3 by name so it is registered in sys.modules; the module
# object itself is deliberately not bound to a notebook variable.
__import__("pysqlite3")
# Move that entry to the "sqlite3" key so that later `import sqlite3`
# statements resolve to pysqlite3 instead of the standard-library module.
# NOTE(review): presumably required by a downstream dependency that needs a
# newer SQLite than the system one -- confirm which package needs it.
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

In [3]:
# Point the loader at the earnings-release PDF (path is relative to the
# notebook's working directory).
loader = PyPDFLoader(file_path="data/pdf/intel_q1_2024_earnings.pdf")

# Read the PDF into LangChain Document objects; each one carries `source`
# and `page` metadata alongside its text.
data = loader.load()

In [4]:
# Break the loaded documents into retrieval-sized chunks: chunk_size=500 with
# no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
all_splits = text_splitter.split_documents(documents=data)

In [5]:
# Display the first chunk to sanity-check the loader and splitter output.
all_splits[0]

Document(metadata={'source': 'data/pdf/intel_q1_2024_earnings.pdf', 'page': 0}, page_content='Intel Corporation\n2200 Mission College Blvd.\nSanta Clara, CA 95054-1549\n                                                         \nNews Release\n Intel Reports First -Quarter 2024  Financial Results\nNEWS SUMMARY\n▪First-quarter revenue of $12.7 billion , up 9%  year over year (YoY).\n▪First-quarter GAAP earnings (loss) per share (EPS) attributable to Intel was $(0.09) ; non-GAAP EPS \nattributable to Intel was $0.18 .')

In [6]:
from langchain_chroma import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

In [7]:
# Embedding model file to be loaded through GPT4All.
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"

# `allow_download` is a boolean switch, so pass a real bool. A string such as
# "True" only works because every non-empty string is truthy, which means
# "False" would enable downloading as well.
gpt4all_kwargs = {"allow_download": True}

embeddings = GPT4AllEmbeddings(model_name=model_name, gpt4all_kwargs=gpt4all_kwargs)

Failed to load libllamamodel-mainline-cuda.so: dlopen: libcuda.so.1: cannot open shared object file: No such file or directory
Failed to load libllamamodel-mainline-cuda-avxonly.so: dlopen: libcuda.so.1: cannot open shared object file: No such file or directory


In [8]:
# Embed every chunk with `embeddings` and index the results in a Chroma
# vector store.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
)

In [9]:
# Retrieval sanity check: look up the chunks most similar to a sample
# question and report how many came back.
question = "What is Intel CCG revenue in Q1 2024"
docs = vectorstore.similarity_search(query=question)
print(len(docs))

4


In [10]:
# Display the top-ranked chunk returned for `question`.
docs[0]

Document(metadata={'page': 1, 'source': 'data/pdf/intel_q1_2024_earnings.pdf'}, page_content='Client Computing Group (CCG) $7.5 billion up31%\nData Center and AI (DCAI) $3.0 billion up5%\nNetwork and Edge (NEX) $1.4 billion down 8%\nTotal Intel Products revenue $11.9 billion up17%\nIntel Foundry $4.4 billion down 10%\nAll other:\nAltera $342 million down 58%\nMobileye $239 million down 48%\nOther $194 million up17%\nTotal all other revenue $775 million down 46%\nIntersegment eliminations $(4.4) billion\nTotal net revenue $12.7 billion up9%\nIntel Products Highlights')

In [11]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

In [12]:
# Build the local llama.cpp-backed LLM used for both the baseline call and
# the RAG chain.
llm = LlamaCpp(
    # Relative path to the local GGUF weights. The file must exist here,
    # otherwise construction fails with a ValidationError
    # ("Model path does not exist").
    model_path="model/llama-2-7b-chat.Q8_0.gguf",
    n_gpu_layers=-1,
    n_batch=512,
    n_ctx=2048,
    f16_kv=True,  # MUST be True, otherwise problems appear after a couple of calls
    # Stream generated tokens to stdout as they are produced. An empty
    # CallbackManager([]) would leave the imported handler unused and
    # suppress the streamed text.
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,  # also prints llama.cpp timing statistics after each call
)

ValidationError: 1 validation error for LlamaCpp
__root__
  Could not load Llama model from path: model/llama-2-7b-chat.Q8_0.gguf. Received error Model path does not exist: model/llama-2-7b-chat.Q8_0.gguf (type=value_error)

In [None]:
# Baseline: send the question straight to the LLM with no retrieved context.
# NOTE(review): in the recorded run this answer ($8.7 billion) disagrees with
# the CCG figure in the retrieved chunk ($7.5 billion), which is the
# motivation for adding retrieval.
llm.invoke(question)

?

According to the latest earnings report, Intel's Client Computing Group (CCG) revenue for Q1 2024 was $8.7 billion.


llama_print_timings:        load time =    1039.73 ms
llama_print_timings:      sample time =      16.58 ms /    41 runs   (    0.40 ms per token,  2473.46 tokens per second)
llama_print_timings: prompt eval time =    1039.68 ms /    16 tokens (   64.98 ms per token,    15.39 tokens per second)
llama_print_timings:        eval time =   15288.59 ms /    40 runs   (  382.21 ms per token,     2.62 tokens per second)
llama_print_timings:       total time =   16400.48 ms /    56 tokens


"?\n\nAccording to the latest earnings report, Intel's Client Computing Group (CCG) revenue for Q1 2024 was $8.7 billion."

In [None]:
from langchain import hub

# Pull the shared "rlm/rag-prompt" template from the LangChain Hub.
# NOTE(review): presumably requires network access on every fresh run -- confirm.
rag_prompt = hub.pull("rlm/rag-prompt")
# Display the template's messages; it expects `context` and `question` inputs.
rag_prompt.messages

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]

In [None]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank
        line (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [None]:
# RunnablePassthrough and StrOutputParser are used by the chain below but are
# not imported in any earlier visible cell; import them here so this cell runs
# on a fresh kernel instead of raising NameError.
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Expose the vector store as a retriever so it can be composed into a chain.
retriever = vectorstore.as_retriever()

# RAG pipeline: the input question is (a) sent through the retriever and
# flattened to one context string by format_docs, and (b) passed through
# unchanged as `question`; both fill the prompt, which is fed to the LLM and
# finally parsed into a plain string.
qa_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Run the full RAG chain on a sample question and keep the parsed string answer.
result = qa_chain.invoke("what is Intel DCAI revenue in Q1 2024?")

Llama.generate: prefix-match hit


 According to the provided context, Intel DCAI revenue in Q1 2024 was $3.0 billion, up 5% from the same quarter last year.


llama_print_timings:        load time =    1039.73 ms
llama_print_timings:      sample time =      15.57 ms /    39 runs   (    0.40 ms per token,  2504.98 tokens per second)
llama_print_timings: prompt eval time =   35053.82 ms /   745 tokens (   47.05 ms per token,    21.25 tokens per second)
llama_print_timings:        eval time =   11737.83 ms /    38 runs   (  308.89 ms per token,     3.24 tokens per second)
llama_print_timings:       total time =   46844.00 ms /   783 tokens
