In [2]:
# import packages
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.vectorstores.chroma import Chroma
from langchain_huggingface import HuggingFacePipeline
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM

# Below will use HuggingFace - sentence-transformers
# https://huggingface.co/sentence-transformers
from langchain_huggingface import HuggingFaceEmbeddings

In [3]:
# Define directories
pdf_file_dir_path = "custom_data_chatbot/pdfs"
model_path = "custom_data_chatbot/models"

In [4]:
# Load  ................................................................................................................
# Load data from PDF file.
loader = DirectoryLoader(pdf_file_dir_path)

# convert docs in to small chunks for better management
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=1000,
    chunk_overlap=0,
    length_function=len,
    is_separator_regex=False,
)

# load data from pdf and create chunks for better management
pages = loader.load_and_split(text_splitter=text_splitter)

In [5]:
print(pages)

[Document(page_content='\ue905\n\nInvestors\n\nNews\n\nEvents & Presentations\n\nStock Info\n\nFinancial Info\n\nGovernance\n\nInvestor Resources\n\nPress Release Details\n\nVIEW ALL NEWS\n\nNVIDIA Announces Financial Results For First Quarter Fiscal 2024\n\nMay 24, 2023\n\nQuarterly revenue of $7.19 billion, up 19% from previous quarter\n\nRecord Data Center revenue of $4.28 billion\n\nSecond quarter fiscal 2024 revenue outlook of $11.00 billion\n\nSANTA CLARA, Calif., May 24, 2023 (GLOBE NEWSWIRE) -- NVIDIA (NASDAQ: NVDA) today reported revenue for the first quarter ended April 30, 2023, of $7.19\n\nbillion, down 13% from a year ago and up 19% from the previous quarter.\n\nGAAP earnings per diluted share for the quarter were $0.82, up 28% from a year ago and up 44% from the previous quarter. Non-GAAP earnings per diluted share\n\nwere $1.09, down 20% from a year ago and up 24% from the previous quarter.', metadata={'source': 'custom_data_chatbot/pdfs/NVIDIA Corporation.pdf'}), Docume

In [6]:
# load text embedding model from HuggingFaceHub to generate vector embeddings ..........................................
embed_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l6-v2",
    cache_folder=model_path,
    # cpu because on AWS we are not using GPU
    model_kwargs={
        "device": "cpu",
    },  # make it to "cpu" in case of no GPU
    encode_kwargs={"normalize_embeddings": False},
    multi_process=True,
)

In [7]:
# Store vector embeddings and define retriever .........................................................................
chroma_db = Chroma.from_documents(pages, embed_model, persist_directory=model_path)

retriever = chroma_db.as_retriever(
    search_type="mmr",  # Maximum MArginal Relevance
    search_kwargs={"k": 1},  # max relevan docs to retrieve
)

In [8]:
# Load the pre-trained model and tokenizer .............................................................................
tokenizer = AutoTokenizer.from_pretrained("gpt2", cache_dir=model_path)
model = AutoModelForCausalLM.from_pretrained("gpt2", cache_dir=model_path)

In [9]:
# Define pipeline ......................................................................................................
text_generator = pipeline(
    task="text-generation",
    model=model,
    token="PUT_HERE_HUGGINGFACEHUB_API_TOKEN",
    trust_remote_code=True,
    device_map="auto",  # make it "auto" for auto selection between GPU and CPU, -1 for CPU, 0 for GPU
    tokenizer=tokenizer,
    max_length=1024,  # generate token sequences of 1024 including input and output token sequences
)

ms_dialo_gpt_hf = HuggingFacePipeline(pipeline=text_generator)

In [10]:
# Get Answer ...........................................................................................................
retrievalQA = RetrievalQA.from_llm(
    llm=ms_dialo_gpt_hf,
    retriever=retriever,
    prompt=PromptTemplate(
        input_variables=["context"],
        template="{context}",
    ),
)
print(retrievalQA)

combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context'], template='{context}'), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7fd8bc835ae0>)), document_prompt=PromptTemplate(input_variables=['page_content'], template='Context:\n{page_content}'), document_variable_name='context') retriever=VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7fd8c4e818a0>, search_type='mmr', search_kwargs={'k': 1})


In [11]:
# get answer
retrievalQA.invoke("Provide NVIDIA’s outlook for the third quarter of fiscal 2024")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{'query': 'Provide NVIDIA’s outlook for the third quarter of fiscal 2024',
 'result': 'Context:\nCommentary on the quarter by Colette Kress, NVIDIA’s executive vice president and chief financial officer, is available at https://investor.nvidia.com/.\n\nConference Call and Webcast Information\n\nNVIDIA will conduct a conference call with analysts and investors to discuss its first quarter fiscal 2024 financial results and current financial prospects today at 2 p.m.\n\nPacific time (5 p.m. Eastern time). A live webcast (listen-only mode) of the conference call will be accessible at NVIDIA’s investor relations website,\n\nhttps://investor.nvidia.com. The webcast will be recorded and available for replay until NVIDIA’s conference call to discuss its financial results for its second quarter of\n\nfiscal 2024.\n\nNon-GAAP Measures\n\nTo supplement NVIDIA’s condensed consolidated financial statements presented in accordance with GAAP, the company uses non-GAAP measures of certain key non-GAAP