In [None]:
from PyPDF2 import PdfReader
from langchain.llms import HuggingFaceTextGenInference
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.schema import StrOutputParser
import joblib
import os

In [None]:
# Silence every warning for the rest of the session so that library notices
# do not clutter the cell output.
import warnings

warnings.filterwarnings(action='ignore')

In [None]:
# Address of the inference server that the client below sends requests to.
URI = "http://139.84.138.178:8080/"

# Hugging Face model identifier, kept for reference; nothing in this cell
# passes it to the client.
model_id = "meta-llama/Llama-2-13b-chat-hf"

# Client for the remote text-generation server, configured with
# max_new_tokens=2000.
llm = HuggingFaceTextGenInference(
    max_new_tokens=2000,
    inference_server_url=URI,
)

In [None]:
# Prompt layout built around the [INST] / <<SYS>> markers: a system message,
# then the retrieved context and the user's question.  {context} and
# {question} are placeholders that get filled in when the prompt is rendered.
# The text deliberately starts and ends with a newline.
template = (
    "\n"
    "<s>[INST] <<SYS>>\n"
    "You are a helpful AI assistant.\n"
    "Answer based on the context provided. If you cannot find the correct answer, say I don't know. Be concise and just include the response.\n"
    "<</SYS>>\n"
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer: [/INST]\n"
)

In [None]:
# Wrap the raw template string in a PromptTemplate so it can be piped into
# the chain.
prompt = PromptTemplate.from_template(template)

In [None]:
# Open the source PDF with PyPDF2; the path is relative to the working
# directory.
reader = PdfReader('data/steve-jobs.pdf')

In [None]:
# Concatenate the extracted text of every page into a single string, with no
# separator between pages.  A page whose extract_text() yields nothing
# (None or '') contributes the empty string.  The string is built with one
# join rather than by repeated concatenation inside a loop.
raw_text = ''.join(page.extract_text() or '' for page in reader.pages)

In [None]:
# Splitter configuration: break on newlines, with a chunk size of 500 and an
# overlap of 20, both measured with len().
text_splitter = CharacterTextSplitter(
    length_function=len,
    chunk_overlap=20,
    chunk_size=500,
    separator="\n",
)

# Chunks of the full document text; these are what get indexed later.
texts = text_splitter.split_text(raw_text)

In [None]:
# On-disk cache for the embedding model object.  What gets dumped is the
# HuggingFaceEmbeddings instance itself, not any computed vectors.
embeddings_file = "./data/steve-jobs.joblib"

# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load a cache file that comes from a trusted source.
if not os.path.exists(embeddings_file):
    # No cache yet: construct the model with default settings and save it.
    embeddings = HuggingFaceEmbeddings()
    joblib.dump(embeddings, embeddings_file)
else:
    embeddings = joblib.load(embeddings_file)

In [None]:
# Build a Chroma vector store from the text chunks, using `embeddings` as
# the embedding function.  No persistence arguments are passed here.
vectorstore = Chroma.from_texts(texts, embeddings)

In [None]:
# Expose the vector store through the retriever interface; no search
# arguments are passed, so the library defaults apply.
retriever = vectorstore.as_retriever()

In [None]:
def _format_docs(docs):
    """Join the text of retrieved documents into one context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming query string feeds both prompt variables: it goes to the
# retriever, whose hits are flattened to plain text for {context}, and it is
# passed through unchanged for {question}.  The formatting step matters
# because the retriever returns Document objects; without it the prompt would
# receive the repr of that list instead of the passage text.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

In [None]:
# Question to ask about the document.
query="Where did Steve Jobs deliver this speech?"

In [None]:
# Run the retriever on its own to see which chunks it matches for the query,
# without calling the LLM.  The result is not assigned to a variable.
retriever.get_relevant_documents(query)

In [None]:
# Run the full chain on the query and print what it returns.
print(chain.invoke(query))