In [None]:
from datetime import datetime

def print_current_time():
    """Print the current local time of day as ``Current Time: HH:MM:SS``.

    Called between notebook sections as a lightweight progress timestamp.
    """
    # A format spec applied to a datetime is forwarded to strftime.
    timestamp = f"{datetime.now():%H:%M:%S}"
    print("Current Time:", timestamp)


# Record when the notebook run starts.
print_current_time()

# Imports

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from langchain_community.document_loaders import UnstructuredPDFLoader
from IPython.display import display as Markdown

from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.vectorstores import Chroma

from langchain.embeddings.base import Embeddings

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.retrievers.multi_query import MultiQueryRetriever

from langchain_huggingface import HuggingFaceEndpoint

In [None]:
# Optional workaround: only needed when protobuf compatibility errors show up.
# NOTE(review): presumably this variable is only read when protobuf is first
# imported, so this cell likely has to run before the imports cell above to
# have any effect - confirm.
import os

os.environ.update({"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"})

# PDF

In [None]:
# Timestamp taken just before the PDF is loaded.
print_current_time()

In [None]:
from pathlib import Path

# Path of the PDF to index (relative paths resolve against the working directory).
local_path = "test-doc.pdf"

# Fail fast with a clear message: previously a missing document only printed a
# hint and left `data` undefined, so the next cell died with a NameError.
if not local_path or not Path(local_path).is_file():
    raise FileNotFoundError(
        f"Upload a PDF file: {local_path!r} could not be found"
    )

# Parse the PDF into LangChain documents.
loader = UnstructuredPDFLoader(file_path=local_path)
data = loader.load()

In [None]:
# The imports cell binds the name `Markdown` to IPython's `display` function
# (`import display as Markdown`), so the text was shown as plain output and the
# outer call then displayed `None`. Import the real Markdown class here so the
# preview is actually rendered as Markdown.
from IPython.display import Markdown, display

# Preview the first loaded document.
display(Markdown(data[0].page_content))

# Vector Embeddings

In [None]:
# Split the loaded document into overlapping character-based chunks.
# NOTE(review): chunks may be up to 7500 characters, but LlamaEmbeddings
# truncates its input to 512 tokens, so the tail of a long chunk is probably
# not reflected in its embedding - consider a smaller chunk_size.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=7500,
)
chunks = text_splitter.split_documents(data)

In [None]:
import os

# Hugging Face repository of the model used for both embeddings and generation.
model_name = "meta-llama/Llama-3.2-1B"

# Prefer a token supplied through the HF_TOKEN environment variable so the
# secret never has to be saved inside the notebook. The placeholder is kept as
# a fallback for backward compatibility with in-place editing.
access_token = os.environ.get("HF_TOKEN") or "insert_your_access_token_here"

tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)

# Reuse EOS as the padding token so batched tokenization with padding=True
# works (presumably this tokenizer ships without a dedicated pad token).
tokenizer.pad_token = tokenizer.eos_token

In [None]:
class LlamaEmbeddings(Embeddings):
    """LangChain embeddings built from mean-pooled hidden states of a causal LM.

    Each text is tokenized, run through the model, and represented by the
    average of the last hidden layer over its real (non-padding) tokens.

    Args:
        model: Model called as ``model(**inputs, output_hidden_states=True)``
            whose output exposes ``hidden_states``.
        tokenizer: Tokenizer matching ``model``; it must be able to pad.
        batch_size: Number of texts sent through the model per forward pass.
        max_length: Token limit per text; longer texts are truncated.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """

    def __init__(self, model, tokenizer, batch_size=8, max_length=512):
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        self.model = model
        self.tokenizer = tokenizer
        self.batch_size = batch_size
        self.max_length = max_length

    def _embed_batch(self, texts):
        """Embed one batch of texts and return a list of float vectors."""
        inputs = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=self.max_length,
        )

        # Keep the inputs on the same device as the model when it reports one.
        device = getattr(self.model, "device", None)
        if device is not None:
            inputs = inputs.to(device)

        with torch.no_grad():
            outputs = self.model(**inputs, output_hidden_states=True)

        last_hidden_state = outputs.hidden_states[-1]

        # Average only over real tokens: a plain mean over the sequence axis
        # also counts padding positions, which makes a text's vector depend on
        # whatever else happened to be in the same batch.
        mask = inputs["attention_mask"].unsqueeze(-1).to(last_hidden_state.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (last_hidden_state * mask).sum(dim=1) / token_counts

        return pooled.float().cpu().tolist()

    def embed_documents(self, texts):
        """Embed a sequence of texts.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            A list with one embedding (list of floats) per input text; an
            empty list when ``texts`` is empty.
        """
        texts = list(texts)
        embeddings = []
        # Process in fixed-size batches so a large corpus is not pushed through
        # the model in a single forward pass.
        for start in range(0, len(texts), self.batch_size):
            embeddings.extend(self._embed_batch(texts[start:start + self.batch_size]))
        return embeddings

    def embed_query(self, query):
        """Embed a single query string and return its vector."""
        return self.embed_documents([query])[0]

In [None]:
# Embed every chunk with the custom Llama embedder and index the vectors in a
# Chroma collection named "local-rag".
vector_db = Chroma.from_documents(
    chunks,
    collection_name="local-rag",
    embedding=LlamaEmbeddings(model, tokenizer),
)

# Retrieval

In [None]:
# Hugging Face endpoint client for the same model repo; it is used both to
# rewrite queries and to generate the final answer.
llm = HuggingFaceEndpoint(
    huggingfacehub_api_token=access_token,
    repo_id=model_name,
)

In [None]:
# Prompt asking the LLM for five rephrasings of the user question; the
# multi-query retriever runs a similarity search for each of them.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

In [None]:
# Wrap the vector store retriever so each question is first expanded into
# several variants by the LLM using QUERY_PROMPT.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

# Answering prompt: restricts the model to the retrieved context.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

In [None]:
# Turn the answering template into a prompt with {context} and {question} slots.
prompt = ChatPromptTemplate.from_template(template=template)

# Prompt

In [None]:
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: retrieve context for the question, fill the prompt, call the
# LLM, and return the answer as a plain string.
# The retriever output is passed through _format_docs so the prompt receives
# the documents' text rather than the Python repr of a list of Document
# objects (metadata, escaped newlines, ...).
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Timestamp taken just before the chain is invoked.
print_current_time()

In [None]:
# Run the chain on a sample question; as the cell's last expression, the
# returned value is shown as the cell output.
chain.invoke("What are the 5 pillars of global cooperation? Include an explanation of each pillar")

In [None]:
# Timestamp taken after the chain call returns.
print_current_time()