In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --- Load the source PDF and cut it into overlapping chunks ---
PDF_PATH = "/Users/m4air/Documents/NVIDIA.pdf"  # <-- point this at your own PDF

loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

# Chunking currently in use: chunk_size=800, chunk_overlap=85
# ("M4 MacBook Air optimize test 1").
# Previous baseline that was tried: chunk_size=1000, chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=85,
    chunk_size=800,
)
docs = text_splitter.split_documents(documents)

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# --- Embed the chunks, index them with FAISS, and persist the index to disk ---
INDEX_DIR = "/Users/m4air/Documents/Machine_Learning/LangchangLLM/output"  # <-- output folder for the saved index

# No model is named here, so whatever HuggingFaceEmbeddings uses by default applies.
embeddings = HuggingFaceEmbeddings()
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
vectorstore.save_local(folder_path=INDEX_DIR)

In [None]:
## Cleaned-up pipeline: custom prompt plus llama.cpp settings for Apple Silicon (Metal). ##

import os
from langchain.chains import RetrievalQA, StuffDocumentsChain, LLMChain
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate

# Environment flag set before the llama.cpp model is constructed; intended as an
# Apple Silicon (Metal) tweak.
# NOTE(review): GGML_METAL_NL is not a variable I can find in the llama.cpp
# documentation -- confirm that it has any effect.
os.environ.update(GGML_METAL_NL="1")

# Prompt text for the "Robot Mike" persona.
# Two placeholders are filled in at run time: {context} and {question}.
# The text is assembled line by line so that the trailing space on the first
# line stays visible and is not stripped by an editor.
template = "\n".join(
    [
        "You are Robot Mike, a friendly and slightly sarcastic AI assistant. ",
        "You love helping people and sometimes add a witty comment at the end of your answers.",
        "",
        "Here is some context to help you answer:",
        "{context}",
        "",
        "Answer the following question in character as Robot Mike:",
        "",
        "Question: {question}",
        "Answer:",
    ]
)

# Wrap the template string; its two placeholders are declared as input variables.
prompt = PromptTemplate(
    template=template,
    input_variables=["question", "context"],
)

# Build the retriever from `vectorstore`, which must already exist in the kernel
# (it is created by the FAISS indexing cell). To reuse a previously saved index
# instead, load it first, for example:
#     from langchain.vectorstores import FAISS
#     vectorstore = FAISS.load_local("your_index_path", embeddings)
# See the LangChain documentation for details.
retriever = vectorstore.as_retriever()

# -- LlamaCpp config --
# Local GGUF model (Meta-Llama-3-8B-Instruct, Q4_K_M) run through llama.cpp.
llm = LlamaCpp(
    model_path="/Volumes/Crucial X6/GGUF_llm_models/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",
    n_gpu_layers=30,  # layers offloaded to the GPU (Metal on Apple Silicon)
    n_batch=512,
    n_ctx=8192,
    f16_kv=True,
    n_threads=os.cpu_count(),  # use every core the OS reports
    # `callback_manager=` is the deprecated spelling on LangChain language models;
    # handlers are passed through `callbacks` instead. The handler streams generated
    # tokens to stdout as they are produced.
    callbacks=[StreamingStdOutCallbackHandler()],
    verbose=True,
)

# --- Chain setup ---
# 1) Prompt + model.
llm_chain = LLMChain(prompt=prompt, llm=llm)

# 2) Document combiner: the documents it receives are passed to the prompt
#    under the "context" variable, matching the {context} placeholder.
combine_documents_chain = StuffDocumentsChain(
    document_variable_name="context",
    llm_chain=llm_chain,
)

# 3) Retrieval QA: ties the retriever to the combiner and also asks for the
#    source documents to be returned alongside the answer.
qa = RetrievalQA(
    combine_documents_chain=combine_documents_chain,
    retriever=retriever,
    return_source_documents=True,
)

# --- Ask the question ---
# The chain is invoked with the question under the "query" key.
# NOTE(review): the model presumably only sees the chunks the retriever returns,
# so the "how many times" part of this question may not be answerable
# reliably -- confirm expectations.
question = (
    "Can you summarize this PDF document and give me a general idea of what it’s about? "
    "As well as how many times is the man himself jenson mentioned?"
)
result = qa.invoke(dict(query=question))

# The answer text (result["result"]) is printed in the following cell.

In [None]:
# Print the generated answer text stored under the "result" key of the chain output.
print(result["result"])