In [None]:
# Step 3.1 Load embeddings model
from langchain_community.embeddings import LlamaCppEmbeddings

# GGUF embeddings model file; the path is relative to the notebook's working directory.
# This should be the same embeddings model that was used when the vector database
# loaded in step 3.2 was built, otherwise query vectors will not be comparable.
embedding_model_path = "models/all-MiniLM-L6-v2-Q6_K.gguf"
embedding_model = LlamaCppEmbeddings(model_path=embedding_model_path)

In [None]:
# Step 3.2 load vector database that was persisted in the past
#
# Opens the on-disk Chroma store in `persist_folder` and wraps it in a LangChain
# vector store that embeds queries with `embedding_model` (from step 3.1).
# No collection_name is passed, so the wrapper's default collection is used;
# it has to match the collection name used when the store was written.

from pathlib import Path

import chromadb
# Imported from langchain_community, consistent with the embeddings and LLM imports
# in this notebook (the old `langchain.vectorstores` path is deprecated).
from langchain_community.vectorstores import Chroma

persist_folder = 'chroma_db_c1000o200_docs_textbooks'

# Fail fast on a wrong path / working directory: chromadb would otherwise
# initialise a new, empty store at this location and every later search
# would silently return nothing.
if not Path(persist_folder).is_dir():
    raise FileNotFoundError(
        f"Chroma persist folder not found: {Path(persist_folder).resolve()}"
    )

client = chromadb.PersistentClient(path=persist_folder)
vectordb = Chroma(
    client=client,
    embedding_function=embedding_model
)

In [None]:
# Step 3.3 load a LLM model
#
# GGUF model files can be downloaded from https://huggingface.co/
# (search for "gguf" models, download the .gguf file) and the path
# below must point at the downloaded file.

from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Candidate model files (swap one into llm_model_path):
#   models/gemma-3-12b-it-q4_0.gguf
#   models/Meta-Llama-3.1-8B-Instruct-Q6_K_L.gguf
#   models/google_gemma-3n-E4B-it-Q6_K.gguf
#   models/llama-2-7b.Q4_K_M.gguf
#
# Background reading on how LLM architectures compare:
#   https://magazine.sebastianraschka.com/p/the-big-llm-architecture-comparison

llm_model_path = "models/gemma-3-12b-it-q4_0.gguf"   # <--- make sure this is pointed to your folder/model

# Callback manager whose only handler streams output to stdout.
stdout_streaming = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=llm_model_path,
    n_ctx=2048,          # context window; prompt + retrieved text + answer must fit
    n_batch=512,
    n_gpu_layers=100,    # presumably >= the model's layer count, i.e. offload everything to GPU - confirm
    f16_kv=True,
    callback_manager=stdout_streaming,
    verbose=False,
)


# ----------------------------------------------------------------------
# Step 3.4 connect the LLM and the vector database for question and answer
# ----------------------------------------------------------------------

from langchain.chains import RetrievalQA

# Retriever with default settings (no search_type / search_kwargs are passed).
retriever = vectordb.as_retriever()

# chain_type="stuff" puts the retrieved documents into a single prompt for the
# LLM, so retrieved text plus the question has to fit in the LLM's context window.
# verbose=True makes the chain log its progress while it runs.
qa = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    verbose=True,
)


In [None]:
# Step 3.5 ask a question
#
# The multiple-choice question below is passed as one string to `qa`, the
# RetrievalQA chain built in step 3.4. The same `query` string is reused by
# the step 3.6 cell to show which text the vector database returns for it.

query = """
Which type of bond represents a weak chemical bond?
a. hydrogen bond
b. ionic bond
c. covalent bond
d. polar covalent bond
"""
# The chain's return value is kept in `response` but not displayed by this cell.
# NOTE(review): presumably a dict holding the answer under "result" - confirm.
response = qa.invoke(query)


In [None]:
# step 3.6 (extra) display relevant text retrieved from vectordb

print(f'using chroma db at {persist_folder} to search for relevant text')

# similarity_search_with_score returns a list of (document, score) tuples:
#   - document carries metadata and page_content
#   - score is a distance as a float; a lower score means more similar
# https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.chroma.Chroma.html#langchain_community.vectorstores.chroma.Chroma.similarity_search_with_score
#
# Cosine distance:
#   cosine distance = 1 - cosine similarity
#   Two identical vectors have an angle of 0, so cos(theta) = 1, similarity = 1
#   and distance = 1 - 1 = 0.
#   https://medium.com/geekculture/cosine-similarity-and-cosine-distance-48eed889a5c4
#
# NOTE(review): the printed label assumes the collection was built with the
# cosine metric; the actual metric depends on how the store was created - confirm.
results = vectordb.similarity_search_with_score(query)

for document, distance in results:
    print(f'cosine distance {distance}\n', document.page_content, '\n')

In [None]:
# the end!

# how does a neural network work?
# https://youtu.be/AeM5LgNmNAw?feature=shared&t=141
# https://youtu.be/me4SV_tuMSE?feature=shared&t=312
# https://www.youtube.com/watch?v=aircAruvnKk&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi

# how does an LLM transformer architecture work?
# https://youtu.be/wjZofJX0v4M?feature=shared
# https://youtu.be/bCz4OMemCcA?feature=shared



