In [66]:
# pip install llama-index llama-index-embeddings-huggingface llama-index-llms-huggingface gradio transformers torch

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

from llama_index.core import VectorStoreIndex, get_response_synthesizer, Settings
from llama_index.core.schema import TextNode
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

import torch, gradio as gr

from openai import OpenAI
from dotenv import load_dotenv
import os
from IPython.display import display, Markdown


# 1. Local embeddings
# Settings.embed_model = HuggingFaceEmbedding("sentence-transformers/all-MiniLM-L6-v2")

# 2. Local LLM (tiny model for CPU)
# model_name = "google/flan-t5-small"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Settings.llm = HuggingFaceLLM(model=model, tokenizer=tokenizer, max_new_tokens=128,context_window=512,generate_kwargs={"temperature": 0.1})

# 3. Load documents
docs = SimpleDirectoryReader("Data/").load_data()



In [67]:
# import sk from .env file

load_dotenv()

# connect to openai API
client = OpenAI(api_key=os.getenv("BOBAD_OPENAI_KEY"))

In [68]:
# 4. Build index
index = VectorStoreIndex.from_documents(docs)

print(f"Embedding Model: {index._embed_model.model_name}")
print(f"Index Size: {len(index.vector_store.data.embedding_dict)}")
first_key = list(index.vector_store.data.embedding_dict.keys())[0]
first_embedding = index.vector_store.data.embedding_dict[first_key]
print(f"First embedding shape: {len(first_embedding)}")



Embedding Model: sentence-transformers/all-MiniLM-L6-v2
Index Size: 20
First embedding shape: 384


In [69]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# changing embedding model
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

In [70]:
index = VectorStoreIndex(docs)

print(f"Embedding Model: {index._embed_model.model_name}")
print(f"Index Size: {len(index.vector_store.data.embedding_dict)}")
first_key = list(index.vector_store.data.embedding_dict.keys())[0]
first_embedding = index.vector_store.data.embedding_dict[first_key]
print(f"First embedding shape: {len(first_embedding)}")

Embedding Model: BAAI/bge-small-en-v1.5
Index Size: 20
First embedding shape: 384


In [71]:
# configure retriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=10,
)

In [72]:
results = retriever.retrieve("what is Domain Driven Design?")

In [73]:
display(results)

[NodeWithScore(node=Document(id_='d4ea7033-6aaf-4e49-bcd3-482ee16b0823', embedding=None, metadata={'page_label': '1', 'file_name': 'A Crash Course on Domain-Driven Design.pdf', 'file_path': '/Users/dubeys/Documents/ABB/CortexLab/Assignment-3/Data/A Crash Course on Domain-Driven Design.pdf', 'file_type': 'application/pdf', 'file_size': 4062016, 'creation_date': '2024-10-08', 'last_modified_date': '2024-10-08'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='A Crash Course on Domain-Driven Design\nAUG 01, 2024\n1 21 Share\nDeveloping so\x00ware for complex domains is a challenging task.\xa0\nAs the complexity of the problem domain grows, it

In [74]:
# format results in markdown
results_markdown = ""
for i, result in enumerate(results, start=1):
    # results_markdown += f"{i}. **Article title:** {result.metadata["article"]}  \n"
    # results_markdown += f"   **Section:** {result.metadata["section"]}  \n"
    results_markdown += f"   **Snippet:** {result.text}  \n\n"

ValueError: Node must be a TextNode to get text.

In [75]:
display(Markdown(results_markdown))



In [76]:
# 5. Query engine
# query_engine = index.as_query_engine(response_mode="compact")



In [77]:
# configure response synthesizer
response_synthesizer = get_response_synthesizer()

In [78]:
# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

In [81]:
response = query_engine.query("what is Domain-Driven Design ?")
print(response)


Empty Response


In [82]:
print(f"LLM: {Settings.llm.model}")
from llama_index.llms.openai import OpenAI

AttributeError: 'HuggingFaceLLM' object has no attribute 'model'

In [59]:
# changing the global LLM
Settings.llm = OpenAI("gpt-4o")
print(f"LLM: {Settings.llm.model}")

TypeError: OpenAI.__init__() takes 1 positional argument but 2 were given

In [62]:
# simpler way to make query engine
query_engine = index.as_query_engine()
response = query_engine.query("what is Domain-Driven Design?")
print(response)

Empty Response


In [None]:
print(f"LLM: {Settings.llm.model}")

In [None]:
# 6. Gradio UI
def rag_chat(user_input: str) -> str:
    return str(query_engine.query(user_input))

iface = gr.Interface(
    fn=rag_chat,
    inputs="text",
    outputs="text",
    title="📚 Offline RAG Chatbot",
    description="Ask questions about your local documents (embeddings + LLM are 100% local)."
)

iface.launch()