In [None]:
! pip install langchain_community chromadb sentence-transformers chromadb

In [None]:
# Imports
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
import subprocess


In [None]:
#### INDEXING ####

# 1. Load Documents: a tiny in-memory corpus; replace these entries with your own content
docs = [
    {"content": "My first dogs name was Shadow."},
    {"content": "Shadow was a black dog with a white patch on his chest."}
]

# 2. Split Documents: all contents are joined with a space into one text, then chunked
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_text(
    " ".join([entry["content"] for entry in docs])
)
print("Splits:", splits)

In [None]:
# 3. Generate Local Embeddings
# "all-MiniLM-L6-v2" is a fast, lightweight model; "all-mpnet-base-v2" is a larger, more accurate one.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every chunk in one batched call instead of one model call per chunk;
# list() keeps the result as "one vector per chunk", indexable as embeddings[i].
embeddings = list(embedding_model.encode(splits))

# Print a summary rather than dumping every float of every vector.
print(
    "Embeddings:", len(embeddings),
    "vector(s) of dimension", len(embeddings[0]) if embeddings else 0,
)

In [6]:
# 4. Store Embeddings Locally (using ChromaDB)
import chromadb

# Initialize Chroma client (in-memory here).
client = chromadb.Client()

# For on-disk persistence use the persistent client instead
# (the older Settings(chroma_db_impl="duckdb+parquet") form is a legacy configuration):
# client = chromadb.PersistentClient(path="chroma_storage")

# Create a collection in Chroma (or fetch it if this cell was already run)
collection = client.get_or_create_collection("rag_local_demo")

# Add documents and embeddings in a single batched call.
# upsert (rather than add) keeps the cell idempotent: re-running it overwrites
# the records with the same ids instead of colliding with them.
# Vectors are converted to plain lists of floats, which every Chroma version accepts.
collection.upsert(
    ids=[str(i) for i in range(len(splits))],
    documents=list(splits),
    embeddings=[vector.tolist() for vector in embeddings],
)

In [None]:
#### RETRIEVAL ####

# Query Embedding
query = "What was the name of my first dog and what was their appearance?"
query_embedding = embedding_model.encode(query)

# Retrieve relevant documents.
# Ask for at most 5 matches, but never more than the collection holds: the demo
# corpus is tiny, and requesting more results than stored records triggers a
# warning (or an error, depending on the Chroma version).
results = collection.query(
    query_embeddings=[query_embedding.tolist()],
    n_results=max(1, min(5, collection.count()))
)
# results["documents"] holds one list of documents per query embedding.
print("Retrieved Documents:", results["documents"])

In [None]:
#### GENERATION ####

# 5. Use llama.cpp for Local LLM Inference
def query_llama_cpp(
    input_text,
    llama_path="/home/fasz/tools/llama.cpp/build/bin/llama-cli",
    model_path="/home/fasz/tools/models/TheBloke/Wizard-Vicuna-30B-Uncensored-GGUF/Wizard-Vicuna-30B-Uncensored.Q4_K_M.gguf",
    temperature=0.7,
    max_tokens=512,
    threads=14,
    gpu_layers=10,
):
    """Run one prompt through the llama.cpp CLI and return what it printed.

    Args:
        input_text: Prompt passed to the binary via ``--prompt``.
        llama_path: Path to the llama.cpp CLI binary.
        model_path: Path to the GGUF model file. A smaller alternative is
            ".../Wizard-Vicuna-13B-Uncensored-GGUF/Wizard-Vicuna-13B-Uncensored.Q4_K_S.gguf".
        temperature: Sampling temperature (``--temp``).
        max_tokens: Number of tokens to predict (``--predict``).
        threads: Number of CPU threads (``--threads``).
        gpu_layers: Number of layers to store in VRAM (``--gpu-layers``).

    Returns:
        The process's standard output with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If ``llama_path`` does not exist (raised by subprocess).
        RuntimeError: If the process exits with a non-zero status; the message
            carries the tail of stderr so the failure is not silently turned
            into an empty answer.
    """
    command = [
        llama_path,
        "--model", model_path,
        "--prompt", input_text,
        "--temp", str(temperature),
        "--predict", str(max_tokens),
        "--threads", str(threads),
        "--gpu-layers", str(gpu_layers),
        "--verbose",
    ]

    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        # Keep only the end of stderr: --verbose output can be very long.
        stderr_tail = (result.stderr or "").strip()[-2000:]
        raise RuntimeError(
            f"llama.cpp exited with status {result.returncode}: {stderr_tail}"
        )
    return result.stdout.strip()


# Generate Answer
# results["documents"] is nested: one list of documents per query. Flatten it so
# every retrieved document reaches the prompt (taking doc[0] of each group kept
# only the top hit and silently dropped the rest).
retrieved_docs = "\n".join(doc for group in results["documents"] for doc in group)
query_text = f"Context:\n{retrieved_docs}\n\nQuestion: {query}\nAnswer:"
response = query_llama_cpp(query_text)
print("Generated Response:", response)

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt: the template has two placeholders, {context} and {question}
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)
prompt

In [None]:
# results["documents"] is nested (one list per query); flatten it so all
# retrieved documents are used as context, not just the first hit of each group.
retrieved_docs = "\n".join(doc for group in results["documents"] for doc in group)
query_text = prompt.format(context=retrieved_docs, question=query)
response = query_llama_cpp(query_text)
print("Generated Response:", response)

In [None]:
! pip install tiktoken

In [None]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Count the tokens `string` is encoded into by the tiktoken encoding `encoding_name`."""
    return len(tiktoken.get_encoding(encoding_name).encode(string))
# Example: token count of the demo question under the "cl100k_base" encoding
question = "What was the name of my first dog and what was their appearance?"
num_of_tokens = num_tokens_from_string(string=question, encoding_name="cl100k_base")
print(f"Number of tokens in question: {num_of_tokens}")

In [None]:
import numpy as np

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Args:
        vec1: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        ``dot(vec1, vec2) / (|vec1| * |vec2|)``. If either vector has zero
        length the similarity is undefined; 0.0 is returned instead of
        dividing by zero (which would yield NaN and a RuntimeWarning).
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        return 0.0
    return np.dot(vec1, vec2) / norm_product
# Embed a statement and a matching question, then compare the two vectors
document, question = (
    "My first birds name was Tweety.",
    "What was the name of my first bird?",
)
document_embedding, question_embedding = (
    embedding_model.encode(text) for text in (document, question)
)
similarity = cosine_similarity(question_embedding, document_embedding)
print("Cosine Similarity:", similarity)

In [None]:
! pip install beautifulsoup4

In [137]:
#### INDEXING ####

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
# Fetch the blog post; parsing is restricted to elements carrying these CSS classes
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
blog_docs = loader.load()

In [138]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitter built via from_tiktoken_encoder with chunk_size=300 and chunk_overlap=50
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=50,
    chunk_size=300,
)

# Chunk the loaded blog documents
splits = text_splitter.split_documents(blog_docs)

In [None]:
# Store the blog chunks in their own collection (created on first run, reused afterwards)
collection = client.get_or_create_collection(name="demo_documents")
documents = [split.page_content for split in splits]
metadatas = [split.metadata for split in splits]  # Optional metadata

# Encode all chunks in one batched call instead of one model call per chunk;
# .tolist() yields plain lists of floats, which every Chroma version accepts.
embeddings = embedding_model.encode(documents).tolist()
ids = [f"doc_{i}" for i in range(len(documents))]

# upsert (rather than add) keeps the cell idempotent: re-running it overwrites
# the records with the same ids instead of colliding with them.
collection.upsert(
    ids=ids,
    documents=documents,
    embeddings=embeddings,
    metadatas=metadatas
)


In [None]:
# Query the collection
query = "What is an autonomous agent?"
#query = "What is faszrohdan?"
query_embedding = embedding_model.encode(query)

# Retrieve top 5 matching documents
results = collection.query(
    query_embeddings=[query_embedding.tolist()],
    n_results=5
)
# Nested result: one list of documents per query embedding (a single query here).
documents = results["documents"]

# Print retrieved documents one by one. Iterating over `documents` itself would
# loop over the per-query groups and print the whole group as a single list.
print("Retrieved Documents:")
for doc in documents[0]:
    print(doc)

In [None]:
# Prompt: the template has two placeholders, {context} and {question}
template = "\n".join([
    "Answer the question based only on the following context:",
    "{context}",
    "",
    "Question: {question}",
    "",
])
# Alternative prompt that tells the model to admit when the context lacks the answer:
#template = """Try to answer the question based only on the following context or if the answer is not in context say "I don't know":
#{context}
#
#Question: {question}
#"""
prompt = ChatPromptTemplate.from_template(template)
prompt

In [None]:
# `documents` is nested (one list per query). Flatten it so all retrieved chunks
# become context; doc[0] per group kept only the single best match.
retrieved_docs = "\n".join(doc for group in documents for doc in group)
retrieved_docs

In [143]:
# Fill the template's {context} and {question} placeholders
query_text = prompt.format(question=query, context=retrieved_docs)

In [None]:
# Send the assembled prompt to the local llama.cpp model and show its reply
response = query_llama_cpp(input_text=query_text)
print("Generated Response:", response)