<a href="https://colab.research.google.com/github/brem-21/RAG-APP/blob/main/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.6.3-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.13.0-py2.py3-none-any.whl.metadata (2.9 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.30.0-py3

In [1]:
#%%
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
from uuid import uuid4
import requests
import asyncio
import chromadb
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import torch

In [2]:
def max_token_length(txt_list:list):
    max_length = 0
    for txt in txt_list:
        token_count = len(re.findall(r'\w+', txt))
        if token_count > max_length:
            max_length = token_count
    return f"Max Token Length: {max_length} tokens"

In [3]:
model_max_chunk_length = 256
token_splitter = SentenceTransformersTokenTextSplitter(
    tokens_per_chunk=model_max_chunk_length,
    model_name="all-MiniLM-L6-v2",
    chunk_overlap=0
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
text_path = "/content/paul_graham_essay (1).txt"
with open(text_path, "r", encoding="utf-8") as f:
    text_raw=f.read()

In [6]:
character_splitter = RecursiveCharacterTextSplitter(
    separators=['\n  \n', '\n\n', '\n', '. '],
    chunk_size=1000,
    chunk_overlap=0
)
text_splitted = character_splitter.split_text(text_raw)

In [7]:
max_token_length(text_splitted)

'Max Token Length: 205 tokens'

In [8]:
print(f"Total number of splitted chunks: {len(text_splitted)}")

Total number of splitted chunks: 103


In [9]:
text_tokens = []
for text in text_splitted:
    text_tokens.extend(token_splitter.split_text(text))
print(f"Total number of tokens: {len(text_tokens)}")

Total number of tokens: 103


In [10]:
max_token_length(text_tokens)

'Max Token Length: 205 tokens'

In [11]:
embedding_fn = SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2",
    device="cpu"
)

In [12]:
len(embedding_fn(text_tokens[0])[0])

384

In [13]:
chroma_db = chromadb.Client()
chroma_collection = chroma_db.get_or_create_collection("paul_graham", embedding_function=embedding_fn)

In [14]:
ids = [str(uuid4()) for _ in range(len(text_tokens))]
ids[:5]

['3a245d0a-96e8-41a3-bde0-d4c273d12161',
 '43651178-e234-4e89-93ce-7a5bc8e3d110',
 '76952aa7-8c74-48f2-82d4-fb6980969771',
 '9ca56e5c-3b2a-44d7-8451-36fbfea9c3a0',
 '3f95b4ef-a0bf-400d-9557-7a92fd88609a']

In [15]:
chroma_collection.add(documents=text_tokens, ids=ids)

In [16]:
res = chroma_collection.query(query_texts=["What did paul graham work on"], n_results = 10)
res['documents']

[['in september, robert rebelled. " we \' ve been working on this for a month, " he said, " and it \' s still not done. " this is funny in retrospect, because he would still be working on it almost 3 years later. but i decided it might be prudent to recruit more programmers, and i asked robert who else in grad school with him was really good. he recommended trevor blackwell, which surprised me at first, because at that point i knew trevor mainly for his plan to reduce everything in his life to a stack of notecards, which he carried around with him. but rtm was right, as usual. trevor turned out to be a frighteningly effective hacker. it was a lot of fun working with robert and trevor. they \' re the two most independent - minded people i know, and in completely different ways. if you could see inside rtm \' s brain it would look like a colonial new england church, and if you could see inside trevor \' s it would look like the worst excesses of austrian rococo.',
  "as jessica and i wer

In [17]:
chroma_collection = chroma_db.get_or_create_collection("paul_graham", embedding_function=embedding_fn)

In [18]:
def get_query_results(query_text: str, n_results: int = 5) -> str:
    """Retrieve relevant information from ChromaDB."""
    res = chroma_collection.query(query_texts=[query_text], n_results=n_results)
    docs = res["documents"][0]

    # Check if 'metadatas' exists, is a list, and contains valid metadata dictionaries
    if res.get('metadatas') and isinstance(res['metadatas'], list) and len(res['metadatas']) > 0:
        keywords = [item.get('keyword', '') if isinstance(item, dict) else '' for item in res['metadatas'][0]]
    else:
        keywords = [''] * len(docs)  # Assign empty keywords if metadata is missing

    return '; '.join([f'{keyword}: {information}' for keyword, information in zip(keywords, docs)])


In [19]:
def rag_tool(user_query: str) -> str:
    """Retrieves relevant data from database and generates an AI response using DeepSeek-7B."""
    retrieved_results = get_query_results(user_query)
    retrieved_results = retrieved_results[:1000]
    system_prompt = (
        "You are an AI assistant with RAG capabilities. You will be given a user query and relevant retrieved documents. "
        "Please generate a response based only on the provided information."
    )

    full_query = f"<query>{user_query}</query>\n<retrieved>{retrieved_results}</retrieved>\n<instruction>{system_prompt}</instruction>"

    # Load DeepSeek-7B model and tokenizer
    model_name = "deepseek-ai/deepseek-llm-7b-chat"
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        config=config,
        torch_dtype=torch.float16,  # Use fp16 for efficiency
        device_map="auto",  # Automatically assign device
        offload_folder="./offload",  # Specify folder for offloading
        trust_remote_code=True
    )
    inputs = tokenizer(full_query, return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=200)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return response

In [20]:
async def main():
    query = "who is paul graham"
    response = rag_tool(query)  # Directly call the RAG tool
    print("Response from RAG:")
    print("----------------------------")
    print(response)

if __name__ == "__main__":
    # Remove asyncio.run() and directly await the main function
    await main()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


Response from RAG:
----------------------------
<query>who is paul graham</query>
<retrieved>: in september, robert rebelled. " we ' ve been working on this for a month, " he said, " and it ' s still not done. " this is funny in retrospect, because he would still be working on it almost 3 years later. but i decided it might be prudent to recruit more programmers, and i asked robert who else in grad school with him was really good. he recommended trevor blackwell, which surprised me at first, because at that point i knew trevor mainly for his plan to reduce everything in his life to a stack of notecards, which he carried around with him. but rtm was right, as usual. trevor turned out to be a frighteningly effective hacker. it was a lot of fun working with robert and trevor. they ' re the two most independent - minded people i know, and in completely different ways. if you could see inside rtm ' s brain it would look like a colonial new england church, and if you could see inside trevor 