In [1]:
chroma_db_path = "hf_codellama_7b_exp/chroma_dbs/nRF52840_db"
pickle_path = "hf_codellama_7b_exp/pickle_files/nRF52840_summarized.pkl"

In [2]:
import pickle
import time
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_chroma import Chroma
import base64
import os
from PIL import Image
import pytesseract
from io import BytesIO
from transformers import BlipProcessor, BlipForConditionalGeneration
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch
from dotenv import load_dotenv
import sys
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.schema.document import Document
from langchain.storage import InMemoryStore
import uuid
from langchain_core.runnables import RunnableLambda


2025-05-08 13:27:03.236908: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746710823.254702   93659 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746710823.260132   93659 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746710823.273607   93659 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746710823.273619   93659 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746710823.273621   93659 computation_placer.cc:177] computation placer alr

In [3]:
load_dotenv()
base_model_id = "codellama/CodeLlama-7b-Instruct-hf"
# embedd_model = "sentence-transformers/all-MiniLM-L6-v2" #will have to experiment with embeddig models
# embedd_model = "intfloat/e5-large-v2"
embedd_model = "BAAI/bge-large-en-v1.5"

In [4]:
embedding = HuggingFaceEmbeddings(model_name=embedd_model)

In [5]:
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True)

print("Loading base model...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    quantization_config=bnb_config,
    torch_dtype=torch.float16
)

# Create Hugging Face pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=model.config.eos_token_id, #avoiding warning Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
    max_new_tokens=768,
    # temperature=0.0, # no need as do_sample=False
    do_sample=False
)

# Wrap it in LangChain-compatible interface
model_pipe = HuggingFacePipeline(pipeline=pipe)

Loading tokenizer...
Loading base model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [13]:
def load_chroma_db(local_directory=chroma_db_path):
    return Chroma(persist_directory=local_directory, embedding_function=embedding)

In [14]:
def create_multi_vector_retriever(vectorstore, text_summaries, texts, table_summaries, tables, image_summaries, images):
    """
    Create retriever that indexes summaries, but returns raw images or texts
    """
    # Initialize the storage layer
    store = InMemoryStore()
    id_key = "doc_id"

    # Create the multi-vector retriever
    retriever = MultiVectorRetriever(
        vectorstore=vectorstore,
        docstore=store,
        id_key=id_key,
    )
    
    # Helper function to add documents to the vectorstore and docstore
    def add_documents(retriever, doc_summaries, doc_contents):
        doc_ids = [str(uuid.uuid4()) for _ in doc_contents]
        summary_docs = [
            Document(page_content=s, metadata={id_key: doc_ids[i]})
            for i, s in enumerate(doc_summaries)
        ]
        retriever.vectorstore.add_documents(summary_docs)
        retriever.docstore.mset(list(zip(doc_ids, doc_contents)))

    # Add texts, tables, and images
    # Check that text_summaries is not empty before adding
    if text_summaries:
        add_documents(retriever, text_summaries, texts)
    # Check that table_summaries is not empty before adding
    if table_summaries:
        add_documents(retriever, table_summaries, tables)
    # Check that image_summaries is not empty before adding
    if image_summaries:
        add_documents(retriever, image_summaries, images) #changing this as codellama doesn't deal directly with images
        add_documents(retriever, image_summaries, image_summaries)

    return retriever

In [15]:
import io
import re

from IPython.display import HTML, display
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from PIL import Image


def plt_img_base64(img_base64):
    """Disply base64 encoded string as image"""
    # Create an HTML img tag with the base64 string as the source
    image_html = f'<img src="data:image/jpeg;base64,{img_base64}" />'
    # Display the image by rendering the HTML
    display(HTML(image_html))

def looks_like_base64(sb):
    """Check if the string looks like base64"""
    return re.match("^[A-Za-z0-9+/]+[=]{0,2}$", sb) is not None


def is_image_data(b64data):
    """
    Check if the base64 data is an image by looking at the start of the data
    """
    image_signatures = {
        b"\xFF\xD8\xFF": "jpg",
        b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A": "png",
        b"\x47\x49\x46\x38": "gif",
        b"\x52\x49\x46\x46": "webp",
    }
    try:
        header = base64.b64decode(b64data)[:8]  # Decode and get the first 8 bytes
        for sig, format in image_signatures.items():
            if header.startswith(sig):
                return True
        return False
    except Exception:
        return False

def resize_base64_image(base64_string, size=(128, 128)):
    """
    Resize an image encoded as a Base64 string
    """
    # Decode the Base64 string
    img_data = base64.b64decode(base64_string)
    img = Image.open(io.BytesIO(img_data))

    # Resize the image
    resized_img = img.resize(size, Image.LANCZOS)

    # Save the resized image to a bytes buffer
    buffered = io.BytesIO()
    resized_img.save(buffered, format=img.format)

    # Encode the resized image to Base64
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

# def split_image_text_types(docs):
#     """
#     Split base64-encoded images and texts
#     """
#     b64_images = []
#     texts = []
#     for doc in docs:
#         # Check if the document is of type Document and extract page_content if so
#         if isinstance(doc, Document):
#             doc = doc.page_content
#         if looks_like_base64(doc) and is_image_data(doc):
#             doc = resize_base64_image(doc, size=(1300, 600))
#             b64_images.append(doc)
#         else:
#             texts.append(doc)
#     if len(b64_images) > 0:
#         return {"images": b64_images[:1], "texts": []}
#     return {"images": b64_images, "texts": texts}

def split_image_text_types(docs):
    b64_images = []
    texts = []
    for doc in docs:
        if isinstance(doc, Document):
            content = doc.page_content
        else:
            content = doc

        # If this document is a base64 image (raw), skip it
        if looks_like_base64(content) and is_image_data(content): #we don'rt need it in this case as we r not dealing with images during inferences
            print("\n**********found base64")
            continue  # raw image, not usable here
        else:
            texts.append(content)

    return {"images": [], "texts": texts}



In [66]:
# def img_prompt_func(data_dict):
#     """
#     Join the context into a single string
#     """
#     messages = []

#     # Adding the text for analysis
#     text_message = {
#         "type": "text",
#         "text": (
#             "You are an AI scientist tasking with providing factual answers from a datasheet of a System-on-Chip (SoC) \n"
#             "Use this information to provide answers related to the user question. \n"
#             f"User-provided question: {data_dict['question']}\n\n"
#         ),
#     }
#     messages.append(text_message)
#     # Adding image(s) to the messages if present
#     if data_dict["context"]["images"]:
#         for image in data_dict["context"]["images"]:
#             image_message = {
#                 "type": "image_url",
#                 "image_url": {"url": f"data:image/jpeg;base64,{image}"},
#             }
#             messages.append(image_message)
#     return [HumanMessage(content=messages)]

# def text_only_prompt_func(data_dict):
#     context = "\n".join(data_dict["context"]["texts"])

#     return f"""
# You are an AI assistant helping analyze System-on-Chip (SoC) datasheets.
# Answer the following question using the provided datasheet context only.

# Question:
# {data_dict["question"]}

# Context:
# {context}
# """

# def text_only_prompt_func(data_dict):
#     """
#     Formats the retrieved context into a plain prompt string for HuggingFacePipeline
#     """
#     # prompt = (
#     #     "You are an AI assistant helping interpret information from a datasheet of a System-on-Chip (SoC).\n"
#     #     "Answer the user's question using the relevant text summaries provided below.\n\n"
#     #     f"User Question:\n{data_dict['question']}\n\n"
#     # )
#     prompt = (
#         "You are an expert assistant for Microcontroller datasheets.\n"
#         "Answer the user question concisely based  on the context provided.\n"
#         "Do not repeat the context. Do not explain your reasoning. Just give the final answer.\n\n"
#         f"User question: {data_dict['question']}\n\n"
#     )

#     if data_dict["context"]["texts"]:
#         prompt += "Relevant Context:\n" + "\n\n".join(data_dict["context"]["texts"])

#     return prompt

def text_only_prompt_func(data_dict):
    """
    Formats a CodeLLaMA-compatible prompt with summaries and question.
    """
    prompt = (
        "You are an expert on microcontrollers and can provide detailed information about their peripherals, registers, and fields.\n"
        "Answer the user question concisely based  on the context provided.\n"
        "ONLY output the direct answer as a word, number, address, or short phrase.\n"
        "Do NOT repeat the question or context. Do NOT give explanations or full sentences.\n\n"
        # f"User question: {data_dict['question']}\n\n"
    )
    if data_dict["context"]["texts"]:
        prompt += "Relevant Context:\n" + "\n\n".join(data_dict["context"]["texts"])

    instruction_block = (
        f"Context:\n{prompt}\n\n"
        f"Question: {data_dict['question']}"
    )
    # context=""
    # if data_dict["context"]["texts"]:
    #     context = "\n".join(data_dict["context"]["texts"])
    # formatted_prompt = (
    #     f"### Instruction:\n{prompt}\n"
    #     f"Context:\n{context}\n\n"
    #     f"Question: {data_dict['question']}\n\n"
    #     f"### Response:\n"
    # )

    # Final CodeLLaMA prompt format
    return f"### Instruction:\n{instruction_block}\n\n### Response:\n"
    # return formatted_prompt




def multi_modal_rag_chain(retriever):
    """
    Multi-modal RAG chain
    """

    # RAG pipeline
    chain = (
        {
            "context": retriever | RunnableLambda(split_image_text_types),
            "question": RunnablePassthrough(),
        }
        | RunnableLambda(text_only_prompt_func)
        | model_pipe  # MM_LLM
        | StrOutputParser()
    )

    return chain

In [67]:
def init_rag(chroma_path, pickle_path):
    # if os.path.exists(db_path) and os.path.exists(pickle_path):
    print("Loading existing Chroma database...")
    vectorstore = load_chroma_db(chroma_path)
    
    with open(pickle_path, 'rb') as f:
        loaded_data = pickle.load(f)

    # Access the variables
    texts = loaded_data['texts']
    tables = loaded_data['tables']
    text_summaries = loaded_data['text_summaries']
    table_summaries = loaded_data['table_summaries']
    img_base64_list = loaded_data['img_base64_list']
    image_summaries = loaded_data['image_summaries']

    retriever_multi_vector_img = create_multi_vector_retriever(
        vectorstore,
        text_summaries,
        texts,
        table_summaries,
        tables,
        image_summaries,
        img_base64_list,
    )
    chain_multimodal_rag = multi_modal_rag_chain(retriever_multi_vector_img)
    return chain_multimodal_rag,retriever_multi_vector_img

In [61]:
# def ask_bot(chain_multimodal_rag, query):
#     # docs = retriever_multi_vector_img.get_relevant_documents(query, limit=10)
#     # print(split_image_text_types(docs))
#     return chain_multimodal_rag.invoke(query)

In [68]:
rag_pipeline, retriever_multi_vector_img = init_rag(chroma_db_path, pickle_path)

Loading existing Chroma database...


In [75]:
print(retriever_multi_vector_img.search_kwargs)

{}


In [69]:
import re

def ask_bot(chain_multimodal_rag, query):
    raw_output = chain_multimodal_rag.invoke(query)
    # print(f"Raw output : {raw_output}\n")
    # print("*********************")

    # Extract after prompt marker
    if "### Response:" in raw_output:
        answer = raw_output.split("### Response:")[-1].strip()
    else:
        answer = raw_output.strip()

    # Normalize: clean each line
    lines = [
        re.sub(r"[.,:;!?]+$", "", line.strip())  # Remove trailing punctuation
        for line in answer.splitlines()
        if line.strip()
    ]

    return ", ".join(lines)


In [None]:
def ask_bot_manual(retriever_multi_vector_img, question: str) -> str:
    # Step 1: Retrieve relevant summaries (text + table + image) via RAG
    docs = retriever_multi_vector_img.get_relevant_documents(question, k=8)
    summaries = [
    doc if isinstance(doc, str) else doc.page_content
    for doc in retriever_multi_vector_img.get_relevant_documents(question, k=8)
   ]

    prompt = (
        "You are an expert on microcontrollers and can provide detailed information about their peripherals, registers, and fields.\n"
        "Answer the user question concisely based  on the context provided.\n"
        "ONLY output the direct answer as a word, number, address, or short phrase.\n"
        "Do NOT repeat the question or context. Do NOT give explanations or full sentences.\n\n"
        # f"User question: {data_dict['question']}\n\n"
    )

    # Step 2: Build context prompt
    context = "\n".join(summaries)
    formatted_prompt = (
        f"### Instruction:\n{prompt}\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\n\n"
        f"### Response:\n"
    )

    # Step 3: Tokenize and generate
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=768,
            #temperature=0.0,  # deterministic
            top_p=0.95,
            do_sample=False,
            pad_token_id=model.config.eos_token_id
        )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Step 4: Extract answer cleanly
    answer = generated_text.replace(formatted_prompt, "").strip()
    return answer.split("\n")[0].strip()


In [72]:
def ask_bot_new1(prompt_chain, question: str) -> str:
    # Step 1: Generate the full CodeLLaMA-style prompt using the chain
    formatted_prompt = prompt_chain.invoke(question)

    # Step 2: Run your manual inference
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=768,
            # temperature=0.7,
            top_p=0.95,
            do_sample=False,
            pad_token_id=model.config.eos_token_id #avoiding warning Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.

        )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Step 3: Extract only the answer
    return generated_text.replace(formatted_prompt, "").strip()


In [70]:
ask_bot(rag_pipeline, "List UART regsiters for nrf52840 Microcontroller")

Raw output : ### Instruction:
Context:
You are an expert on microcontrollers and can provide detailed information about their peripherals, registers, and fields.
Answer the user question concisely based  on the context provided.
ONLY output the direct answer as a word, number, address, or short phrase.
Do NOT repeat the question or context. Do NOT give explanations or full sentences.



Question: List UART regsiters for nrf52840 Microcontroller

### Response:

UART0_CONFIG
UART0_BAUD
UART0_RXD
UART0_TXD
UART0_CTS
UART0_RTS
UART0_STATUS
UART0_ERRORSRC
UART0_ENABLE
UART0_MATCH
UART0_MASK
UART0_PSELRXD
UART0_PSELTXD
UART0_PSELRTS
UART0_PSELCTS
UART0_BAUDRATE
UART0_RXD_PTR
UART0_RXD_MAXCNT
UART0_RXD_AMOUNT
UART0_RXD_LIST
UART0_RXD_LIST_MAXCNT
UART0_RXD_LIST_AMOUNT
UART0_TXD_PTR
UART0_TXD_MAXCNT
UART0_TXD_AMOUNT
UART0_TXD_LIST
UART0_TXD_LIST_MAXCNT
UART0_TXD_LIST_AMOUNT
UART0_ERRORSRC_REG
UART0_ENABLE_REG
UART0_MATCH_REG
UART0_MASK_REG
UART0_PSELRXD_REG
UART0_PSELTXD_REG
UART0_PSELRTS_REG
U

'UART0_CONFIG, UART0_BAUD, UART0_RXD, UART0_TXD, UART0_CTS, UART0_RTS, UART0_STATUS, UART0_ERRORSRC, UART0_ENABLE, UART0_MATCH, UART0_MASK, UART0_PSELRXD, UART0_PSELTXD, UART0_PSELRTS, UART0_PSELCTS, UART0_BAUDRATE, UART0_RXD_PTR, UART0_RXD_MAXCNT, UART0_RXD_AMOUNT, UART0_RXD_LIST, UART0_RXD_LIST_MAXCNT, UART0_RXD_LIST_AMOUNT, UART0_TXD_PTR, UART0_TXD_MAXCNT, UART0_TXD_AMOUNT, UART0_TXD_LIST, UART0_TXD_LIST_MAXCNT, UART0_TXD_LIST_AMOUNT, UART0_ERRORSRC_REG, UART0_ENABLE_REG, UART0_MATCH_REG, UART0_MASK_REG, UART0_PSELRXD_REG, UART0_PSELTXD_REG, UART0_PSELRTS_REG, UART0_PSELCTS_REG, UART0_BAUDRATE_REG, UART0_RXD_PTR_REG, UART0_RXD_MAXCNT_REG, UART0_RXD_AMOUNT_REG, UART0_RXD_LIST_REG, UART0_RXD_LIST_MAXCNT_REG, UART0_RXD_LIST_AMOUNT_REG, UART0_TXD_PTR_REG, UART0_TXD_MAXCNT_REG, UART0_TXD_AMOUNT_REG, UART0_TXD_LIST_REG, UART0_TXD_LIST_MAXCNT_REG, UART0_TXD_LIST_AMOUNT_REG, UART0_STATUS_REG, UART0_ERRORSRC_MASK, UART0_ENABLE_MASK, UART0_MATCH_MASK, UART0_MASK_MASK, UART0_PSELRXD_MASK, UART

In [73]:
ask_bot_new1(rag_pipeline, "List UART regsiters for nrf52840 Microcontroller")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'UART0_TXD_LIST_MASK\nUART0_TXD_LIST_MAXCNT_MASK\nUART0_TXD_LIST_AMOUNT_MASK\nUART0_STATUS_MASK\nUART0_STATUS_MAXCNT\nUART0_STATUS_AMOUNT\nUART0_STATUS_LIST\nUART0_STATUS_LIST_MAXCNT\nUART0_STATUS_LIST_AMOUNT\nUART0_ERRORSRC_MAXCNT\nUART0_ERRORSRC_AMOUNT\nUART0_ERRORSRC_LIST\nUART0_ERRORSRC_LIST_MAXCNT\nUART0_ERRORSRC_LIST_AMOUNT\nUART0_ENABLE_MAXCNT\nUART0_ENABLE_AMOUNT\nUART0_ENABLE_LIST\nUART0_ENABLE_LIST_MAXCNT\nUART0_ENABLE_LIST_AMOUNT\nUART0_MATCH_MAXCNT\nUART0_MATCH_AMOUNT\nUART0_MATCH_LIST\nUART0_MATCH_LIST_MAXCNT\nUART0_MATCH_LIST_AMOUNT\nUART0_MASK_MAXCNT\nUART0_MASK_AMOUNT\nUART0_MASK_LIST\nUART0_MASK_LIST_MAXCNT\nUART0_MASK_LIST_AMOUNT\nUART0_PSELRXD_MAXCNT\nUART0_PSELRXD_AMOUNT\nUART0_PSELRXD_LIST\nUART0_PSELRXD_LIST_MAXCNT\nUART0_PSELRXD_LIST_AMOUNT\nUART0_PSELTXD_MAXCNT\nUART0_PSELTXD_AMOUNT\nUART0_PSELTXD_LIST\nUART0_PSELTXD_LIST_MAXCNT\nUART0_PSELTXD_LIST_AMOUNT\nUART0_PSELRTS_MAXCNT\nUART0_PSELRTS_AMOUNT\nUART0'

In [10]:
ask_bot_manual(retriever_multi_vector_img, "List UART regsiters for nrf52840 Microcontroller")

NameError: name 'ask_bot_manual' is not defined

In [74]:
ask_bot_new1(rag_pipeline, "what is base address of SPI peripheral  for nrf52840 Microcontroller")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


''

In [12]:
ask_bot_manual(retriever_multi_vector_img, "Waht is the base address of SPI peripheral  for nrf52840 Microcontroller")

NameError: name 'ask_bot_manual' is not defined

In [71]:
ask_bot(rag_pipeline, "what is base address of SPI peripheral  for nrf52840 Microcontroller")

Raw output : ### Instruction:
Context:
You are an expert on microcontrollers and can provide detailed information about their peripherals, registers, and fields.
Answer the user question concisely based  on the context provided.
ONLY output the direct answer as a word, number, address, or short phrase.
Do NOT repeat the question or context. Do NOT give explanations or full sentences.



Question: what is base address of SPI peripheral  for nrf52840 Microcontroller

### Response:
0x40003000



























































































































































































































































































































































































































































































































'0x40003000'