In [14]:
# MTEB: Massive text embedding model 
# https://huggingface.co/spaces/mteb/leaderboard

# open source llm https://huggingface.co/google/gemma-7b-it
#create write token from huggingface
# gemma-7b-it, gemma-2b-it
import os
import time
from datetime import datetime
# Flag to print debug messages/write debug files
local_debug = True
# Remove huggingface token after execution for security purpose and comment below line
#os.environ['HF_TOKEN'] = ''
if local_debug :
    print(f"hf_token_val len", len(os.getenv('HF_TOKEN')))

hf_token_val len 37


In [4]:
from sentence_transformers import SentenceTransformer

def get_query_embedding(input_query: str) -> list:
    embedding_model = SentenceTransformer(model_name_or_path='all-mpnet-base-v2')    
    query_embedding = embedding_model.encode(input_query)
    query_embedding_list = query_embedding.tolist()        
    return query_embedding_list

  from .autonotebook import tqdm as notebook_tqdm





In [5]:

# CREATE OR REPLACE FUNCTION get_embedding(	
# 	rows_limit int,
#     embedding_input vector(768)
# )
# RETURNS TABLE (id bigint, chunk varchar(2500), embedding vector(768), cosine_similarity DOUBLE PRECISION)
# LANGUAGE plpgsql
# AS $$
# BEGIN    
# 	RETURN QUERY SELECT ni.id,ni.chunk, ni.embedding, 1 - (ni.embedding <=> embedding_input) AS cosine_similarity FROM nutritionitems ni
# 	ORDER BY cosine_similarity DESC LIMIT rows_limit;
# END;
# $$;

# select get_embedding(2::int,'[5.99734336e-02,-1.30569497e-02]'::vector);


In [6]:
import psycopg2
import numpy as np
import os
from dotenv import load_dotenv
from psycopg2 import Error

class PostgreSQLManager:
    def __init__(self, db_params):
        """
        Initializes the database connection.
        db_params should be a dictionary with keys like 'host', 'database', 'user', 'password', 'port'.
        """
        self.db_params = db_params
        self.connection = None

    def connect(self):
        """Establishes a connection to the PostgreSQL database."""
        try:
            self.connection = psycopg2.connect(**self.db_params)
            self.connection.autocommit = False  # Disable autocommit for explicit transactions
            print("Database connection established successfully.")
        except Error as e:
            print(f"Error connecting to database: {e}")
            self.connection = None

    def disconnect(self):
        """Closes the database connection."""
        if self.connection:
            self.connection.close()
            print("Database connection closed.")

    def execute_query(self, query, params=None, fetch_result=False):
        """Helper method to execute a query and handle transactions."""
        if not self.connection:
            print("No database connection. Please connect first.")
            return None

        try:
            with self.connection.cursor() as cursor:
                if params:
                    cursor.execute(query, params)
                else:
                    cursor.execute(query)

                if fetch_result:
                    return cursor.fetchall()
                else:
                    self.connection.commit()  # Commit changes for CUD operations
                    return True
        except Error as e:
            self.connection.rollback()  # Rollback on error
            print(f"Database operation failed: {e}")
            return None

In [7]:
import psycopg2
import numpy as np
import os
from dotenv import load_dotenv

def get_relevant_chunks(limit:int, query_embedding_list:str) -> list:
    load_dotenv()
    DB_NAME = os.getenv("DB_NAME")
    DB_USER = os.getenv("DB_USER")
    DB_PASS = os.getenv("DB_PASS")
    DB_HOST = os.getenv("DB_HOST")
    DB_PORT = os.getenv("DB_PORT")
    db_params = {
            "host": DB_HOST,
            "database": DB_NAME,
            "user": DB_USER,
            "password": DB_PASS,
            "port": DB_PORT
        }

    crud_manager = PostgreSQLManager(db_params)
    crud_manager.connect()

    if crud_manager.connection:
        select_sql = """SELECT * from get_embedding(%s,%s::vector);"""
        nutritionitems = crud_manager.execute_query(select_sql, (limit,query_embedding_list), True)
        print("nutritionitems len", len(nutritionitems))
        retrieved_chunks_dict = []
        retrieved_chunks = []
        for item in nutritionitems:
            retrieved_chunks.append(item[1])
            retrieved_chunks_dict.append({"id":item[0],
                                     "chunk": item[1],
                                     "cosine_similarity": item[3]
                                    })
        crud_manager.disconnect()
        if local_debug:
            retrieved_chunks_file = "Retrieved_chunks_test.txt"        
            if os.path.exists(retrieved_chunks_file):
                os.remove(retrieved_chunks_file)        
            
            with open(retrieved_chunks_file, 'w', encoding='utf-8') as file:
                for item in retrieved_chunks_dict :  
                    file.write(f"{item["id"]} | {item["cosine_similarity"]} | {item["chunk"]} \n\n")
        return retrieved_chunks
        
    

In [8]:
def prompt_formatter(query:str,
                    context_items: list[dict])-> str:
    context = "- "+"\n- ".join([item for item in context_items])            
    
    base_prompt = """Based on the following context items, please answer the query.
Give yourself room to think by extracting relevant passages from the context before answering the query
Don't return the thinking, only return answer.
Make sure your answers are as explanatory as possible.
Use the following examples as reference for the ideal answer style.
\nExample 1:
Query: What are fat-soluble vitamins?
Answer: The fat-soluble vitamins include Vitamin A, Vitamin D, Vitamin E, and vitamin K. These vitamins dissolve in fat and are absorbed and stored in the body's fatty tissues
\nExample 2:
Query: What are the causes of type 2 diabetes?
Answer: Insulin Resistance – Body’s cells don’t respond properly to insulin, leading to elevated blood glucose. Pancreatic Dysfunction – Over time, the pancreas can’t produce enough insulin to compensate for resistance.
\nNow use the following context items to answer user query:
{context}
\nRelevant passages: <extract relevant passages from the context here>
User query: {query}
Answer:"""
    base_prompt = base_prompt.format(context=context, query=query)
    return base_prompt

In [9]:
from huggingface_hub import login
import os
def connect_to_huggingface():
    hf_token_val = os.getenv('HF_TOKEN')
    login(token=hf_token_val)
    if local_debug :
        print(f"hf_token_val len", len(hf_token_val))


In [20]:
#from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

def llm_generate(llm_name:str, formatted_prompt: str) -> str:    
    
    tokenizer = AutoTokenizer.from_pretrained(llm_name)    
    messages = [ {"role": "user", "content": formatted_prompt}]    
    chat_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    prompt_file = "prompt_test.txt"    
    if os.path.exists(prompt_file):
        os.remove(prompt_file)    
    with open(prompt_file, 'w', encoding='utf-8') as file:
        file.write(chat_prompt)
    
    model = AutoModelForCausalLM.from_pretrained(
        llm_name,
        torch_dtype=torch.bfloat16
    )   
    # quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    # model_8bit = AutoModelForCausalLM.from_pretrained(
    #     llm_name,         
    #     quantization_config=quantization_config,
    #     torch_dtype=torch.bfloat16
    # )
    
    inputs = tokenizer.encode(chat_prompt, add_special_tokens=False, return_tensors="pt")
    # outputs = model_8bit.generate(input_ids=inputs.to(model_8bit.device), max_new_tokens=250)
    outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=250)
    output = tokenizer.decode(outputs[0])
    return output    

In [16]:
# No Answer
#input_query = "Describe the process of digestion and absorption of nutrients in the human body."
#input_query = "What is Metabolism of Proteins?"
input_query = "Explain Digestion and Metabolism of Carbohydrates"
start_time = datetime.now()
if local_debug :
    print("start_time: ",start_time)
data_str = "query_embedding_list"
query_embedding_list = get_query_embedding(input_query)
end_time = datetime.now()
elapsed_time = end_time - start_time
if local_debug :
    print("end_time: ",end_time)
    print(f"{data_str} len", len(query_embedding_list))
    print(f"Elapsed time for {data_str}: {elapsed_time.total_seconds():.4f} seconds which is equal to  {elapsed_time.total_seconds()/60:.4f} minutes")

start_time:  2025-11-20 21:17:14.316521
end_time:  2025-11-20 21:17:16.683180
query_embedding_list len 768
Elapsed time for query_embedding_list: 2.3667 seconds which is equal to  0.0394 minutes


In [17]:
start_time = time.time()
data_str = "retrieved_chunks"
retrieved_chunks = get_relevant_chunks(5, query_embedding_list)
end_time = time.time()
elapsed_time = end_time - start_time
if local_debug :
    print(f"{data_str} len", len(retrieved_chunks))
    print(f"Elapsed time for {data_str}: {elapsed_time:.4f} seconds which is equal to  {elapsed_time/60:.4f} minutes")

Database connection established successfully.
nutritionitems len 5
Database connection closed.
retrieved_chunks len 5
Elapsed time for retrieved_chunks: 0.4592 seconds which is equal to  0.0077 minutes


In [21]:

start_time = datetime.now()
if local_debug :
    print("start_time: ",start_time)
data_str = "llm_output"
formatted_prompt = prompt_formatter(input_query,retrieved_chunks)
connect_to_huggingface()
llm_name = "google/gemma-2b-it"
#llm_name = "google/gemma-7b-it"
llm_output = llm_generate(llm_name, formatted_prompt)
print("llm_output: -----", llm_output) 
end_time = datetime.now()
elapsed_time = end_time - start_time
if local_debug :
    print("end_time: ",end_time)
    print(f"{data_str} len", len(llm_output))    
    print(f"Elapsed time for {data_str}: {elapsed_time.total_seconds():.4f} seconds which is equal to  {elapsed_time.total_seconds()/60:.4f} minutes")


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


start_time:  2025-11-20 21:22:50.312739
hf_token_val len 37


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 16.74it/s]


llm_output: ----- <bos><start_of_turn>user
Based on the following context items, please answer the query.
Give yourself room to think by extracting relevant passages from the context before answering the query
Don't return the thinking, only return answer.
Make sure your answers are as explanatory as possible.
Use the following examples as reference for the ideal answer style.

Example 1:
Query: What are fat-soluble vitamins?
Answer: The fat-soluble vitamins include Vitamin A, Vitamin D, Vitamin E, and vitamin K. These vitamins dissolve in fat and are absorbed and stored in the body's fatty tissues

Example 2:
Query: What are the causes of type 2 diabetes?
Answer: Insulin Resistance – Body’s cells don’t respond properly to insulin, leading to elevated blood glucose. Pancreatic Dysfunction – Over time, the pancreas can’t produce enough insulin to compensate for resistance.

Now use the following context items to answer user query:
- 5 Digestion and Metabolism of Carbohydrates John Mathe

## LLM Metrics

- System: Configuration i10, RAM: 16GB, 64 bit windows 11, 1.80 GHz
- without optimization on quantization and running on multi core.
    - Elapsed time for google/gemma-2b-it: 10 to 15 minutes, 1