In [1]:
import lancedb
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import random
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

#### Offline Rag evaluation for semantic search 

* Connect to table (new_diet_table)

In [2]:
db = lancedb.connect('/home/bluemusk/diet-assistant/lancedb')

In [3]:
db.table_names()

['diet_table', 'new_diet_table']

In [4]:
new_table = db.open_table('new_diet_table')
new_table.to_pandas()

Unnamed: 0,chunk_id,text,embedding
0,0,INTRODUCTION TO \nNUTRITION SCIENCE,"[-0.050998192, -0.056592684, -0.05413804, 0.07..."
1,1,Introduction to Nutrition Science,"[-0.050998192, -0.056592684, -0.05413804, 0.07..."
2,2,This text is disseminated via the Open Educati...,"[-0.019119347, 0.10461532, 0.008642459, 0.0719..."
3,3,Instructors can adopt existing LibreTexts text...,"[-0.029113632, 0.010369417, -0.021756086, -0.0..."
4,4,"for the construction, customization, and disse...","[-0.017107606, 0.024136158, -0.00488623, -0.00..."
...,...,...,...
3590,3590,11.7: Food Processing - CC BY-NC-SA 4.0\n11.8:...,"[-0.024376936, -0.00542707, -0.024001742, 0.07..."
3591,3591,3 h t t p s : / / m e d . l i b r e t e x t s ...,"[-0.025427796, 0.02301017, -0.011724482, 0.101..."
3592,3592,SA 4.0\n13.4: Fuel Sources - CC BY-NC-SA 4.0\n...,"[-0.08159753, 0.005391544, -0.04637359, 0.0655..."
3593,3593,14.3: Infancy - CC BY-NC-SA 4.0\n14.4: Toddler...,"[-0.023193209, 0.054620773, -0.033654038, 0.07..."


#### Using Cosine Similarity for the query embeddings and the LLM generated response embeddings

* Step 1: Retrieve Documents and Generate LLM (google-flan-t5-large) Response

In [5]:
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")



In [6]:
def search_vector(query):
    embed_model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = embed_model.encode(query).tolist()
    semantic_search = new_table.search(query_embedding, query_type='vector', vector_column_name='embedding').limit(5).select(['text']).to_list()
    return semantic_search

In [7]:
query = 'Define the digestive system'
semantic_search = search_vector(query)
semantic_search

[{'text': 'and processed by cells throughout the body for energy or used as building blocks for new cells. The digestive system is one of the\neleven organ systems of the human body, and it is composed of several hollow tube-shaped organs including the mouth, pharynx,\nesophagus, stomach, small intestine, large intestine (colon), rectum, and anus. It is lined with mucosal tissue that secretes digestive',
  '_distance': 0.4034729599952698},
 {'text': 'system is one of the eleven organ systems of the human body and it is composed of several hollow tube-shaped organs including\nthe mouth, pharynx, esophagus, stomach, small intestine, large intestine (or colon), rectum, and anus. It is lined with mucosal tissue\nthat secretes digestive juices (which aid in the breakdown of food) and mucus (which facilitates the propulsion of food through the',
  '_distance': 0.5606390237808228},
 {'text': 'The digestive system is composed of the mouth, pharynx, esophagus, stomach, small intestine, large in

In [8]:
def build_prompt(query, semantic_search, tokenizer, max_length=512):
    prompt_template = """
    You are a diet assistant. You are performing a semantic search, so use the embedding column for your answers.
    Based on the provided context, answer the following question completely and coherently. 
    Use the information from the CONTEXT to provide a detailed and full response to the QUESTION.
    Ensure your response is comprehensive and complete, avoiding any abrupt or partial endings.

    QUESTION: {question}
    CONTEXT: {context}
    """.strip()

    context = ""
    for item in semantic_search:
        context += f'{item.get("embedding", "")}\n\n'

    prompt = prompt_template.format(question=query, context=context).strip()

    # Tokenize and truncate the prompt if it exceeds the max length
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)
    truncated_prompt = tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)
    
    return truncated_prompt

In [9]:
def llm(prompt, model, tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids
    outputs = model.generate(inputs, max_length=512, num_beams=2, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [10]:
def rag_pipeline(query, table, model, tokenizer):
    semantic_search = search_vector(query)
    prompt = build_prompt(query, semantic_search, tokenizer)
    answer = llm(prompt, model, tokenizer)
    return answer

In [11]:
# Applying the RAG pipeline to a query
query = 'Define the digestive system'
response = rag_pipeline(query, new_table, model, tokenizer)
print(response)

The digestive system is a system of organs that break down food and waste.


* Step 2: Get the Cosine Similarity between query embedding and response embedding

In [12]:
query

'Define the digestive system'

In [13]:
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

In [14]:
query_embedding = embed_model.encode(query).tolist()

In [15]:
response

'The digestive system is a system of organs that break down food and waste.'

In [16]:
response_embedding = embed_model.encode(response).tolist()

In [17]:
cos_sim = cosine_similarity([query_embedding], [response_embedding])[0][0]
cos_sim_rounded = round(cos_sim * 100, 2)
print(f'Cosine Similarity betwenen this Query and RAG Response is {cos_sim_rounded}%')

Cosine Similarity betwenen this Query and RAG Response is 87.96%


In [18]:
def compute_cosine_similarity(query, response):
    embed_model = SentenceTransformer('all-MiniLM-L6-v2')
    
    query_embedding = embed_model.encode(query).tolist()
    response_embedding = embed_model.encode(response).tolist()
    cos_sim = cosine_similarity([query_embedding], [response_embedding])[0][0]
    cos_sim_rounded = round(cos_sim * 100, 2)
    print(f'Cosine Similarity between this Query and RAG Response is {cos_sim_rounded}%')

In [19]:
compute_cosine_similarity(query, response)

Cosine Similarity between this Query and RAG Response is 87.96%




* Step 3: Applying the RAG and Cosine Similarity to another query

In [20]:
query = 'Describe the Central Nervous System'
response = rag_pipeline(query, new_table, model, tokenizer)

compute_cosine_similarity(query=query, response=response)



Cosine Similarity between this Query and RAG Response is 81.96%


In [21]:
query = 'What are Nutrients'
response = rag_pipeline(query, new_table, model, tokenizer)

compute_cosine_similarity(query=query, response=response)



Cosine Similarity between this Query and RAG Response is 68.85%


#### LLM (google-flan-t5-base) -as-a-judge offline rag evaluation for vector search

* Note: codes below got from RAG_eval_textsearch

In [22]:
judge_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
judge_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")



In [23]:
prompt_template = """
You are the best judge in evaluating a Retrieval-Augmented Generation (RAG) system.
Given the following below, your task is to rate the relevance of the generated response to the given question in percentage,
with 100 as the best score.

Query: {query}
Response: {response}
""".strip()

In [24]:
query = 'Define the digestive system'
response = 'The digestive system is a system of organs that break down food and waste.'
prompt = prompt_template.format(query=query, response=response)
print(prompt)

You are the best judge in evaluating a Retrieval-Augmented Generation (RAG) system.
Given the following below, your task is to rate the relevance of the generated response to the given question in percentage,
with 100 as the best score.

Query: Define the digestive system
Response: The digestive system is a system of organs that break down food and waste.


In [25]:
def llm_as_a_judge(prompt, judge_model, judge_tokenizer):
    prompt = prompt_template.format(query=query, response=response)
    inputs = judge_tokenizer(prompt, return_tensors="pt")
    outputs = judge_model.generate(inputs.input_ids)
    
    rating = judge_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return rating

In [26]:
rating = llm_as_a_judge(prompt, judge_model, judge_tokenizer)
print(f"LLM Judge Rating: {rating}%")



LLM Judge Rating: 100%


* Applying RAG pipeline with LLM as a Judge for offline rag evaluation and getting cosine similarity

In [27]:
query = 'Describe the respiratory system for me'
response = rag_pipeline(query, new_table, model, tokenizer)
compute_cosine_similarity(query=query, response=response)
print('\n')
rating = llm_as_a_judge(prompt, judge_model, judge_tokenizer)
print(f"LLM Judge Rating: {rating}%")

Cosine Similarity between this Query and RAG Response is 83.28%






LLM Judge Rating: 100%
