In [1]:
import lancedb
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import random
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

#### Offline Rag evaluation for text_search 

* Step 1: connect lancedb and open table (diet_table)

In [8]:
db = lancedb.connect('/home/bluemusk/diet-assistant/lancedb')

db.table_names()

['diet_table', 'new_diet_table']

In [9]:
table = db.open_table('diet_table')
table.to_pandas()

Unnamed: 0,chunk_id,text,embedding
0,0,INTRODUCTION TO \nNUTRITION SCIENCE,"[-0.05099819228053093, -0.056592684239149094, ..."
1,1,Introduction to Nutrition Science,"[-0.05099819228053093, -0.056592684239149094, ..."
2,2,This text is disseminated via the Open Educati...,"[-0.019119346514344215, 0.10461532324552536, 0..."
3,3,Instructors can adopt existing LibreTexts text...,"[-0.029113631695508957, 0.010369417257606983, ..."
4,4,"for the construction, customization, and disse...","[-0.017107605934143066, 0.02413615770637989, -..."
...,...,...,...
3590,3590,11.7: Food Processing - CC BY-NC-SA 4.0\n11.8:...,"[-0.02437693625688553, -0.005427069962024689, ..."
3591,3591,3 h t t p s : / / m e d . l i b r e t e x t s ...,"[-0.025427795946598053, 0.023010170087218285, ..."
3592,3592,SA 4.0\n13.4: Fuel Sources - CC BY-NC-SA 4.0\n...,"[-0.08159752935171127, 0.005391544196754694, -..."
3593,3593,14.3: Infancy - CC BY-NC-SA 4.0\n14.4: Toddler...,"[-0.023193208500742912, 0.05462077260017395, -..."


* Step 2: Retrieve Documents and Generate LLM (google-flan-t5-large) Response

In [16]:
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")



In [10]:
def search(query):
    text_search = table.search(query, query_type="fts").limit(5).select(["text"]).to_list()
    return text_search

In [19]:
query = 'What is food processing?'
text_search = search(query)
text_search

[{'text': 'Expand Y our Knowledge  \n1. Write a short script for a public service announcement that explains the benefits and risks of food additives. What do you\nbelieve the public should know about the natural and synthetic substances that are introduced to foods during the processing\nstage?\n2. Summarize in a written discussion why economic experts believe the era of cheap food is over. What factors have contributed to\nrising food prices around the globe?',
  '_score': 10.872123718261719},
 {'text': 'Food Preservation and Processing  \nTwo important aspects of a food system are preservation and processing. Each provides for or protects consumers in different ways.\nFood preservation includes the handling or treating of food to prevent or slow down spoilage. Food processing involves\ntransforming raw ingredients into packaged food, from fresh-baked goods to frozen dinners. Although there are numerous benefits',
  '_score': 9.493345260620117},
 {'text': '11.6: Food Preservation\n11

In [12]:
def build_prompt(query, text_search, tokenizer, max_length=512):
    prompt_template = """
    You are a diet assistant. You're performing a full text search, so use the text column only for answers.
    Based on the provided context, answer the following question completely and coherently. 
    Use the information from the CONTEXT to provide a detailed and full response to the QUESTION.
    Ensure your response is comprehensive and complete, avoiding any abrupt or partial endings.

    QUESTION: {question}
    CONTEXT: {context}
    """.strip()

    context = ""
    for item in text_search:
        context += f'{item.get("text", "")}\n\n'

    prompt = prompt_template.format(question=query, context=context).strip()

    # Truncate prompt if it exceeds the model's max length
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_length)
    truncated_prompt = tokenizer.decode(inputs.input_ids[0], skip_special_tokens=True)
    
    return truncated_prompt

In [13]:
def llm(prompt, model, tokenizer, max_tokens=72):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids
    outputs = model.generate(inputs, max_new_tokens=max_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [14]:
def rag_pipeline(query, model, tokenizer):
    text_search = search(query)
    prompt = build_prompt(query, text_search, tokenizer)
    answer = llm(prompt, model, tokenizer)
    return answer

In [18]:
# Applying the RAG pipeline to this query
query = 'What is food processing?'
outcome = rag_pipeline(query, model, tokenizer)
print(outcome)

Food processing involves transforming raw ingredients into packaged food, from fresh-baked goods to frozen dinners.


##### Exact match offline rag evaluation for text search using Precision and Recall 

In [86]:
def exact_match_evaluation(query, response):
    query_tokens = set(query.lower().split())  
    response_tokens = set(response.lower().split())  
    
    matches = query_tokens.intersection(response_tokens)
    
    precision = len(matches) / len(response_tokens) if response_tokens else 0
    recall = len(matches) / len(query_tokens) if query_tokens else 0

    print(f'The exact match of the query_tokens and response_tokens is {matches}')
    return precision, recall

In [88]:
query = 'What is food processing?'
response = 'Food processing involves transforming raw ingredients into packaged food, from fresh-baked goods to frozen dinners.'

precision, recall = exact_match_evaluation(query, response)
print(f"Precision: {precision:.2f}, Recall: {recall:.2f}")

The exact match of the query_tokens and response_tokens is {'food'}
Precision: 0.07, Recall: 0.25


* Applying RAG pipeline with exact match, precision and recall offline rag evaluation

In [90]:
query = 'What are nutrients'
response = rag_pipeline(query, model, tokenizer)

precision, recall = exact_match_evaluation(query, response)
print(f"Precision: {precision:.2f}, Recall: {recall:.2f}")

The exact match of the query_tokens and response_tokens is {'nutrients', 'are'}
Precision: 0.05, Recall: 0.67


##### LLM (google-flan-t5-base) -as-a-judge offline rag evaluation for text search

* Step 1: Download the Judge model

In [57]:
judge_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
judge_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

* Step 2: Get the prompt

In [72]:
prompt_template = """
You are the best judge in evaluating a Retrieval-Augmented Generation (RAG) system.
Given the following below, your task is to rate the relevance of the generated response to the given question in percentage,
with 100 as the best score.

Query: {query}
Response: {response}
""".strip()

In [74]:
query = 'What is food processing?'
response = 'Food processing involves transforming raw ingredients into packaged food, from fresh-baked goods to frozen dinners.'
prompt = prompt_template.format(query=query, response=response)
print(prompt)

You are the best judge in evaluating a Retrieval-Augmented Generation (RAG) system.
Given the following below, your task is to rate the relevance of the generated response to the given question in percentage,
with 100 as the best score.

Query: What is food processing?
Response: Food processing involves transforming raw ingredients into packaged food, from fresh-baked goods to frozen dinners.


* Step 3: Evaluate RAG with the LLM Judge

In [83]:
def llm_as_a_judge(prompt, judge_model, judge_tokenizer):
    prompt = prompt_template.format(query=query, response=response)
    inputs = judge_tokenizer(prompt, return_tensors="pt")
    outputs = judge_model.generate(inputs.input_ids)
    
    rating = judge_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return rating

In [85]:
rating = llm_as_a_judge(prompt, judge_model, judge_tokenizer)
print(f"LLM Judge Rating: {rating}")



LLM Judge Rating: 100


* Step 4: Applying RAG pipeline with LLM as a Judge for offline rag evaluation

In [91]:
query = 'What are nutrients'
response = rag_pipeline(query, model, tokenizer)

rating = llm_as_a_judge(prompt, judge_model, judge_tokenizer)
print(f"LLM Judge Rating: {rating}")



LLM Judge Rating: 100
