In [1]:
pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.3.1-py3-none-any.whl (268 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.3.1
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=e92b3ad7c62301ae3c2924e6ffc1bd32dffd6f85883c9e0b0ee659f47a3be74b
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
import google.generativeai as genai
import uuid  # For generating UUIDs

############################################
# 1) Read data from CSV file
############################################
CSV_FILE_PATH = "/kaggle/input/drug-label-dataset/sample.csv"  # Replace with your actual CSV file path

# Load the CSV file into a Pandas DataFrame
# Assuming the CSV has a column named "context" with text data to process
df = pd.read_csv(CSV_FILE_PATH)
print(f"Loaded {len(df)} rows from CSV file.")

############################################
# 2) Process the DataFrame
############################################
# Create chunks of text from the "context" column
records = []
MAX_TOKENS = 3000  # Example token limit for chunking

for _, row in df.iterrows():
    # Safely get the context field
    context = str(row.get("context", "")).strip()  # Ensure the column name matches your CSV
    if context:
        # Create chunks by token size if necessary
        words = context.split()
        chunks = [" ".join(words)]  # Modify as needed to split into smaller chunks
        for chunk in chunks:
            records.append({
                "chunk": chunk,
                "uuid": str(uuid.uuid4())  # Assign a unique UUID to each chunk
            })

# Convert to a Pandas DataFrame
processed_df = pd.DataFrame(records)
print(f"Created {len(processed_df)} rows after chunking.")

# Replace NaN values with empty strings
processed_df.fillna("", inplace=True)

############################################
# 3) Save the processed data to CSV
############################################
output_csv_path = "processed_data.csv"
processed_df.to_csv(output_csv_path, index=False)
print(f"Processed data saved to {output_csv_path}.")


Loaded 100 rows from CSV file.
Created 100 rows after chunking.
Processed data saved to processed_data.csv.


In [5]:
processed_df.head()

Unnamed: 0,chunk,uuid
0,"Brand Name: Bismuth Stibium, Generic Name: BIS...",1837832a-eab3-4bec-b80f-5cc423a459fe
1,"Effective Time: 20240118, Effective Date: 2024...",3be2d2a2-2a4c-40df-84b1-4a90efaef8f0
2,"Brand Name: Omeprazole, Sodium bicarbonate, Ge...",57cb304c-2c3e-41d0-9e53-3bbd76261e05
3,Brand Name: Diltiazem Hydrochloride Extended-R...,19711f90-3230-4018-a23b-9d6ce523066e
4,"Brand Name: FLUDROCORTISONE ACETATE, Generic N...",1263e60b-03a1-435f-b043-7a01cd91d1a0


In [6]:
processed_df.iloc[0, 0]



In [7]:
from sentence_transformers import SentenceTransformer

# Load the SentenceTransformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Generate embeddings for the context column
processed_df['embeddings'] = processed_df['chunk'].apply(lambda x: model.encode(x).tolist())

# Save embeddings for later use
processed_df.to_parquet("data_with_embeddings_final.parquet", index=False)
print("Data with embeddings saved successfully!")

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Data with embeddings saved successfully!


In [8]:
import faiss
import numpy as np

# Convert embeddings to a numpy array
embeddings = np.array(processed_df['embeddings'].tolist(), dtype='float32')

# Create a FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Save the FAISS index
faiss.write_index(index, "faiss_index_final.bin")
print("FAISS index saved successfully!")


FAISS index saved successfully!


In [9]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

def generate_answer_with_context_t5(query, context):
    model_name = "t5-large"  # or "t5-base", "t5-large"
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    
    input_text = f"Context: {context}\nQuestion: {query}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    
    outputs = model.generate(input_ids=inputs['input_ids'], max_length=100, num_beams=4, early_stopping=True)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text


In [10]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def generate_answer_with_context_gpt2(query, context):
    model_name = "gpt2"  # You can use "gpt2-medium", "gpt2-large" for larger models
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    
    input_text = f"Context: {context}\nQuestion: {query}\nAnswer:"
    
    # Increase max_length to a higher value, ensuring it's within the model's token limit
    max_input_length = 1024  # Ensure this is within the model's max token limit
    inputs = tokenizer(input_text, return_tensors="pt", max_length=max_input_length, truncation=True)
    
    # Set max_new_tokens to control the length of the generated answer
    outputs = model.generate(input_ids=inputs['input_ids'], max_new_tokens=150, num_return_sequences=1, no_repeat_ngram_size=2, temperature=0.7)
    
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Remove the prompt part from the output and return the answer
    answer = generated_text[len(input_text):].strip()
    return answer


In [11]:
from transformers import BertForQuestionAnswering, BertTokenizer
import torch


def generate_answer_with_context_bert(query, context):
    model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
    model = BertForQuestionAnswering.from_pretrained(model_name)
    tokenizer = BertTokenizer.from_pretrained(model_name)

    # Prepare the input text for BERT
    input_text = f"Context: {context}\nQuestion: {query}"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    
    # Print the tokenized input to check if the context and question are properly encoded
    # print("Tokenized Input:", inputs)

    # Run the model to get start and end logits
    outputs = model(**inputs)
    start_scores, end_scores = outputs.start_logits, outputs.end_logits

    # Print the logits to see what the model is predicting
    # print("Start Scores:", start_scores)
    # print("End Scores:", end_scores)

    # Get the most likely start and end positions
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores) + 1

    # Check if the answer span is valid
    if answer_start >= answer_end:
        print("No valid answer found.")
        return "No valid answer found."

    # Decode the answer (removes [CLS], [SEP] tokens and gives you the answer span)
    answer = tokenizer.decode(inputs.input_ids[0][answer_start:answer_end], skip_special_tokens=True)
    
    return answer


In [12]:
from transformers import DistilBertForQuestionAnswering, DistilBertTokenizer
import torch

def generate_answer_with_context_distilbert(query, context):
    model_name = "distilbert-base-uncased-distilled-squad"
    model = DistilBertForQuestionAnswering.from_pretrained(model_name)
    tokenizer = DistilBertTokenizer.from_pretrained(model_name)

    input_text = f"Context: {context}\nQuestion: {query}"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

    outputs = model(**inputs)
    start_scores, end_scores = outputs.start_logits, outputs.end_logits
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores) + 1
    answer = tokenizer.decode(inputs.input_ids[0][answer_start:answer_end])
    return answer


In [13]:
import google.generativeai as genai

def generate_answer_with_context_gemini(query, context):
    # Initialize Google Generative AI model
    genai.configure(api_key="AIzaSyAMCnx5Wde22yIsRL53lB20FETVkCXe2Ws")
    # gemini_model = genai.GenerativeModel('gemini-pro')
    gemini_model = genai.GenerativeModel('gemini-1.5-pro-latest')
    
    # Construct the prompt
    prompt = f"Given the following context:\n{context}\n\nQuestion: {query}\nAnswer:"
    
    try:
        # Use the correct method to generate an answer, check if `generate_content` or `generate` is available
        answer = gemini_model.generate_content(prompt)  # Correct method usage
        # print(answer.text)

        
        # Print and return the answer
        print("Generated Answer:")
        return answer.text
        
    except Exception as e:
        print(f"Error while generating content: {e}")


In [14]:
from transformers import BloomForCausalLM, BloomTokenizerFast

def generate_answer_with_context_bloom(query, context):
    model_name = "bigscience/bloom-560m"
    model = BloomForCausalLM.from_pretrained(model_name)
    tokenizer = BloomTokenizerFast.from_pretrained(model_name)

    # Create the prompt
    input_text = f"Given the following context:\n{context}\n\nQuestion: {query}\nAnswer:"
    
    # Tokenize input text
    inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True, padding=True)

    # Generate the response
    outputs = model.generate(input_ids=inputs['input_ids'], max_new_tokens=100)  # Control only new token generation

    # Decode the generated tokens to text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_text = generated_text.split('Question: ')[-1]

    return generated_text


In [15]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

def generate_answer_with_context_distilgpt2(query, context):
    model_name = "distilgpt2"
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)

    input_text = f"Context: {context}\nQuestion: {query}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True)

    outputs = model.generate(input_ids=inputs['input_ids'], max_new_tokens=150, temperature=0.9)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_text = generated_text.split('Question: ')[-1]

    return generated_text


In [16]:
from transformers import ElectraForQuestionAnswering, ElectraTokenizer
import torch

def generate_answer_with_context_electra(query, context):
    model_name = "google/electra-small-discriminator"
    model = ElectraForQuestionAnswering.from_pretrained(model_name)
    tokenizer = ElectraTokenizer.from_pretrained(model_name)

    input_text = f"Context: {context}\nQuestion: {query}"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

    outputs = model(**inputs)
    start_scores, end_scores = outputs.start_logits, outputs.end_logits
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores) + 1
    answer = tokenizer.decode(inputs.input_ids[0][answer_start:answer_end])
    return answer


In [17]:
from transformers import ReformerForSequenceClassification, ReformerTokenizer

def generate_answer_with_context_reformer(query, context):
    model_name = "google/reformer-enwik8"
    model = ReformerForSequenceClassification.from_pretrained(model_name)
    tokenizer = ReformerTokenizer.from_pretrained(model_name)

    input_text = f"Context: {context}\nQuestion: {query}"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True)

    outputs = model.generate(input_ids=inputs['input_ids'], max_length=150)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text


In [18]:
# import torch
# from transformers import pipeline

# model_id = "meta-llama/Llama-3.2-1B-Instruct"
# pipe = pipeline(
#     "text-generation",
#     model=model_id,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
# )
# messages = [
#     {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
#     {"role": "user", "content": "Who are you?"},
# ]
# outputs = pipe(
#     messages,
#     max_new_tokens=256,
# )
# print(outputs[0]["generated_text"][-1])


In [19]:
# from transformers import AutoModelForCausalLM, AutoTokenizer

# model_name_long = "meta-llama/Llama-3.2-1B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_name_long)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# log.info(f"Loading the model {model_name_long}")
# bf16 = False
# fp16 = True
# if torch.cuda.is_available():
#     major, _ = torch.cuda.get_device_capability()
#     if major >= 8:
#         log.info("Your GPU supports bfloat16: accelerate training with bf16=True")
#         bf16 = True
#         fp16 = False
# # Load the model
# device_map = {"": 0}  # Load on GPU 0
# torch_dtype = torch.bfloat16 if bf16 else torch.float16
# model = AutoModelForCausalLM.from_pretrained(
#     model_name_long,
#     torch_dtype=torch_dtype,
#     device_map=device_map,
# )
# log.info(f"Model loaded with torch_dtype={torch_dtype}")

In [20]:

# Load the FAISS index
index = faiss.read_index("faiss_index_final.bin")

# Example query
query = "what is Bismuth Stibium ?"
query_embedding = model.encode(query).astype('float32').reshape(1, -1)

# Retrieve the top 5 most relevant contexts
distances, indices = index.search(query_embedding, 5)

# Extract the relevant contexts
relevant_contexts = processed_df.iloc[indices[0]]['chunk'].tolist()

# print('relevent context : ',relevant_contexts)
context = relevant_contexts[0]

# For GPT-2
generated_answer = generate_answer_with_context_gpt2(query, context)
print("GPT-2 model")
print(generated_answer)
print("------------------------")

# For T5
generated_answer = generate_answer_with_context_t5(query, context)
print("T5 model")
print(generated_answer)
print("------------------------")

# For BLOOM
generated_answer = generate_answer_with_context_bloom(query, context)
print("bloom model")
print(generated_answer)
print("------------------------")


# For GEMINI
generated_answer = generate_answer_with_context_gemini(query, context)
print("GEMINI model")
print(generated_answer)
print("------------------------")

# For GEMINI
# generated_answer = generate_answer_with_context_reformer(query, context)
# print("REFORMER model")
# print(generated_answer)
# print("------------------------")

# For Electra
generated_answer = generate_answer_with_context_electra(query, context)
print("Electra model")
print(generated_answer)
print("------------------------")

# For Distilgpt2
generated_answer = generate_answer_with_context_distilgpt2(query, context)
print("Distilgpt2 model")
print(generated_answer)
print("------------------------")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


GPT-2 model
It is a natural compound that is used to treat wart, ulcers, and other conditions. It has been used for centuries to relieve warty, sore throats, to prevent the spread of diseases, as well as to cure the common cold.
Bismutum St.ibum is an alkaloid found in the bark of the tree of B. stibus. The bark is composed of a mixture of two parts: the alkaline and the insoluble part. Bistum st.bismum contains a compound called bismulose, which is found naturally in bark. This compound is known as bistulosulfate. In the United States, it is also known by the name bicarbonate
------------------------


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


T5 model
GARLIC CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJUS CHELIDONIUM MAJ
------------------------


config.json:   0%|          | 0.00/693 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

bloom model
what is Bismuth Stibium ?
Answer: Bismuth Stibium is a natural product derived from the roots of the herb Bismuth. It is a natural product derived from the roots of the herb Bismuth. It is a natural product derived from the roots of the herb Bismuth. It is a natural product derived from the roots of the herb Bismuth. It is a natural product derived from the roots of the herb Bismuth. It is a natural product derived from the roots of the herb Bismuth. It is a natural product derived
------------------------
Generated Answer:
GEMINI model
Bismuth Stibium is a homeopathic ointment marketed for the temporary relief of warts.  It contains a mixture of homeopathic ingredients, including Bismuth, Stibium (Antimony), Garlic, Greater Celandine, Turmeric, and American Arborvitae, in a base of white petrolatum, lanolin, mineral oil, and other inactive ingredients.  It is manufactured by Uriel Pharmacy Inc.

------------------------


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Some weights of ElectraForQuestionAnswering were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Electra model

------------------------


config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Distilgpt2 model
what is Bismuth Stibium?
Answer: Bismuth Stibium is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is a compound of the Bismuth Stibium, which is
------------------------


In [None]:
import numpy as np
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
import time
import torch
import pandas as pd
import nltk
nltk.download('punkt')  # Required for BLEU score

# First, let's define our test dataset with some sample questions and reference answers
test_data = [
    {
        'question': 'What is Bismuth Stibium?',
        'context': context,  # Your existing context
        'reference_answer': 'Bismuth Stibium is a human over-the-counter (OTC) drug manufactured by Uriel Pharmacy Inc., used topically for the temporary relief of warts. It contains a blend of active ingredients, including Allium sativa (Garlic), Chelidonium (Greater celandine), Curcuma (Turmeric), Thuja (American arborvitae), Bismuth, and Stibium (Antimony). The product is for external use only and is marketed as a homeopathic remedy based on traditional practices, though it is not FDA evaluated. The ointment is applied once or twice daily to warts, with special instructions to consult a doctor for children under age 2.'
    },
    # Add more test cases if you have them
]

# Keep your existing model functions
models = {
    'GPT-2': generate_answer_with_context_gpt2,
    'T5': generate_answer_with_context_t5,
    # 'BERT': generate_answer_with_context_bert,
    # 'DistilBERT': generate_answer_with_context_distilbert,
    'BLOOM': generate_answer_with_context_bloom,
     'GEMINI': generate_answer_with_context_gemini,
    'DISTILGPT2': generate_answer_with_context_distilgpt2
}

def evaluate_single_answer(predicted, reference):
    # Initialize ROUGE scorer
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    
    # ROUGE scores
    rouge_scores = scorer.score(predicted, reference)
    
    # BLEU score
    ref_tokens = [reference.split()]
    pred_tokens = predicted.split()
    try:
        bleu = sentence_bleu(ref_tokens, pred_tokens)
    except:
        bleu = 0
    
    # Exact Match
    exact_match = 1 if predicted.lower().strip() == reference.lower().strip() else 0
    
    # F1 Score
    pred_words = set(predicted.lower().split())
    ref_words = set(reference.lower().split())
    
    precision = len(pred_words & ref_words) / len(pred_words) if pred_words else 0
    recall = len(pred_words & ref_words) / len(ref_words) if ref_words else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0
    
    return {
        'rouge1': rouge_scores['rouge1'].fmeasure,
        'rouge2': rouge_scores['rouge2'].fmeasure,
        'rougeL': rouge_scores['rougeL'].fmeasure,
        'bleu': bleu,
        'exact_match': exact_match,
        'f1': f1
    }

def evaluate_model(model_name, model_func, test_data):
    all_metrics = []
    execution_times = []
    
    for test_case in test_data:
        # Measure execution time
        start_time = time.time()
        predicted_answer = model_func(test_case['question'], test_case['context'])
        execution_time = time.time() - start_time
        
        # Calculate metrics
        metrics = evaluate_single_answer(predicted_answer, test_case['reference_answer'])
        metrics['execution_time'] = execution_time
        all_metrics.append(metrics)
        
        # Print individual results
        print(f"\n{model_name} Results for Question: {test_case['question']}")
        print(f"Predicted Answer: {predicted_answer}")
        print(f"Reference Answer: {test_case['reference_answer']}")
        print(f"Metrics: {metrics}")
    
    # Calculate averages
    avg_metrics = {
        metric: np.mean([m[metric] for m in all_metrics])
        for metric in all_metrics[0].keys()
    }
    
    return avg_metrics

# Run evaluation for all models
results = {}
for model_name, model_func in models.items():
    print(f"\nEvaluating {model_name}...")
    try:
        results[model_name] = evaluate_model(model_name, model_func, test_data)
    except Exception as e:
        print(f"Error evaluating {model_name}: {str(e)}")
        continue

# Create final report
df_results = pd.DataFrame(results).round(4)
print("\nFinal Evaluation Results:")
print(df_results)

# Save results to CSV
df_results.to_csv('rag_evaluation_results.csv')
print("\nResults saved to 'rag_evaluation_results.csv'")

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!

Evaluating GPT-2...


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().



GPT-2 Results for Question: What is Bismuth Stibium?
Predicted Answer: This product is a topical product that is used to treat wart, ulcers, and other skin conditions. It is also used for topical use to prevent or treat other conditions such as acne, psoriasis, eczema, rashes, dry skin, skin cancer, etc.
What is the difference between Bistuth and Bactulose? BISTUTH is an antiseptic that has been used in the treatment of psoriatic ulcerative colitis, acne vulgaris, dermatitis and psorsitis. BACTULOSE is another topical antisera that was used as a treatment for psorectal ulicosis. The difference is that BIS is not a preservative. This is because B
Reference Answer: Bismuth Stibium is a human over-the-counter (OTC) drug manufactured by Uriel Pharmacy Inc., used topically for the temporary relief of warts. It contains a blend of active ingredients, including Allium sativa (Garlic), Chelidonium (Greater celandine), Curcuma (Turmeric), Thuja (American arborvitae), Bismuth, and Stibium (Anti

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
