# Preparing  metrics

In [1]:
#model_evaluation
import nltk
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.meteor_score import single_meteor_score
from nltk.translate.meteor_score import single_meteor_score
from nltk.translate.bleu_score import sentence_bleu ,SmoothingFunction
from rouge_score import rouge_scorer


def calculate_bleu_score(reference, hypothesis):
    # Tokenize the reference and hypothesis translations
    reference_tokens = nltk.word_tokenize(reference.lower())
    hypothesis_tokens = nltk.word_tokenize(hypothesis.lower())

    # Calculate BLEU score using NLTK's corpus BLEU implementation
    # We use weights=(1, 0, 0, 0) for unigram precision (BLEU-1)
    bleu_score = nltk.translate.bleu_score.sentence_bleu([reference_tokens], hypothesis_tokens, weights=(1, 0, 0, 0))
    
    return bleu_score






def compute_f1(predicted_answer, true_answer):
    predicted_tokens = set(predicted_answer.lower().split())
    true_tokens = set(true_answer.lower().split())
    common_tokens = predicted_tokens.intersection(true_tokens)
    precision = len(common_tokens) / (len(predicted_tokens) + 1e-8)
    recall = len(common_tokens) / (len(true_tokens) + 1e-8)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-8)
    return f1



def calculate_rouge2_score(reference, candidate):
    scorer = rouge_scorer.RougeScorer(['rouge2'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    return scores['rouge2'].fmeasure


def load_existing_results(file_path):
    try:
        existing_df = pd.read_csv(file_path)
        return existing_df
    except FileNotFoundError:
        return pd.DataFrame(columns=["Question", "True Answer", "Predicted Answer", "BLEU Score", "ROUGE-2 Score"])

def save_dataframe_to_csv(dataframe, file_path):
    dataframe.to_csv(file_path, index=False)





# T5 BASED BOT

In [2]:
from transformers import pipeline
import pandas as pd 
import warnings
warnings.filterwarnings("ignore")

result_file_path = "output3.csv" 
existing_results_df = load_existing_results(result_file_path)

# Create the text-to-text pipeline for question answering
t5_qa = pipeline(task="text2text-generation", model="t5-large", tokenizer="t5-large")

# Read the content of "data.txt" and store it as the context
with open('data.txt', 'r', encoding='utf-8') as file:
    context =file.read()

while True:
    # User inputs the question
    question = input("Enter your question (type 'exit' to end): ")
 
    if question.lower() == 'exit':
        break 
        
   
    
    true_answer = input("Enter the true answer or type 'skip': ")
    
    if true_answer.lower() == "skip":
        # Generate the answer using the T5 model
        answer = t5_qa(f"question: {question} context: {context}")[0]['generated_text']
        print("Answer:", answer)
    else:
        
        
        # Get predicted answer and calculate scores
        predicted_answer = t5_qa(f"question: {question} context: {context}")[0]['generated_text']
      
        bleu_score = calculate_bleu_score(predicted_answer.capitalize(), true_answer.capitalize())
        rouge_score = calculate_rouge2_score(predicted_answer.capitalize(), true_answer.capitalize())
        new_data = {
                "Question": question.capitalize(),
                "Predicted Answer": predicted_answer.capitalize(),
                "True Answer": true_answer.capitalize(),
                "BLEU Score": bleu_score,
                "ROUGE-2 Score": rouge_score
            }
            
        existing_results_df = pd.concat([existing_results_df, pd.DataFrame([new_data])], ignore_index=True)
        save_dataframe_to_csv(existing_results_df, result_file_path)
            
        # Print results
        print("Predicted Answer:", predicted_answer)
        print("True Answer:", true_answer)
        print("BLEU Score:", bleu_score)
        print("ROUGE Score:", rouge_score)
        print("Results updated")
 
  


Enter your question (type 'exit' to end): what is the purpose of the Airline's referntial data
Enter the true answer or type 'skip': Airline’s Referential Data have the purpose to represent the ecosystem (payment and other) of the Airline in Adad, allowing it to better use adad
Predicted Answer: represent the ecosystem (payment and other) of the Airline in Adad, allowing it
True Answer: Airline’s Referential Data have the purpose to represent the ecosystem (payment and other) of the Airline in Adad, allowing it to better use adad
BLEU Score: 0.5517241379310345
ROUGE Score: 0.6666666666666666
Results updated
Enter your question (type 'exit' to end): exit


# T5 FINAL RESULT

In [3]:
df=pd.read_csv("output3.csv") 
df.head(100)

Unnamed: 0,Question,True Answer,Predicted Answer,BLEU Score,ROUGE-2 Score
0,What is etl,a data integration process that combines data...,"Extract, transform, and load, is a data integr...",0.25,0.25641
1,What is sla,Is a contract between a service provider and i...,Service-level agreement (sla) is a contract be...,0.3,0.4
2,What does etl stand for,"Extract , transform , load","Extract, transform, and load",0.818731,0.4
3,What does sla stand for,Service-level agreement,Service-level agreement (sla) is a contract be...,0.002479,0.285714
4,A sla is a contract between who and who,A service provider and its customers,A service provider and its customers,1.0,1.0
5,What is the type of testing preferred at adad,Black box testing (behavioral testing),Black box testing (behavioral testing),1.0,1.0
6,What is the role of the etl configuration panel,The aim of the etl configuration panel is to p...,The aim of the etl configuration panel is to p...,0.515152,0.744186
7,What is load test,Load test is the objective is to ensure that l...,Load test is the objective is to ensure that l...,0.481481,0.631579
8,Give me example of the file name,9429515695_epa_amex_20150818_0453389821883824_...,Ax_merchant_number_epa_amex_20150818,0.0,0.307692
9,What format should the file name respect,Ax_merchant_number_epa_amex_$utc_date$_utc_time,Ax_merchant_number_epa_amex_$utc,0.4,0.769231


# Randomly picking 10 rows from the output

In [4]:
import random
df = pd.read_csv('output3.csv')

# Randomly select five rows
num_samples = 10
random_indices = random.sample(range(len(df)), num_samples)
random_rows = df.iloc[random_indices]

random_rows.head(10)


Unnamed: 0,Question,True Answer,Predicted Answer,BLEU Score,ROUGE-2 Score
21,"When do the status ""pending"" apply",When no settlement has been linked to a sale,When no settlement has been linked to a sale,1.0,1.0
8,Give me example of the file name,9429515695_epa_amex_20150818_0453389821883824_...,Ax_merchant_number_epa_amex_20150818,0.0,0.307692
13,What is unit test in a clean architecture,"In clean architecture, a unit test is a type o...",A unit test is a type of automated test that i...,0.615385,0.810811
19,"When the settlement is considered ""settled""",When it has been successfully matched to one o...,When it has been successfully matched to one o...,0.642857,0.761905
2,What does etl stand for,"Extract , transform , load","Extract, transform, and load",0.818731,0.4
25,What is the purpose of the airline's referntia...,Airline’s referential data have the purpose to...,Adad gl file monitoring dashboard helps airlin...,0.034483,0.0
18,What is the purpose of matching rules module,The matching rules module enables the user to ...,Matching rules module enables the user to chan...,0.888889,0.967742
9,What format should the file name respect,Ax_merchant_number_epa_amex_$utc_date$_utc_time,Ax_merchant_number_epa_amex_$utc,0.4,0.769231
6,What is the role of the etl configuration panel,The aim of the etl configuration panel is to p...,The aim of the etl configuration panel is to p...,0.515152,0.744186
11,What is scaling plan,A scaling plan is a set of rules and costs ass...,A scaling plan is a set of rules and costs ass...,0.319149,0.491228
