In [1]:
pip install sentencepiece


Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import time
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import T5ForConditionalGeneration, T5Tokenizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import pickle

# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(model_kwargs={"device": "cpu"})

# Load your FAISS index
vectorstore = FAISS.load_local(
    "faiss_index", 
    embeddings,
    allow_dangerous_deserialization=True
)

# Create retriever from FAISS index
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})  # Increase top_k for better results

# Load pre-trained SentenceTransformer model for semantic similarity
similarity_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')  # Lightweight and fast

# Load the questions and answers file
def load_questions_and_answers(filename):
    questions_and_answers = {}
    with open(filename, 'r') as file:
        lines = file.readlines()
    i = 0
    while i < len(lines):
        # Read the question (assuming questions are one line long)
        question = lines[i].strip()
        i += 1
        # Collect answer lines (answers may span multiple lines)
        answer_lines = []
        while i < len(lines) and lines[i].strip() != "":
            answer_lines.append(lines[i].strip())
            i += 1
        # Join the answer lines into a single string
        answer = "\n".join(answer_lines)
        # Only add to dictionary if both question and answer are non-empty
        if question and answer:
            questions_and_answers[question] = answer
        while i < len(lines) and lines[i].strip() == "":
            i += 1
    return questions_and_answers

# Function to generate answers using the T5 model
def generate_answer_with_t5(question, context):
    tokenizer = T5Tokenizer.from_pretrained("t5-large")
    model = T5ForConditionalGeneration.from_pretrained("t5-large")
    
    input_text = f"question: {question} context: {context}"
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids
    output = output = model.generate(
        input_ids,
        min_length=100,
        max_length=200,  # Increase the max length for more detailed responses
        num_beams=4,  # Beam search to improve response quality
        no_repeat_ngram_size=2,  # Avoid repetition of n-grams in the response
        length_penalty=1.0,  # Encourage longer answers
        early_stopping=True
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Evaluate the VA performance using T5
def evaluate_va(filename):
    # Load questions and answers from the text file
    correct_answers = load_questions_and_answers(filename)
    
    results = {
        'question': [],
        'context': [],
        'answer': [],
        'time': [],
        'correct': [],
        'similarity_score': []
    }

    for question, expected_answer in correct_answers.items():
        results['question'].append(question)

        # Evaluate with the retriever
        start_time = time.time()

        # Retrieve relevant documents based on the question
        retriever_result = retriever.get_relevant_documents(question, top_k=5)
        context = " ".join([doc.page_content for doc in retriever_result])  # Combine context from multiple docs

        results['context'].append(context)

        # Ensure the context isn't empty
        if not question.strip() or not context.strip():
            print(f"Warning: Empty question or context for input.")
            continue

        # Get the response from the T5 model
        response = generate_answer_with_t5(question, context)

        elapsed_time = time.time() - start_time

        # Append results
        results['answer'].append(response)
        results['time'].append(elapsed_time)

        # Calculate similarity score
        response_embedding = similarity_model.encode(response, convert_to_tensor=True)
        expected_embedding = similarity_model.encode(expected_answer, convert_to_tensor=True)

        similarity_score = cosine_similarity(
            response_embedding.unsqueeze(0).cpu().numpy(),
            expected_embedding.unsqueeze(0).cpu().numpy()
        )[0][0]

        results['similarity_score'].append(similarity_score)

        # Check if the similarity score exceeds the threshold (0.4 for acceptable match)
        is_correct = 1 if similarity_score > 0.4 else 0
        results['correct'].append(is_correct)

    return results

# File containing questions and answers
filename = './nlp/questions_and_answers.txt'

# Evaluate the VA performance using T5
evaluation_results = evaluate_va(filename)

# Calculate accuracy
t5_accuracy = sum(evaluation_results['correct']) / len(evaluation_results['question']) * 100
print(f"T5 Accuracy: {t5_accuracy:.2f}%")

# Calculate the average time taken
average_time = sum(evaluation_results['time']) / len(evaluation_results['time'])

# Print the average time
print(f"Average Time Taken: {average_time:.2f} seconds")

# Print detailed results
print("\nDetailed Results:")
for i in range(len(evaluation_results['question'])):
    print("======================================Question==========================================")
    print(f"{evaluation_results['question'][i]:<30}")
    # print("-------------------------------------Context--------------------------------------------")
    # print(f"{evaluation_results['context'][i]:<50}")
    print("-------------------------------------Answer--------------------------------------------")
    print(f"{evaluation_results['answer'][i]:<50}")
    print("-------------------------------------Evaluation----------------------------------------")
    print(f"{evaluation_results['time'][i]:<15.2f} {evaluation_results['similarity_score'][i]:<15}")



  embeddings = HuggingFaceEmbeddings(model_kwargs={"device": "cpu"})


T5 Accuracy: 86.67%
Average Time Taken: 44.80 seconds

Detailed Results:
What is a list in Python and how are lists defined?
-------------------------------------Answer--------------------------------------------
square brackets []. Lists are defined using parentheses ().  Tuples: A tuple is similar to a list but immutable. It is defining pd.DataFrame(data) # Display the DataFrape print(df)# Display data in the filtered_diff (dif x = 0) . Python for Data Analysis Python is the go-to language for data analysis and machine learning
-------------------------------------Evaluation----------------------------------------
44.45           0.5138731598854065
How do you define a function in Python?
-------------------------------------Answer--------------------------------------------
using the def keyword. Python also supports lambda functions for simple expressions.  Functions in Python are defined using __decorator(func): print("Before the function call") func() - wrapper(): df[['Age'] > 30]