In [None]:
!pip install faiss-cpu

# Install Ollama Python SDK
!pip install ollama

# Install Numpy
!pip install numpy

# (Optional) Install Transformers if you need it for other models
!pip install sentence_transformers

# (Optional) Install Pandas if you need it for data handling
!pip install pandas

In [1]:
import os

# Directory containing text files
directory = "../data/"

# Read all text files and combine them
all_texts = []
for filename in os.listdir(directory):
    if filename.endswith(".txt"):
        print(filename)
        with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
            all_texts.append(file.read())

CMU_Traditions.txt
CMU_Wikipedia.txt
Transportation_Pitts_Wikipedia.txt
Pittsburgh_Wikipedia.txt
festivals.txt
Visit_Pittsburgh.txt
current_affairs.txt
City_Of_Pittsbugh.txt
LocalService_Regulations.txt
opera_events.txt
ISP_Regulations.txt
PIRATES.txt
pgh_cultural_trust.txt
PENGUINS.txt
CMU_History.txt
UF_Regulations.txt
carnegie_museums.txt
Bridges_Wikipedia.txt
STEELERS.txt
Events_Pittsburgh.txt
sports_teams.txt
wikis.txt
Events_CMU.txt
annual_fests.txt
CMU_Events_Extra.txt
sports_schedules.txt
History_WIkipedia.txt
museums_list.txt
CMU.txt
Visit_Pitts 1.txt
symphony.txt
Culture_Pitts_wikipedia.txt
Parking_Regulations.txt
Amusement_Regulations.txt
CMU_Welcome_To_Pittsburgh.txt
Payroll_Regulations.txt


In [3]:
def chunk_text(text, max_length=128):
    words = text.split()
    chunks = []

    for i in range(0, len(words), max_length):
        chunks.append(" ".join(words[i:i + max_length]))

    return chunks

# Create chunks from each file content
text_chunks = []
for text in all_texts:
    text_chunks.extend(chunk_text(text, max_length=256))
print(len(text_chunks))

2909


In [5]:
import faiss
from sentence_transformers import SentenceTransformer

# Load a pre-trained model for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

  from tqdm.autonotebook import tqdm, trange


In [6]:
doc_embeddings = model.encode(text_chunks, convert_to_tensor=True)
embedding_dim = doc_embeddings.shape[1]

index = faiss.IndexFlatL2(embedding_dim)
index.add(doc_embeddings.cpu().numpy())

faiss.write_index(index, 'faiss_index.index')

In [8]:
def retrieve_documents(query, k=3):
    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, k)
    matching_chunks = [text_chunks[i] for i in indices[0]]
    return matching_chunks

In [11]:
import requests

#If there are ways to frame the answer in multiple ways, return multiple answers separated by semicolons.

def ollama_generate(question, context):
    instructions = f'''
    Answer the question based on the provided context. 
    Give short answers.
    For example:
    Q1: What are the operating hours of the CMU Pantry?
    A1: 2 p.m. to 5 p.m.
    While answering a question about some event X, do not use "it" or "the event", mention the name of event X in the answer.
    For example:
    Q3: How long does the Gender in Process event last?
    Wrong answer: This event runs from 3:30 to 5 p.m.
    Correct answer: 3:30 to 5 p.m.
    Don't give dates like "11/21", instead the answer should be "November 21, 2024"
    '''
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": "llama2",
        "prompt": f"Question: {question}\nContext: {context}\nInstructions: {instructions}\nAnswer:",
        "stream": False,
        # "temperature": 0.5,  # Reduce to make output more deterministic
        # "top_p": 0.9,        # Control randomness
        "max_tokens": 150    # Limit response length
    }
    response = requests.post(url, json=payload)
    return response.json()['response']

In [13]:
question = "What are the colours of Pittsburgh Steelers?"
retrieved_chunks = retrieve_documents(question)
# print(retrieved_chunks)

context = " ".join(retrieved_chunks)
answer = ollama_generate(question, context)
print(f"Question: {question}\nAnswer: {answer}")

Question: What are the colours of Pittsburgh Steelers?
Answer: Gold and black.


In [15]:
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score
from sentence_transformers import SentenceTransformer, util
import requests
import nltk
nltk.download('punkt_tab')

df = pd.read_csv('../final-dataset/test.csv')

generated_answers = []

# Iterate through each row in the DataFrame and get answers
for _, row in df.iterrows():
    question = row['Question']
    retrieved_chunks = retrieve_documents(question)
    context = " ".join(retrieved_chunks)
    answer = ollama_generate(question, context)
    # answer = ollama_generate(question)
    generated_answers.append(answer)

# Add the generated answers to the DataFrame
df['Generated Answer'] = generated_answers

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/akshitagupta/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [17]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize
from sklearn.metrics import f1_score
import numpy as np
import string

def calculate_metrics(predicted, actual):
    predicted = predicted.strip().lower()
    actual_answers = [ans.strip().lower() for ans in actual.split(';')]
    
    predicted = predicted.translate(str.maketrans('', '', string.punctuation))
    actual_answers = [ans.translate(str.maketrans('', '', string.punctuation)) for ans in actual_answers]
    
    predicted_tokens = word_tokenize(predicted)
    actual_tokens = [word_tokenize(ans) for ans in actual_answers]
    
    exact_match = any(predicted == ans for ans in actual_answers)
    
    f1_scores = []
    for ans in actual_tokens:
        true_positives = len(set(predicted_tokens) & set(ans))
        precision = true_positives / len(predicted_tokens) if predicted_tokens else 0
        recall = true_positives / len(ans) if ans else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        f1_scores.append(f1)
    
    max_f1 = max(f1_scores)
    
    answer_recall = any(all(word in predicted_tokens for word in ans) for ans in actual_tokens)
    
    return exact_match, max_f1, answer_recall


In [19]:
results = []
for _, row in df.iterrows():
    predicted = row['Generated Answer']
    actual = row['Answer']
    
    exact_match, f1_score, answer_recall = calculate_metrics(predicted, actual)
    
    results.append({
        'question': row['Question'],
        'predicted_answer': predicted,
        'actual_answer': actual,
        'exact_match': exact_match,
        'f1_score': f1_score,
        'answer_recall': answer_recall
    })

# Add the results to the DataFrame
df['Exact Match'] = [r['exact_match'] for r in results]
df['F1 Score'] = [r['f1_score'] for r in results]
df['Answer Recall'] = [r['answer_recall'] for r in results]

# Calculate averages
average_exact_match = np.mean(df['Exact Match'])
average_f1 = np.mean(df['F1 Score'])
average_answer_recall = np.mean(df['Answer Recall'])

# Print overall average metrics
print(f"Average Exact Match: {average_exact_match:.4f}")
print(f"Average F1 Score: {average_f1:.4f}")
print(f"Average Answer Recall: {average_answer_recall:.4f}")


Average Exact Match: 0.0345
Average F1 Score: 0.2297
Average Answer Recall: 0.2759
