In [1]:
import ast
import numpy as np
from collections import defaultdict
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
from sklearn.metrics import f1_score
import os
from dotenv import load_dotenv


# Read a local .env file (if present) so that settings such as the dataset
# folder are available through os.environ in later cells.
load_dotenv()

True

In [2]:
def load_knowledge_graph(filepath):
    """Read a knowledge-graph file into an adjacency mapping.

    Every non-blank line is parsed with ``ast.literal_eval`` and must
    unpack into exactly three items ``(src, rel, tgt)``.

    Args:
        filepath: Path to a text file holding one triple literal per line.

    Returns:
        A ``defaultdict(list)`` mapping each ``src`` to the list of
        ``(rel, tgt)`` tuples found for it, in file order.

    Raises:
        SyntaxError, ValueError: If a non-blank line is not a valid Python
            literal or does not unpack into three items.
    """
    graph = defaultdict(list)
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank / whitespace-only lines (e.g. trailing empty lines)
                # would make ast.literal_eval raise; skip them instead.
                continue
            src, rel, tgt = ast.literal_eval(line)
            graph[src].append((rel, tgt))
    return graph


def load_questions(filepath):
    """Read the question file into a list of question records.

    Every non-blank line is parsed with ``ast.literal_eval`` and must unpack
    into six items: ``(qid, text, start, _, _, answers)``. The fourth and
    fifth items are ignored. ``answers`` is an iterable of dicts, each of
    which provides an ``'AnswerArgument'`` key.

    Args:
        filepath: Path to a text file holding one question literal per line.

    Returns:
        A list of dicts with keys ``'id'``, ``'text'``, ``'start'`` and
        ``'answers'`` (a set of the ``'AnswerArgument'`` values), in file
        order.

    Raises:
        SyntaxError, ValueError: If a non-blank line is not a valid Python
            literal or does not unpack into six items.
        KeyError: If an answer dict has no ``'AnswerArgument'`` key.
    """
    questions = []
    with open(filepath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank / whitespace-only lines would make ast.literal_eval
                # raise; skip them instead.
                continue
            qid, text, start, _, _, answers = ast.literal_eval(line)
            # Deduplicate the gold answers while extracting them.
            answer_set = {ans['AnswerArgument'] for ans in answers}
            questions.append({'id': qid, 'text': text, 'start': start, 'answers': answer_set})
    return questions


def load_word2vec_model(path):
    """Load a previously saved ``Word2Vec`` model from ``path`` and return it."""
    w2v = Word2Vec.load(path)
    return w2v

In [3]:
def get_rel_score(rel, question, model):
    """Score how well a relation matches a question in embedding space.

    The relation name is looked up with an ``'ns:'`` prefix (added when it is
    missing). The question is lower-cased and tokenized, the vectors of the
    tokens known to the model are averaged, and the cosine similarity between
    that mean vector and the relation vector is returned.

    Args:
        rel: Relation name, with or without the ``'ns:'`` prefix.
        question: Question text.
        model: Object exposing a ``wv`` lookup that supports ``in`` and
            indexing by token.

    Returns:
        The cosine similarity, or ``0.0`` when the relation is not in
        ``model.wv`` or none of the question tokens are.
    """
    if not rel.startswith('ns:'):
        rel = 'ns:' + rel

    vectors = model.wv
    if rel not in vectors:
        return 0.0

    # Collect embeddings only for the tokens the model knows about.
    token_vecs = []
    for token in word_tokenize(question.lower()):
        if token in vectors:
            token_vecs.append(vectors[token])
    if not token_vecs:
        return 0.0

    question_vec = np.mean(token_vecs, axis=0).reshape(1, -1)
    rel_vec = vectors[rel].reshape(1, -1)
    return cosine_similarity(question_vec, rel_vec)[0][0]

In [4]:
def answer_question(question, graph, model, threshold=0.3):
    """Predict answers as the neighbors reached via well-matching relations.

    Looks at every ``(rel, neighbor)`` edge leaving the question's start
    entity and keeps the neighbor when ``get_rel_score(rel, text, model)`` is
    strictly greater than ``threshold``.

    Args:
        question: Dict with at least the keys ``'start'`` and ``'text'``.
        graph: Mapping from an entity to a list of ``(rel, neighbor)`` tuples.
        model: Embedding model forwarded to ``get_rel_score``.
        threshold: Minimum (exclusive) relation score for an edge to count.

    Returns:
        A set of predicted neighbors; empty when the start entity is not in
        ``graph`` or no relation scores above the threshold.
    """
    start = question['start']
    q_text = question['text']
    if start not in graph:
        return set()

    # The score depends only on (relation, question), so compute it once per
    # distinct relation instead of once per edge: an entity commonly has many
    # edges sharing the same relation.
    rel_scores = {}
    predictions = set()
    for rel, neighbor in graph[start]:
        if rel not in rel_scores:
            rel_scores[rel] = get_rel_score(rel, q_text, model)
        if rel_scores[rel] > threshold:
            predictions.add(neighbor)

    return predictions


def compute_f1(true_answers, predicted_answers):
    """Compute the set-based F1 score of predicted answers against gold answers.

    Precision is ``|true ∩ predicted| / |predicted|`` and recall is
    ``|true ∩ predicted| / |true|``. The previous implementation built its
    labels only over the predicted items, so gold answers that were never
    predicted (false negatives) were ignored and recall was effectively 1.0
    whenever at least one prediction was correct.

    Args:
        true_answers: Iterable of gold answers.
        predicted_answers: Iterable of predicted answers.

    Returns:
        The F1 score as a float in ``[0.0, 1.0]``. Returns ``0.0`` when there
        are no predictions or no prediction is correct.
    """
    true_set = set(true_answers)
    pred_set = set(predicted_answers)
    if not pred_set:
        return 0.0

    true_positives = len(true_set & pred_set)
    if true_positives == 0:
        # Also covers an empty gold set; avoids dividing by zero below.
        return 0.0

    precision = true_positives / len(pred_set)
    recall = true_positives / len(true_set)
    return 2 * precision * recall / (precision + recall)

In [5]:
def run_qa_pipeline(kg_path, questions_path, w2v_path):
    """Load all inputs, answer every question, and print per-question and mean F1.

    Args:
        kg_path: Path passed to ``load_knowledge_graph``.
        questions_path: Path passed to ``load_questions``.
        w2v_path: Path passed to ``load_word2vec_model``.

    Returns:
        None. Results are written to stdout: one line per question with its
        F1, predictions and gold answers, followed by the average F1.
    """
    print("Loading data...")
    kg = load_knowledge_graph(kg_path)
    question_list = load_questions(questions_path)
    w2v = load_word2vec_model(w2v_path)

    scores = []
    for item in question_list:
        gold = item['answers']
        predicted = answer_question(item, kg, w2v)
        score = compute_f1(gold, predicted)
        print(f"Q{item['id']} | F1: {score:.4f} | Predicted: {list(predicted)} | Ground Truth: {list(gold)}")
        scores.append(score)

    mean_score = np.mean(scores)
    print(f"\n Average F1 Score across questions: {mean_score:.4f}")

In [7]:
# The dataset folder is taken from the environment rather than hard-coded.
folder_path = os.environ.get("HW6_Q4_DATSET")
if not folder_path:
    # Without this check the failure is an opaque "NoneType + str" TypeError.
    raise RuntimeError(
        "Environment variable HW6_Q4_DATSET is not set; "
        "it must point to the folder containing the dataset files."
    )

# os.path.join works whether or not the folder value ends with a separator.
run_qa_pipeline(
    kg_path=os.path.join(folder_path, 'knowledge_graph.txt'),
    questions_path=os.path.join(folder_path, 'question.txt'),
    w2v_path=os.path.join(folder_path, 'word2vec_train_dev.dat.txt')
)

Loading data...
Q1 | F1: 0.3333 | Predicted: ['m.0b28z', 'm.04g4s8k', 'm.02lctm', 'm.03v0t', 'm.0vzm', 'm.0mmyl', 'm.01_77x6', 'm.04g4s8q', 'm.03q1lvy', 'm.0bx8pn', 'm.042g7t', 'm.01l1jx', 'm.0j729', 'm.01d9r3', 'm.01_d4', 'm.03q1lvh', 'm.06yxd', 'm.02fqwt', 'm.03q1lw4', 'm.0gs227r', 'm.0fv_t', 'm.059g4', 'm.06_7k', 'm.01_6v6w', 'm.07b_l', 'm.027hqc_', 'm.0w7m2x9', 'm.02hcv8', 'm.027wjl3', 'm.02lcqs', 'm.03q1lwl', 'm.02hczc', 'm.02z431v', 'm.02lcrv', 'm.02jw0z', 'm.027wj2_', 'm.03q1lvq', 'm.04g4s90', 'm.09nqf', 'm.0_hhjkj', 'm.01ky66', 'm.09sh942', 'm.01_6pxw', 'm.04g4s8w', 'm.0c9rw5'] | Ground Truth: ['m.027wj2_', 'm.02hcv8', 'm.02lctm', 'm.027wjl3', 'm.02lcqs', 'm.02fqwt', 'm.02hczc', 'm.042g7t', 'm.02lcrv']
Q2 | F1: 0.0000 | Predicted: ['m.0b28z', 'm.04g4s8k', 'm.02lctm', 'm.03v0t', 'm.0vzm', 'm.0mmyl', 'm.01_77x6', 'm.04g4s8q', 'm.03q1lvy', 'm.0bx8pn', 'm.042g7t', 'm.01l1jx', 'm.0j729', 'm.01d9r3', 'm.01_d4', 'm.03q1lvh', 'm.06yxd', 'm.02fqwt', 'm.03q1lw4', 'm.0gs227r', 'm.0fv_t', 