In [3]:
import requests
from openai import OpenAI
import pandas as pd
import re

In [4]:
import json
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
import numpy as np

In [5]:
# 1. import testing Question dataset [Q]
with open("math_qa_all.json", "r") as qf:
    data = json.load(qf)
    page_content = [q['content'] for q in data]
    page_number = [q['page'] for q in data]
    questions: list = [q['Q'] for q in data]
    answers: list = [q['A'] for q in data]

In [22]:
data[0]

{'page': 19,
 'title': '0 Getting Started',
 'content': '<markdown>\n## Chapter 0\n\n# Getting started\n\nBefore we can start proving things, we need to eliminate certain kinds of statements that we might try to prove. Consider the following statement:\n\n*This sentence is false.*\n\nIs it true or false? If you think about this for a couple of seconds then you’ll get into a bit of a pickle.\n\nNow consider the following statement:\n\n*The happiest donkey in the world.*\n\nIs it true or false? Well it’s not even a sentence; it doesn’t make sense to even *ask* if it’s true or false!\n\nClearly we’ll be wasting our time trying to write proofs of statements like the two listed above—we need to narrow our scope to statements that we might actually have a chance of proving (or perhaps refuting)! This motivates the following (informal) definition.\n\n✦ **Definition 0.1**  \nA *proposition* is a statement to which it is possible to assign a *truth value* (‘true’ or ‘false’). If a proposition i

In [6]:
model_name = 'thenlper/gte-large'
model = SentenceTransformer(model_name)

In [7]:
page_embeddings: np.ndarray = model.encode(page_content) 
questions_embeddings: np.ndarray  = model.encode(questions)

In [8]:
for i in range(len(data)):
    data[i]['page_embedding'] = page_embeddings[i]
    data[i]['question_embedding'] = questions_embeddings[i]

In [9]:
for i in range(len(data)):
    similarity = [float(cos_sim(data[i]['question_embedding'], page_embed)) for page_embed in page_embeddings]
    question_retrived_page_rank = [{k: v} for k, v in zip(page_number, similarity)]
    data[i]['question_retrived_page_rank'] = question_retrived_page_rank

In [10]:
for i in range(len(data)):
    data[i]['question_retrived_page_rank'] = sorted(data[i]['question_retrived_page_rank'], key = lambda d: list(d.values())[0], reverse=True)

In [23]:
def calculate_f1(string1, string2):
    set1 = set(string1.split())
    set2 = set(string2.split())
    tp = len(set1 & set2)
    precision = tp / len(set2) if set2 else 0
    recall = tp / len(set1) if set1 else 0
    if precision + recall == 0:
        f1 = 0
    else:
        f1 = 2 * (precision * recall) / (precision + recall)
    return f1

In [26]:
Error_L = []
F1_L = []
Errors = 0
page_check = 0
tokens = 0
top_k = 3

for d in data:
    query = d['Q']
    answer = d['A']
    retrived_pages = ""
    retrived_indexes = d['question_retrived_page_rank'][:top_k]
    retrived_indexes = [list(retrived_indexes[u].keys())[0] for u in range(len(retrived_indexes))]
    
    for page_num in retrived_indexes:
        for d in data:
            if str(page_num) == str(d['page']):
                retrived_pages += d['content']
                break  # Stop checking other keys for this dictionary

    try:
        client = OpenAI(api_key='OPENAI_API_KEY_PLACEHOLDER')
        r = client.chat.completions.create(
            messages=[
                {
                "role": "system", "content": "You are a math expert",
                "content": "user", "content": "I am giving you a question and a text content. Provide a short answer for the question based on context I gave you. Say nothing else. Context:" + retrived_pages + ", Question:" + query
                }
            ],
            model="gpt-4o-mini",
        )
        res = r.choices[0].message.content
        f1 = calculate_f1(answers[i], res)
        print(f1)
        F1_L.append(f1)
        usage = r.usage
        tokens += usage.total_tokens
        print(tokens)
        
    except Exception as e:
        Errors += 1
        Error_L.append(i)
        print(e)

e
e
e


In [17]:
print("Total QAs:", len(questions))
print("Percentage of page match:", page_check / len(questions))
print("Errors Percentage:", Errors / len(questions))
print("Average F1-Score", sum(F1_L) / len(F1_L))
print("Tokens Completion",tokens)

Total QAs: 498
Percentage of page match: 0.0
Errors Percentage: 0.0


ZeroDivisionError: division by zero

In [15]:
# Example list of dictionaries
list_of_dicts = [{1: 90}, {2: 85}, {3: 95}, {4: 80}, {3: 88}]

# Unordered list of target keys
target_keys = [3, 1, 4]

# Find dictionaries that contain any of the target keys
matching_dicts = []
for key in target_keys:
    for d in list_of_dicts:
        if key in d:
            matching_dicts.append(d)
            break  # Stop checking other keys for this dictionary

print(matching_dicts)


[{3: 95}, {1: 90}, {4: 80}]
