In [49]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

import os
import tempfile
import uuid
import pandas as pd
import re
from dotenv import load_dotenv
from tqdm import tqdm

In [50]:
def get_embeddings(OPENAI_API_KEY):
    embeddings = OpenAIEmbeddings( model= "text-embedding-ada-002", openai_api_key = OPENAI_API_KEY)
    return embeddings

def get_prompt(question:str,answers:list, OPENAI_API_KEY):
    db = Chroma(persist_directory="code/vectorstore_chroma", embedding_function=get_embeddings(OPENAI_API_KEY))
    retriever = db.as_retriever(seach_type="similarity")
    answers = answers
    relevant_chunks = retriever.invoke(f"{question}")
    """
    Formats the prompt correctly with retrieved context and multiple-choice options.
    """
    relevant_chunks = retriever.invoke(f"{question}")  # Retrieve context

    PROMPT_TEMPLATE = """  
    You are an assistant for multiple-choice question-answering tasks.  
    Use the following pieces of retrieved context to answer  
    the question. If you don't know the answer, say that you  
    don't know. DON'T MAKE UP ANYTHING.  

    {context}  

    ---  
    Answer the question by exclusively giving the letter of the correct answer based on the above context:  
    {question}  
    A) {answer_a}  
    B) {answer_b}  
    C) {answer_c}  
    D) {answer_d}  
    """
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    context_text = "\n\n---\n\n".join([doc.page_content for doc in relevant_chunks])

    # Properly format the template
    prompt = prompt_template.format(
        context=context_text,  
        question=question,  
        answer_a=answers[0],  
        answer_b=answers[1],  
        answer_c=answers[2],  
        answer_d=answers[3]  
    )

    return prompt,relevant_chunks


def get_response(question:str,answers:list,OPENAI_API_KEY):
    prompt,relevant_chunks = get_prompt(question,answers, OPENAI_API_KEY)
    llm = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY)
    return llm.invoke(prompt).content,relevant_chunks


def process_relevant_chunks(relevant_chunks):
    data = []
    
    for doc in relevant_chunks:
        metadata = doc.metadata
        data.append({
            "document_name": metadata.get("source", "").split("/")[-1],  # Extract filename
            "page": metadata.get("page", ""),  # Page number
            "content": doc.page_content.strip()  # Clean text content
        })
    
    return pd.DataFrame(data)

In [51]:
load_dotenv()

True

In [52]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [62]:
exam = pd.read_csv("exam.csv")

In [38]:
gpt_responses = []
for _, row in tqdm(exam.iterrows(), total=len(exam)):
    question = row["Question"]
    answers = [row["Option A"], row["Option B"], row["Option C"], row["Option D"]]
    response = get_response(question,answers, OPENAI_API_KEY)
    gpt_responses.append(response)

# Add responses to DataFrame and save
exam["GPT_response"] = gpt_responses
exam.to_csv("fifa_exam_with_gpt.csv", index=False)


100%|██████████| 20/20 [00:38<00:00,  1.94s/it]


In [39]:
exam

Unnamed: 0,Question,Option A,Option B,Option C,Option D,Correct Answer,GPT_response
0,A player deliberately kicks the ball towards t...,Play on,Indirect free kick for the opposing team,Direct free kick for the opposing team,Penalty kick,B,"(B, [])"
1,A substitute enters the field of play without ...,"Stop play, caution the substitute, and restart...",Allow play to continue and warn the substitute...,"Stop play, caution the substitute, and restart...","Stop play, send off the substitute, and restar...",C,"(C, [])"
2,In which of the following situations is a drop...,The ball strikes the referee and remains in play,The referee stops play due to an injury not ca...,"A goalkeeper releases the ball, then picks it ...",A throw-in is taken incorrectly,B,"(B, [])"
3,A player is in an offside position when their ...,Offside offence,Play continues,Indirect free kick for the defending team,Drop ball,B,"(B, [])"
4,A player in an offside position runs towards t...,Goal stands,Offside offence,Indirect free kick to the defending team,Retake the shot,B,"(B, [])"
5,"A player is fouled inside the penalty area, bu...",Award the penalty kick,Allow play to continue,Stop play and give a free kick from where the ...,Give a goal kick,B,"(B, [])"
6,Which of the following is considered serious f...,A reckless challenge with no contact,A challenge that endangers the safety of an op...,A late challenge with minor contact,Deliberate handling of the ball outside the pe...,B,"(B, [])"
7,A player deliberately prevents a goal by handl...,Yellow card and penalty kick,Red card and penalty kick,Only a penalty kick,Indirect free kick,B,"(B, [])"
8,A defender attempts to play the ball but accid...,Award a penalty kick,Play continues,Indirect free kick for the attacking team,Yellow card for simulation,B,"(B, [])"
9,"A player, after a goal is scored, makes an off...",Ignore the action if it is not excessive,Caution the player,Send the player off,Restart with a dropped ball,C,"(B, [])"


## PYDANTIC EVALUATION

In [None]:
from pydantic import BaseModel, ValidationError

class GPTResponse(BaseModel):
    answer: str = Field(description = "Answer letter (A, B, C, or D) selected by GPT")
    explanation: str = Field(description = "The process of thinking followed by GPT to arrive at the answer")


In [55]:
def get_prompt(question:str,answers:list, OPENAI_API_KEY):
    db = Chroma(persist_directory="code/vectorstore_chroma", embedding_function=get_embeddings(OPENAI_API_KEY))
    retriever = db.as_retriever(seach_type="similarity")
    answers = answers
    relevant_chunks = retriever.invoke(f"{question}")
    """
    Formats the prompt correctly with retrieved context and multiple-choice options.
    """
    relevant_chunks = retriever.invoke(f"{question}")  # Retrieve context

    PROMPT_TEMPLATE = """  
    You are an assistant for multiple-choice question-answering tasks.  
    Use the following pieces of retrieved context to answer  
    the question. If you don't know the answer, say that you  
    don't know. DON'T MAKE UP ANYTHING.  

    {context}  

    ---  
    Answer the question by exclusively giving the letter of the correct answer based on the above context:  
    {question}  
    A) {answer_a}  
    B) {answer_b}  
    C) {answer_c}  
    D) {answer_d} 

    Please return your answer in the following format: 
    Answer: <A/B/C/D>
    Explanation: <short explanation based on the context>
    """

    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    context_text = "\n\n---\n\n".join([doc.page_content for doc in relevant_chunks])

    # Properly format the template
    inputs = {
        "context": context_text,
        "question": question,
        "answer_a": answers[0],
        "answer_b": answers[1],
        "answer_c": answers[2],
        "answer_d": answers[3],
    }

    return prompt_template,inputs



In [56]:
def get_response(question:str,answers:list,OPENAI_API_KEY):
    prompt_template,inputs = get_prompt(question,answers, OPENAI_API_KEY)

    llm = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY)
    rag_chain =  prompt_template  | llm.with_structured_output(GPTResponse, strict=True)
    
    return rag_chain.invoke(inputs)



In [57]:
from tqdm import tqdm

gpt_responses = []
answers_list = []
explanations_list = []

for _, row in tqdm(exam.iterrows(), total=len(exam)):
    question = row["Question"]
    answers = [row["Option A"], row["Option B"], row["Option C"], row["Option D"]]
    
    response = get_response(question, answers, OPENAI_API_KEY)  # returns GPTResponse instance

    try:
        response_dict = response.dict()  # convert Pydantic model to dict
        answers_list.append(response_dict["answer"])
        explanations_list.append(response_dict["explanation"])
    except ValidationError as e:
        print(f"Validation error on question: {question}")
        print(e)
        answers_list.append("N/A")
        explanations_list.append("Invalid response format.")
# Process the relevant chunks and save them
# Finally add to dataframe and save
exam["GPT_answer"] = answers_list
exam["GPT_explanation"] = explanations_list

# Save CSV
exam.to_csv("fifa_exam_with_gpt.csv", index=False)

  0%|          | 0/20 [00:00<?, ?it/s]C:\Users\carlo\AppData\Local\Temp\ipykernel_48976\420847059.py:14: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  response_dict = response.dict()  # convert Pydantic model to dict
  5%|▌         | 1/20 [00:02<00:48,  2.54s/it]C:\Users\carlo\AppData\Local\Temp\ipykernel_48976\420847059.py:14: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  response_dict = response.dict()  # convert Pydantic model to dict
 10%|█         | 2/20 [00:05<00:45,  2.50s/it]C:\Users\carlo\AppData\Local\Temp\ipykernel_48976\420847059.py:14: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in P

In [58]:
exam

Unnamed: 0,Question,Option A,Option B,Option C,Option D,Correct Answer,GPT_answer,GPT_explanation
0,A player deliberately kicks the ball towards t...,Play on,Indirect free kick for the opposing team,Direct free kick for the opposing team,Penalty kick,B,B,When a player deliberately plays the ball to t...
1,A substitute enters the field of play without ...,"Stop play, caution the substitute, and restart...",Allow play to continue and warn the substitute...,"Stop play, caution the substitute, and restart...","Stop play, send off the substitute, and restar...",C,C,The correct procedure when a substitute enters...
2,In which of the following situations is a drop...,The ball strikes the referee and remains in play,The referee stops play due to an injury not ca...,"A goalkeeper releases the ball, then picks it ...",A throw-in is taken incorrectly,B,B,A dropped ball is required to restart play whe...
3,A player is in an offside position when their ...,Offside offence,Play continues,Indirect free kick for the defending team,Drop ball,B,B,The player in an offside position did not inte...
4,A player in an offside position runs towards t...,Goal stands,Offside offence,Indirect free kick to the defending team,Retake the shot,B,A,The player in an offside position was not invo...
5,"A player is fouled inside the penalty area, bu...",Award the penalty kick,Allow play to continue,Stop play and give a free kick from where the ...,Give a goal kick,B,B,If the referee applies the advantage and play ...
6,Which of the following is considered serious f...,A reckless challenge with no contact,A challenge that endangers the safety of an op...,A late challenge with minor contact,Deliberate handling of the ball outside the pe...,B,B,A challenge that endangers the safety of an op...
7,A player deliberately prevents a goal by handl...,Yellow card and penalty kick,Red card and penalty kick,Only a penalty kick,Indirect free kick,B,B,The correct disciplinary action for a player d...
8,A defender attempts to play the ball but accid...,Award a penalty kick,Play continues,Indirect free kick for the attacking team,Yellow card for simulation,B,B,"If no clear foul is evident, the referee shoul..."
9,"A player, after a goal is scored, makes an off...",Ignore the action if it is not excessive,Caution the player,Send the player off,Restart with a dropped ball,C,B,The referee should caution the player for maki...


In [59]:
import pandas as pd

def evaluate_gpt_answers(df, correct_col="Correct Answer", gpt_col="GPT_answer"):
    """
    Evaluate GPT answers against correct answers in a DataFrame.

    Args:
        df (pd.DataFrame): DataFrame containing at least the correct answers and GPT answers.
        correct_col (str): Column name for correct answers.
        gpt_col (str): Column name for GPT answers.

    Returns:
        pd.DataFrame: Original DataFrame with two new columns:
            - 'Is_Correct' (bool): True if GPT answer matches the correct answer, else False.
            - 'Score' (int): 1 if correct, else 0.
        float: Accuracy score (percentage of correct answers).
    """
    df = df.copy()
    df['Is_Correct'] = df[correct_col].str.strip().str.upper() == df[gpt_col].str.strip().str.upper()
    df['Score'] = df['Is_Correct'].astype(int)
    
    accuracy = df['Score'].mean() * 100  # percentage correct
    
    return df, accuracy


In [60]:
evaluated_df, accuracy = evaluate_gpt_answers(exam)
print(f"GPT Accuracy: {accuracy:.2f}%")

# Optionally, save the results
evaluated_df.to_csv("fifa_exam_evaluated.csv", index=False)


GPT Accuracy: 85.00%


In [63]:
exam_unbiased = exam.drop(columns=["Correct Answer"])
exam_unbiased.to_csv("exam_unbiased.csv", index=False)