# Question Answering using GPT

## Part 1: Adding a Context and Answers

In [46]:
def find_start_char(context, answer_text):
    
    # Find the start character index of the answer_text in the context.
    # Returns -1 if the answer_text is not found.
    
    return context.find(answer_text)

context = (
    "A planetary coordinate system (also referred to as planetographic, planetodetic, "
    "or planetocentric) is a generalization of the geographic, geodetic, and the "
    "geocentric coordinate systems for planets other than Earth. Similar coordinate systems "
    "are defined for other solid celestial bodies, such as in the selenographic coordinates "
    "for the Moon. The coordinate systems for almost all of the solid bodies in the Solar "
    "System were established by Merton E. Davies of the Rand Corporation, including Mercury, "
    "Venus, Mars, the four Galilean moons of Jupiter, and Triton, the largest moon of Neptune. "
    "A planetary datum is a generalization of geodetic datums for other planetary bodies, such "
    "as the Mars datum; it requires the specification of physical reference points or surfaces "
    "with fixed coordinates, such as a specific crater for the reference meridian or the best-"
    "fitting equigeopotential as zero-level surface."
)

answers = [
    "A planetary coordinate system (also referred to as planetographic, planetodetic, or planetocentric) is a generalization of the geographic, geodetic, and the geocentric coordinate systems for planets other than Earth.",
    "planetographic, planetodetic, or planetocentric",
    "selenographic coordinates",
    "Merton E. Davies of the Rand Corporation",
    "A planetary datum is a generalization of geodetic datums for other planetary bodies, such as the Mars datum; it requires the specification of physical reference points or surfaces with fixed coordinates, such as a specific crater for the reference meridian or the best-fitting equigeopotential as zero-level surface."
]

for answer in answers:
    start_char = find_start_char(context, answer)
    if start_char == -1:
        print(f"Answer not found in context: {answer}")
    else:
        print(f"Answer: {answer}\nStart Char: {start_char}\n")


Answer: A planetary coordinate system (also referred to as planetographic, planetodetic, or planetocentric) is a generalization of the geographic, geodetic, and the geocentric coordinate systems for planets other than Earth.
Start Char: 0

Answer: planetographic, planetodetic, or planetocentric
Start Char: 51

Answer: selenographic coordinates
Start Char: 305

Answer: Merton E. Davies of the Rand Corporation
Start Char: 443

Answer: A planetary datum is a generalization of geodetic datums for other planetary bodies, such as the Mars datum; it requires the specification of physical reference points or surfaces with fixed coordinates, such as a specific crater for the reference meridian or the best-fitting equigeopotential as zero-level surface.
Start Char: 594



In [47]:
import json

data = {
    "context": "A planetary coordinate system (also referred to as planetographic, planetodetic, or planetocentric) is a generalization of the geographic, geodetic, and the geocentric coordinate systems for planets other than Earth. Similar coordinate systems are defined for other solid celestial bodies, such as in the selenographic coordinates for the Moon. The coordinate systems for almost all of the solid bodies in the Solar System were established by Merton E. Davies of the Rand Corporation, including Mercury, Venus, Mars, the four Galilean moons of Jupiter, and Triton, the largest moon of Neptune. A planetary datum is a generalization of geodetic datums for other planetary bodies, such as the Mars datum; it requires the specification of physical reference points or surfaces with fixed coordinates, such as a specific crater for the reference meridian or the best-fitting equigeopotential as zero-level surface.",
    "qas": [

        # Answerable questions
        {
            "question": "What is a planetary coordinate system?",
            "answers": [{"text": "A planetary coordinate system (also referred to as planetographic, planetodetic, or planetocentric) is a generalization of the geographic, geodetic, and the geocentric coordinate systems for planets other than Earth.", "start_char": 0}],
            "is_impossible": False
        },
        {
            "question": "What are other names for a planetary coordinate system?",
            "answers": [{"text": "Planetographic, planetodetic, or planetocentric", "start_char": 51}],
            "is_impossible": False
        },
        {
            "question": "What is a coordinate system for the Moon called?",
            "answers": [{"text": "Selenographic coordinates", "start_char": 305}],
            "is_impossible": False
        },
        {
            "question": "Who established the coordinate systems for almost all of the solid bodies in the Solar System?",
            "answers": [{"text": "Merton E. Davies of the Rand Corporation", "start_char": 443}],
            "is_impossible": False
        },
        {
            "question": "What is a planetary datum?",
            "answers": [{"text": "A planetary datum is a generalization of geodetic datums for other planetary bodies, such as the Mars datum; it requires the specification of physical reference points or surfaces with fixed coordinates, such as a specific crater for the reference meridian or the best-fitting equigeopotential as zero-level surface.", "start_char": 594}],
            "is_impossible": False
        },

        # Unanswerable questions
        {
            "question": "What is a celestial body?",
            "answers": [],
            "is_impossible": True
        },
        {
            "question": "Where was Merton E. Davies born?",
            "answers": [],
            "is_impossible": True
        },
        {
            "question": "What is the coordinate system for Saturn called?",
            "answers": [],
            "is_impossible": True
        },
        {
            "question": "What is the largest moon of Jupiter?",
            "answers": [],
            "is_impossible": True
        },
        {
            "question": "How many moons does Neptune have?",
            "answers": [],
            "is_impossible": True
        }
    ]
}

with open('squad_questions.json', 'w') as f:
    json.dump(data, f, indent=4)

## Part 2: Prompt GPT to Answer Questions

In [3]:
# Loading the Secret API key
from dotenv import load_dotenv

load_dotenv()

True

In [4]:
responses = []

In [42]:
import openai

with open("squad_questions.json") as f:
    squad_data = json.load(f)

responses = []
for q in squad_data["qas"]:
    prompt = f"""
    Context: {squad_data['context']}

    Question: {q['question']}

    Please provide the exact answer using **only** the sentences from the provided context. 
    If the answer is just a phrase in the context, provide just that phrase. 
    Answer exactly what is asked for.
    Don't include any exta information outside the context.
    **If the answer is not present in the context, respond with "No Answer".**
    
    """

    response = openai.ChatCompletion.create(
        model="gpt-4o-mini", 
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=100,
    )
    
    responses.append({
        "question": q["question"],
        "answer": response["choices"][0]["message"]["content"].strip()
    })

with open("responses.json", "w") as f:
    json.dump(responses, f, indent=4)

## Part 3: Exact Match (EM) and F1-Score Evaluation of Responses

In [48]:
with open("squad_questions.json") as f:
    squad_data = json.load(f)

ground_truth = []

for q in squad_data["qas"]:
    if q.get("answers"):
        correct_answer = q["answers"][0].get("text", "")
    else:
        correct_answer = "No Answer"  

    ground_truth.append({
        "question": q["question"],
        "answer": correct_answer
    })

with open("ground_truth.json", "w") as f:
    json.dump(ground_truth, f, indent=4)

In [49]:
import string
from sklearn.metrics import precision_score, recall_score, f1_score

# Removing casing and punctuation from the answers
def clean_answer(answer):
    return answer.translate(str.maketrans("", "", string.punctuation)).strip().lower()

with open("responses.json") as f:
    predicted_responses = json.load(f)

with open("ground_truth.json") as f:
    ground_truth = json.load(f)

exact_matches = 0
predictions = []
true_labels = []

non_matching_answers = []

for predicted, true in zip(predicted_responses, ground_truth):
    predicted_answer = clean_answer(predicted["answer"])  
    true_answer = clean_answer(true["answer"])  

    if predicted_answer == true_answer:
        exact_matches += 1
    else:
        non_matching_answers.append({
            "question": predicted["question"],
            "predicted_answer": predicted_answer,
            "true_answer": true_answer
        })

    predictions.append(predicted_answer)
    true_labels.append(true_answer)

exact_match_score = exact_matches / len(predicted_responses)

precision = precision_score(true_labels, predictions, average='weighted', zero_division=0)
recall = recall_score(true_labels, predictions, average='weighted', zero_division=0)
f1 = f1_score(true_labels, predictions, average='weighted', zero_division=0)

print(f"Exact Match Score: {exact_match_score:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Exact Match Score: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


In [50]:
print("\nNon-Matching Answers:")
for item in non_matching_answers:
    print(f"Question: {item['question']}")
    print(f"Predicted Answer: {item['predicted_answer']}")
    print(f"True Answer: {item['true_answer']}")
    print("-" * 50)


Non-Matching Answers:


In [56]:
import sacrebleu
import re

with open("responses.json") as f:
    predicted_responses = json.load(f)

with open("ground_truth.json") as f:
    ground_truth = json.load(f)

# Using regular expression to tokenize the sentences, since there was an issue with NLTK
def simple_tokenize(text):
    return re.findall(r'\b\w+\b', text)

tokenized_ground_truth = [simple_tokenize(true["answer"]) for true in ground_truth]
tokenized_predictions = [simple_tokenize(predicted["answer"]) for predicted in predicted_responses]

references = [" ".join(true) for true in tokenized_ground_truth]
hypotheses = [" ".join(predicted) for predicted in tokenized_predictions]

bleu_score = sacrebleu.corpus_bleu(hypotheses, [references])

print(f"BLEU score: {bleu_score.score:.4f}")

BLEU score: 98.3213


In [54]:
tokenized_ground_truth

[['A',
  'planetary',
  'coordinate',
  'system',
  'also',
  'referred',
  'to',
  'as',
  'planetographic',
  'planetodetic',
  'or',
  'planetocentric',
  'is',
  'a',
  'generalization',
  'of',
  'the',
  'geographic',
  'geodetic',
  'and',
  'the',
  'geocentric',
  'coordinate',
  'systems',
  'for',
  'planets',
  'other',
  'than',
  'Earth'],
 ['Planetographic', 'planetodetic', 'or', 'planetocentric'],
 ['Selenographic', 'coordinates'],
 ['Merton', 'E', 'Davies', 'of', 'the', 'Rand', 'Corporation'],
 ['A',
  'planetary',
  'datum',
  'is',
  'a',
  'generalization',
  'of',
  'geodetic',
  'datums',
  'for',
  'other',
  'planetary',
  'bodies',
  'such',
  'as',
  'the',
  'Mars',
  'datum',
  'it',
  'requires',
  'the',
  'specification',
  'of',
  'physical',
  'reference',
  'points',
  'or',
  'surfaces',
  'with',
  'fixed',
  'coordinates',
  'such',
  'as',
  'a',
  'specific',
  'crater',
  'for',
  'the',
  'reference',
  'meridian',
  'or',
  'the',
  'best',
  '