Validation Part

In [19]:
import pandas as pd
from openai import OpenAI
from sklearn.metrics import mean_squared_error
import numpy as np
# Shared API client used by predict_band_score for every chat-completion call.
# NOTE(review): constructed with no arguments, so credentials are presumably
# picked up from the environment rather than hard-coded here — confirm.
client = OpenAI()


In [None]:
# Read the processed validation split into a DataFrame
file_path = './dataset/processed/validation.csv'
data = pd.read_csv(file_path)

# The band column may contain the non-numeric label '<4'. Map that label to
# 3.5 and coerce every other entry to float so the column can be used in the
# MSE calculation.
data['band'] = data['band'].map(
    lambda raw_band: float(raw_band) if raw_band != '<4' else 3.5
)

# Preview the first rows as the cell output
data.head()

Unnamed: 0,prompt,essay,band
0,In some countries more and more people are bec...,"In recent years, many people are concerned to ...",8.5
1,"In some countries, more and more people are be...","In the modern world, many people have increasi...",6.0
2,The best way to solve the world's environmenta...,"In recent years, there has been a debate on wh...",8.5
3,"In some countries, more and more people are be...",Houses are the place that people live which is...,4.0
4,Some people believe that eventually all jobs w...,While it is true that in many nations of peopl...,8.0


In [23]:
# Function to predict IELTS band score using GPT model with structured API call
def predict_band_score(prompt, essay, model="gpt-4o-mini"):
    """Ask a chat model for the overall IELTS band score of an essay.

    Args:
        prompt: The writing-task prompt the essay answers.
        essay: The essay text to be scored.
        model: Name of the chat-completions model to query. Defaults to
            "gpt-4o-mini", so two-argument calls keep using that model.

    Returns:
        The predicted band as a float. Any score below 4.0 is collapsed to
        3.5, the same numeric stand-in the notebook uses for the '<4' label.
        Returns None (after printing a message) when the reply is empty, is
        not a plain number, or falls outside the 0-9 band scale.
    """
    # Prepare the chat completion request
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an IELTS examiner who scores essays based on writing standards. Output overall band score float number only"},
            {"role": "user", "content": f"{prompt}\nEssay: {essay}"}
        ]
    )

    # The message content may be None; treat that as an empty reply so it
    # falls through to the conversion-error path instead of raising.
    result = (response.choices[0].message.content or "").strip()

    try:
        predicted_score = float(result)
    except ValueError:
        print("Prediction error: unable to convert output to float.")
        return None

    # float() also accepts "nan", "inf" and arbitrary magnitudes. None of
    # those are valid bands, and a NaN would poison the downstream MSE.
    # (NaN fails both comparisons, so it is rejected here too.)
    if not 0.0 <= predicted_score <= 9.0:
        print(f"Prediction error: score {result!r} is outside the 0-9 band range.")
        return None

    if predicted_score < 4.0:
        return 3.5  # Convert low scores to 3.5 for comparison
    return predicted_score

In [25]:
# Calculate MSE and accuracy
def evaluate_model(data, tolerance=0.5):
    """Score every essay in `data` and compare predictions with the true bands.

    Args:
        data: Table exposing `iterrows()` whose rows provide 'prompt',
            'essay' and 'band' entries.
        tolerance: Largest absolute gap between true and predicted band that
            still counts as a correct prediction. Defaults to 0.5.

    Returns:
        A `(mse, accuracy)` tuple: the mean squared error over the rows that
        received a prediction, and the fraction of those rows whose
        prediction lies within `tolerance` of the true band.

    Raises:
        ValueError: If no row produced a usable prediction, so neither
            metric is defined.
    """
    true_scores = []
    predicted_scores = []

    for _, row in data.iterrows():
        prompt, essay, true_score = row['prompt'], row['essay'], row['band']
        predicted_score = predict_band_score(prompt, essay)

        # Rows the model could not score are excluded from both metrics.
        if predicted_score is not None:
            true_scores.append(true_score)
            predicted_scores.append(predicted_score)

    if not true_scores:
        raise ValueError("No valid predictions were produced; cannot compute MSE or accuracy.")

    mse = mean_squared_error(true_scores, predicted_scores)

    # Compare by distance rather than exact equality so predictions that are
    # not multiples of 0.5 (e.g. 6.2 vs 6.0) still count as within tolerance.
    # The tiny epsilon absorbs floating-point rounding at the boundary.
    hits = sum(
        abs(t - p) <= tolerance + 1e-9
        for t, p in zip(true_scores, predicted_scores)
    )
    accuracy = hits / len(true_scores)
    return mse, accuracy

# Score the whole validation set, then report both metrics
validation_metrics = evaluate_model(data)
mse, accuracy = validation_metrics

print(f"Mean Squared Error: {mse}")
print(f"Accuracy (within 0.5 band): {accuracy * 100:.2f}%")

Mean Squared Error: 2.8625
Accuracy (within 0.5 band): 31.67%


User Input Part

In [None]:
# Take user input for a new prediction
user_prompt = input("Enter the task prompt: ")
user_essay = input("Enter the essay: ")

# Predict band score based on user input. The model ("gpt-4o-mini") is chosen
# inside predict_band_score, so only the prompt and essay are passed here.
predicted_band = predict_band_score(user_prompt, user_essay)

if predicted_band is not None:
    # 3.5 is the value predict_band_score returns for any score below 4,
    # so show it using the dataset's '<4' label.
    if predicted_band == 3.5:
        print("Predicted IELTS band score: <4")
    else:
        print(f"Predicted IELTS band score: {predicted_band}")
else:
    print("Prediction error occurred.")