<a href="https://colab.research.google.com/github/aidanamergembayeva/AutomatedAssessmentSystemWithAI/blob/main/model2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset, DataLoader
import logging
import sys

In [17]:
def load_data(file_path):
    return pd.read_csv(file_path, sep='\t', encoding='ISO-8859-1')
# /content/asap-aes
train_data = load_data('/content/asap-aes/training_set_rel3.tsv')
valid_data = load_data('/content/asap-aes/valid_set.tsv')
test_data = load_data('/content/asap-aes/test_set.tsv')

In [18]:
print("Train Data Columns:", train_data.columns)
print("Validation Data Columns:", valid_data.columns)
print("Test Data Columns:", test_data.columns)

Train Data Columns: Index(['essay_id', 'essay_set', 'essay', 'rater1_domain1', 'rater2_domain1',
       'rater3_domain1', 'domain1_score', 'rater1_domain2', 'rater2_domain2',
       'domain2_score', 'rater1_trait1', 'rater1_trait2', 'rater1_trait3',
       'rater1_trait4', 'rater1_trait5', 'rater1_trait6', 'rater2_trait1',
       'rater2_trait2', 'rater2_trait3', 'rater2_trait4', 'rater2_trait5',
       'rater2_trait6', 'rater3_trait1', 'rater3_trait2', 'rater3_trait3',
       'rater3_trait4', 'rater3_trait5', 'rater3_trait6'],
      dtype='object')
Validation Data Columns: Index(['essay_id', 'essay_set', 'essay', 'domain1_predictionid',
       'domain2_predictionid'],
      dtype='object')
Test Data Columns: Index(['essay_id', 'essay_set', 'essay', 'domain1_predictionid',
       'domain2_predictionid'],
      dtype='object')


In [19]:
def preprocess_data(data, essay_set=None, is_train=True):
    if essay_set is not None:
        data = data[data['essay_set'] == essay_set]
    essays = data['essay'].tolist()
    if is_train:
        if 'domain1_score' in data.columns:
            scores = data['domain1_score'].astype(float).tolist()
        else:
            raise KeyError("Column 'domain1_score' not found in dataset.")
    else:
        scores = [0] * len(essays)
    return essays, scores


In [20]:
train_essays, train_scores = preprocess_data(train_data, essay_set=1)
valid_essays, valid_scores = preprocess_data(valid_data, essay_set=1, is_train=False)
test_essays, test_scores = preprocess_data(test_data, essay_set=1, is_train=False)


In [21]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

In [22]:
class EssayDataset(Dataset):
    def __init__(self, essays, scores, tokenizer, max_length=128):
        self.essays = essays
        self.scores = scores
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.essays)

    def __getitem__(self, idx):
        essay = str(self.essays[idx])
        score = self.scores[idx]

        encoding = self.tokenizer(
            essay,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(score, dtype=torch.float).unsqueeze(0)  # Ensure correct shape
        }

In [23]:
train_dataset = EssayDataset(train_essays, train_scores, tokenizer)
valid_dataset = EssayDataset(valid_essays, valid_scores, tokenizer)
test_dataset = EssayDataset(test_essays, test_scores, tokenizer)

In [24]:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=1)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
from sklearn.metrics import mean_squared_error

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.squeeze()
    labels = labels.squeeze()
    rmse = np.sqrt(mean_squared_error(labels, predictions))
    accuracy = ((predictions.round() == labels).sum() / len(labels)).item()
    return {"rmse": rmse, "accuracy": accuracy}

In [26]:
os.environ["WANDB_DISABLED"] = "true"

training_args = TrainingArguments(
    output_dir="./results",
    run_name="essay_scoring_experiment",
    eval_strategy="steps",
    eval_steps=200,
    learning_rate=3e-5,
    gradient_accumulation_steps=2,
    per_device_train_batch_size=16,  # Halve the training batch size
    per_device_eval_batch_size=16,   # Halve the evaluation batch size
    num_train_epochs=3,
    warmup_steps=300,
    weight_decay=0.01,
    logging_dir='./logs',
    save_strategy="steps",
    save_steps=200,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    logging_steps=100,
    disable_tqdm=False,
    fp16=False
)


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [27]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    processing_class=tokenizer,
    compute_metrics=compute_metrics
)

In [28]:
trainer.train()

Step,Training Loss,Validation Loss


Step,Training Loss,Validation Loss


TrainOutput(global_step=168, training_loss=30.832377115885418, metrics={'train_runtime': 7865.5186, 'train_samples_per_second': 0.68, 'train_steps_per_second': 0.021, 'total_flos': 351842099703552.0, 'train_loss': 30.832377115885418, 'epoch': 3.0})

In [29]:
model.save_pretrained("./saved_model")
tokenizer.save_pretrained("./saved_model")

('./saved_model/tokenizer_config.json',
 './saved_model/special_tokens_map.json',
 './saved_model/vocab.json',
 './saved_model/merges.txt',
 './saved_model/added_tokens.json')

In [33]:
results = trainer.evaluate()
print("Validation RMSE:", results['eval_rmse'])


Validation RMSE: 8.801878929138184


In [31]:
test_loader = DataLoader(test_dataset, batch_size=8)
predictions = []
model.eval()
with torch.no_grad():
    for batch in test_loader:
        inputs = {
            'input_ids': batch['input_ids'].to(model.device),
            'attention_mask': batch['attention_mask'].to(model.device)
        }
        outputs = model(**inputs)
        predictions.extend(outputs.logits.squeeze().cpu().numpy())

In [32]:
predictions = np.array(predictions)
print("Test RMSE:", np.sqrt(mean_squared_error(test_scores, predictions)))
np.savetxt("test_predictions.csv", predictions, delimiter=",")

Test RMSE: 8.811080325241443
