In [None]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW
import torch.nn as nn
from transformers import AutoTokenizer, Trainer, TrainingArguments
from transformers import AutoModelForSequenceClassification, DataCollatorWithPadding
from sklearn.metrics import cohen_kappa_score, classification_report, confusion_matrix, f1_score

In [None]:
# Read the test essays into a DataFrame; later cells use its 'full_text'
# column as model input and its 'essay_id' column for the submission file.
test_csv_path = '/kaggle/input/learning-agency-lab-automated-essay-scoring-2/test.csv'
test_data = pd.read_csv(test_csv_path)

In [None]:
# Local directories that the loading cell passes to `from_pretrained`.
# Directory holding the saved sequence-classification model.
model_path = "/kaggle/working/trained_deberta_model"
# Directory holding the matching saved tokenizer.
tokenizer_path = "/kaggle/working/trained_deberta_tokenizer"

In [None]:
# Choose the inference device here so the notebook survives a fresh
# "Restart Kernel -> Run All": use the GPU when torch can see one,
# otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the sequence-classification model from the local
# directories configured above. `local_files_only=True` restricts loading
# to files already on disk (no download attempt).
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)

# `Module.to` moves the parameters and returns the module itself; rebinding
# the name keeps this cell from echoing the full model repr as its output.
model = model.to(device)

In [None]:
# Encode every test essay in one call: sequences longer than 512 tokens are
# truncated, shorter ones are padded, and the result comes back as PyTorch
# tensors.
essay_texts = test_data['full_text'].tolist()
test_encodings = tokenizer(
    essay_texts,
    truncation=True,
    padding=True,
    max_length=512,
    return_tensors='pt',
)

# Pair each row of token ids with its attention mask and serve them in
# batches of 8, in the same order as the rows of `test_data`
# (no shuffling is requested).
test_dataset = TensorDataset(
    test_encodings['input_ids'],
    test_encodings['attention_mask'],
)
test_loader = DataLoader(test_dataset, batch_size=8)

In [None]:
# Run the classifier over the test set and collect one prediction per essay.
model.eval()  # inference mode for layers that behave differently in training

# Take the device from the model's own parameters so the inputs always land
# where the weights live, instead of relying on a separately defined global.
model_device = next(model.parameters()).device

predicted_classes = []
with torch.no_grad():  # no gradients are needed for prediction
    for input_ids, attention_mask in test_loader:
        # Token ids are explicitly cast to torch.long before the forward pass.
        input_ids = input_ids.to(model_device, dtype=torch.long)
        attention_mask = attention_mask.to(model_device)

        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

        # argmax over dim=1 picks the highest-scoring class for each essay;
        # .tolist() yields plain Python ints rather than numpy scalars.
        predicted_classes.extend(logits.argmax(dim=1).cpu().tolist())

# Adjust the predictions back to the original score range (1-6): class
# index k is reported as score k + 1.
test_preds = [class_index + 1 for class_index in predicted_classes]

In [None]:
# Build the two-column submission table: the essay ids taken from the test
# frame, plus the predicted score for each row, in the same row order.
submission_df = test_data[['essay_id']].assign(score=test_preds)

# Write the table to the working directory without the DataFrame index.
submission_df.to_csv('submission.csv', index=False)