In [None]:
import pandas as pd 
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [None]:
# Read the competition test split into a DataFrame and show it as the cell output.
test_csv_path = '/kaggle/input/learning-agency-lab-automated-essay-scoring-2/test.csv'
test_data = pd.read_csv(test_csv_path)
test_data

In [None]:
# The tokenizer and the sequence-classification model are both restored from
# the same saved checkpoint directory, so the path is spelled out only once.
checkpoint_dir = "/kaggle/input/trained-deberta-large"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)

In [None]:
# Tokenize the test essays, truncating each one to at most 1024 tokens.
#
# Padding is intentionally NOT applied here. Passing padding=True at this
# point pads every essay to the longest essay in the whole test set, which
# inflates memory and inference time. The Trainer in this notebook is built
# with tokenizer=tokenizer, so its default data collator
# (DataCollatorWithPadding) pads each batch only to that batch's longest
# sequence. Predictions are expected to be unchanged apart from
# floating-point noise, since padded positions are masked out.
test_encodings = tokenizer(
    test_data['full_text'].tolist(),
    truncation=True,
    max_length=1024,
)

In [None]:
# Wrap the tokenizer output in a `datasets.Dataset` and attach the essay ids
# as an extra column so each row stays traceable to its source essay.
# NOTE(review): essay_id is not a model input; this presumably relies on the
# Trainer dropping unused columns before batching -- confirm.
essay_ids = test_data['essay_id'].tolist()
test_dataset = Dataset.from_dict(test_encodings).add_column("essay_id", essay_ids)

In [None]:
# Inference-only settings: write any Trainer artifacts to the current
# directory, evaluate 4 examples per device at a time, and disable
# experiment-tracker reporting.
predict_args = TrainingArguments(output_dir=".", report_to="none", per_device_eval_batch_size=4)

In [None]:
# Build a Trainer that is used purely as a batched prediction harness:
# it receives the loaded model, the inference arguments and the tokenizer.
trainer = Trainer(args=predict_args, model=model, tokenizer=tokenizer)

In [None]:
# Run the model over the whole test dataset; the returned object exposes the
# raw model outputs through its `.predictions` attribute.
predictions = trainer.predict(test_dataset=test_dataset)

In [None]:
# Pick, for every essay, the index of the highest-scoring class along the
# last axis and hand the result back as a NumPy array.
# NOTE(review): argmax yields 0-based class indices. If the model was trained
# on labels shifted from the original scores (e.g. score - 1), an offset must
# be applied before submission -- confirm against the training notebook.
model_outputs = torch.tensor(predictions.predictions)
predicted_labels = model_outputs.argmax(dim=-1).numpy()

In [None]:
# Assemble the submission table: one row per essay_id, with the predicted
# class index stored as int32 in the `score` column.
submission = test_data[['essay_id']].assign(score=predicted_labels.astype('int32'))

# Persist the table without the DataFrame index column.
submission.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Show the submission table as the cell output for a final visual check.
submission