In [None]:
import datasets
import pandas as pd
from datasets import Dataset
from transformers import RobertaForSequenceClassification, RobertaTokenizerFast, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict
import torch
import os

In [None]:
def train_roberta(
    train_csv="/kaggle/input/llm-detect-ai-generated-text/train_essays.csv",
    output_dir="/kaggle/working/finetuned_roberta/",
    model_name="roberta-base",
    test_size=0.2,
    seed=42,
):
    """Fine-tune a RoBERTa sequence classifier on the essay CSV and save it.

    The CSV is loaded with pandas, split into train/validation subsets,
    tokenized on its ``text`` column, and the ``generated`` column is used as
    the label. The model is trained for one epoch, evaluated on the held-out
    split, and both model and tokenizer are written to ``output_dir``.

    Args:
        train_csv: Path to a CSV with ``id``, ``prompt_id``, ``text`` and
            ``generated`` columns.
        output_dir: Directory the fine-tuned model and tokenizer are saved to.
            Created if it does not exist.
        model_name: Checkpoint name or path passed to ``from_pretrained`` for
            both the model and the tokenizer.
        test_size: Fraction of rows held out for evaluation.
        seed: Seed for the train/validation split, so the split is the same
            on every run.

    Returns:
        The metrics object returned by ``trainer.evaluate()`` on the held-out
        split.
    """
    # Use the GPU when present, but do not crash on CPU-only machines.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    roberta_model = RobertaForSequenceClassification.from_pretrained(model_name).to(device)
    tokenizer = RobertaTokenizerFast.from_pretrained(model_name)

    # Load the essays and carve out a reproducible validation split.
    train_df = pd.read_csv(train_csv)
    train_dataset = Dataset.from_pandas(train_df)
    train_val_dataset = train_dataset.train_test_split(test_size=test_size, seed=seed)

    def _tokenize(batch):
        # Every example is padded/truncated to the tokenizer's maximum length.
        return tokenizer(batch["text"], padding="max_length", truncation=True)

    tokenized_dataset = train_val_dataset.map(_tokenize, batched=True)

    # Drop the raw columns and expose the target under the name "labels".
    tokenized_dataset = tokenized_dataset.remove_columns(["id", "prompt_id", "text"])
    tokenized_dataset = tokenized_dataset.rename_column("generated", "labels")

    training_args = TrainingArguments(
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        learning_rate=2e-5,
        num_train_epochs=1,
        weight_decay=0.01,
        output_dir='./results',
        logging_dir='./logs',
        logging_steps=10,
        report_to="none"
    )

    trainer = Trainer(
        model=roberta_model,                       # model to fine-tune
        tokenizer=tokenizer,                       # tokenizer handed to the Trainer alongside the model
        args=training_args,                        # training arguments, defined above
        train_dataset=tokenized_dataset["train"],  # training split
        eval_dataset=tokenized_dataset["test"]     # held-out evaluation split
    )

    trainer.train()

    # Keep the evaluation metrics instead of discarding them.
    eval_metrics = trainer.evaluate()

    os.makedirs(output_dir, exist_ok=True)

    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    return eval_metrics

In [None]:
# Run the fine-tuning pipeline: trains, evaluates, and saves the model and tokenizer.
train_roberta()