In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from transformers import RobertaTokenizer, RobertaForSequenceClassification
import pandas as pd
import math
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import os
import itertools

# Custom dataset class
class TextValueDataset(Dataset):
    """Dataset that tokenizes one dataframe row per requested item.

    The input text is read from the ``generated_text`` column and the target
    from the ``suddenness`` column (shifted down by one).
    """

    def __init__(self, dataframe, tokenizer, max_length):
        # Only references are stored; tokenization happens lazily per item.
        self.max_length = max_length
        self.tokenizer = tokenizer
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows in the wrapped dataframe."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Return the encoded text and label tensor for the row at ``index``."""
        record = self.dataframe.iloc[index]

        # Request padding and truncation to ``max_length`` so every sample
        # comes back with the same sequence length.
        tokenizer_options = dict(
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        encoded = self.tokenizer.encode_plus(
            record['generated_text'], **tokenizer_options
        )

        # Shift to a zero-based class index (presumably the stored values
        # are 1-5, giving classes 0-4 -- confirm against the data).
        target = record['suddenness'] - 1

        # The tokenizer output carries a leading batch axis; flatten it away.
        sample = {
            key: encoded[key].flatten()
            for key in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(target, dtype=torch.long)
        return sample

# Load dataset
def load_dataset(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame."""
    return pd.read_csv(file_path)

# Create DataLoader
def create_dataloader(df, tokenizer, max_length, batch_size, shuffle=True):
    """Wrap a dataframe in a ``TextValueDataset`` and return a DataLoader.

    Args:
        df: Dataframe handed to ``TextValueDataset``.
        tokenizer: Tokenizer handed to ``TextValueDataset``.
        max_length: Sequence length handed to ``TextValueDataset``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the data every epoch.
            Defaults to ``True`` to keep the previous behaviour; pass
            ``False`` for validation/test loaders, where a fixed order is
            preferable.

    Returns:
        A ``DataLoader`` over the tokenized dataset.
    """
    dataset = TextValueDataset(df, tokenizer, max_length)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Model setup
def create_model(num_labels):
    """Load pretrained ``roberta-base`` as a sequence classifier.

    ``num_labels`` is forwarded to ``from_pretrained`` to size the
    classification head.
    """
    return RobertaForSequenceClassification.from_pretrained(
        'roberta-base', num_labels=num_labels
    )

# Evaluation function
def evaluate(model, dataloader, device):
    """Run the model over ``dataloader`` and compute classification metrics.

    Puts the model in eval mode (the caller is responsible for switching back
    to train mode), predicts the arg-max class for every sample with gradient
    tracking disabled, and compares against the batch labels.

    Args:
        model: Model called with ``input_ids`` and ``attention_mask``; its
            output must expose ``.logits``.
        dataloader: Iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and F1
        are support-weighted averages over the classes.
    """
    model.eval()
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating", leave=False):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )
            preds = torch.argmax(outputs.logits, dim=1)

            predictions.extend(preds.cpu().tolist())
            # Labels are only compared on the host, so they are not moved to
            # the device and back.
            true_labels.extend(batch['labels'].tolist())

    accuracy = accuracy_score(true_labels, predictions)
    # zero_division=0 yields the same scores as the default ('warn') but
    # suppresses the UndefinedMetricWarning raised when some class is never
    # predicted, which is common in early epochs.
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, predictions, average='weighted', zero_division=0
    )

    return accuracy, precision, recall, f1

# Training function
def train(model, train_loader, val_loader, test_loader, epochs, device, lr, save_path):
    """Fine-tune ``model``, checkpoint on validation accuracy, then test.

    For every epoch the model is trained on ``train_loader`` with AdamW and
    evaluated on ``val_loader``. Whenever the validation accuracy is at least
    as good as the best seen so far, the state dict is written to
    ``<save_path>/suddenness.pt``. After the final epoch the best checkpoint
    saved during this run is reloaded and evaluated once on ``test_loader``.

    Args:
        model: Model whose forward pass accepts ``input_ids``,
            ``attention_mask`` and ``labels`` and returns an object with
            ``.loss``.
        train_loader: Batches used for optimisation.
        val_loader: Batches used for checkpoint selection.
        test_loader: Batches used for the final evaluation.
        epochs: Number of passes over ``train_loader``.
        device: Device to run on.
        lr: Learning rate for AdamW.
        save_path: Existing directory the checkpoint is written to.
    """
    optimizer = AdamW(model.parameters(), lr=lr)
    model = model.to(device)

    model_save_path = os.path.join(save_path, 'suddenness.pt')
    max_val_accuracy = 0.0
    # Tracks whether *this run* wrote the checkpoint, so a stale file left by
    # an earlier run is never loaded by mistake.
    checkpoint_saved = False

    for epoch in range(epochs):
        print(f'Epoch {epoch + 1}/{epochs}')

        # Training (evaluate() leaves the model in eval mode, so switch back)
        model.train()
        train_loss = 0
        train_loop = tqdm(train_loader, desc="Training")

        for batch in train_loop:
            optimizer.zero_grad()

            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            loss = outputs.loss
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            train_loss += batch_loss
            train_loop.set_postfix(loss=batch_loss)

        # Guard against an empty loader instead of dividing by zero.
        avg_train_loss = train_loss / max(len(train_loader), 1)
        print(f'Average Training Loss: {avg_train_loss:.4f}')

        # Validation after each epoch
        val_accuracy, val_precision, val_recall, val_f1 = evaluate(model, val_loader, device)
        print(f'Validation Accuracy: {val_accuracy:.4f}, Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1: {val_f1:.4f}')

        # Save the model only when validation accuracy matches or beats the best so far
        if val_accuracy >= max_val_accuracy:
            torch.save(model.state_dict(), model_save_path)
            print(f'Model saved to {model_save_path}')
            max_val_accuracy = val_accuracy
            checkpoint_saved = True
        else:
            print('Model not saved due to lower validation accuracy')

    # The checkpoint, not the last-epoch weights, is the model selected on
    # the validation set, so that is the one the test metrics should describe.
    if checkpoint_saved:
        model.load_state_dict(torch.load(model_save_path, map_location=device))
        print(f'Loaded best checkpoint from {model_save_path}')

    # Testing once, after all epochs have finished
    test_accuracy, test_precision, test_recall, test_f1 = evaluate(model, test_loader, device)
    print(f'Test Accuracy: {test_accuracy:.4f}, Precision: {test_precision:.4f}, Recall: {test_recall:.4f}, F1: {test_f1:.4f}')
        

# Main function
def main():
    """Configure the run, build the data loaders and model, then train.

    Reads the train/val/test CSV files from ``data/``, fine-tunes
    ``roberta-base`` with five output labels, and writes checkpoints under
    ``./models``.
    """
    # Hyperparameters
    max_length, batch_size = 128, 8
    epochs, num_labels, lr = 50, 5, 2e-6

    # Checkpoint directory and compute device (GPU when available)
    save_path = './models'
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    os.makedirs(save_path, exist_ok=True)

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

    # Read the three splits, then wrap each in a DataLoader, in the order
    # train, val, test.
    csv_paths = ('data/train.csv', 'data/val.csv', 'data/test.csv')
    frames = [load_dataset(csv_path) for csv_path in csv_paths]
    train_loader, val_loader, test_loader = [
        create_dataloader(frame, tokenizer, max_length, batch_size)
        for frame in frames
    ]

    classifier = create_model(num_labels)

    # Train, validate every epoch and report test metrics at the end.
    train(classifier, train_loader, val_loader, test_loader, epochs, device, lr, save_path)

# Start the training pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/50


Training: 100%|████████████████████| 653/653 [01:06<00:00,  9.88it/s, loss=1.68]


Average Training Loss: 1.5993


  _warn_prf(average, modifier, msg_start, len(result))


Validation Accuracy: 0.3352, Precision: 0.2014, Recall: 0.3352, F1: 0.2173
Model saved to ./models/suddenness.pt
Epoch 2/50


Training: 100%|███████████████████| 653/653 [01:04<00:00, 10.15it/s, loss=0.671]


Average Training Loss: 1.4614


                                                                                

Validation Accuracy: 0.4259, Precision: 0.3435, Recall: 0.4259, F1: 0.3667
Model saved to ./models/suddenness.pt
Epoch 3/50


Training: 100%|████████████████████| 653/653 [01:05<00:00, 10.00it/s, loss=1.29]


Average Training Loss: 1.3578


                                                                                

Validation Accuracy: 0.4111, Precision: 0.3328, Recall: 0.4111, F1: 0.3600
Model not saved due to lower validation accuracy
Epoch 4/50


Training: 100%|████████████████████| 653/653 [01:05<00:00,  9.99it/s, loss=1.03]


Average Training Loss: 1.3063


                                                                                

Validation Accuracy: 0.4167, Precision: 0.3484, Recall: 0.4167, F1: 0.3642
Model not saved due to lower validation accuracy
Epoch 5/50


Training:  67%|█████████████▍      | 437/653 [00:43<00:22,  9.50it/s, loss=1.62]