In [3]:
import torch
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, AdamW
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd

from torch.utils.data import TensorDataset, DataLoader

from tqdm import tqdm

from scipy.stats import pearsonr


# Select the compute device: use the GPU when PyTorch reports CUDA support,
# otherwise fall back to the CPU. The choice is printed for the run log.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

# Load the pretrained 'xlm-roberta-base' tokenizer, then the matching
# sequence-classification model configured with a single output
# (num_labels=1), and place the model on the selected device.
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')
model = XLMRobertaForSequenceClassification.from_pretrained(
    'xlm-roberta-base',
    num_labels=1,
)
model = model.to(device)

# Read the two tab-separated data files into DataFrames.
# `df` is the training data; `df_v` comes from the file named "test_data..."
# and is used as the validation split further down.
df, df_v = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in (
        't_data/final_data_some_lang.tsv',
        't_data/test_data_some_lang.tsv',
    )
)


# Encode the 'essay' column of each split and build the matching 'empathy'
# label tensors. Tokenizer settings: padding enabled, truncation at 512
# tokens, PyTorch tensors returned. Labels are float32 with a trailing
# singleton dimension, i.e. shape (N, 1). Everything is moved to `device`.

# --- training split ---
train_inputs = tokenizer(
    df['essay'].tolist(),
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors='pt',
)
train_inputs = train_inputs.to(device)
train_labels = torch.tensor(df['empathy'].tolist(), dtype=torch.float32)
train_labels = train_labels.unsqueeze(1).to(device)
print(f'Train input size: {train_inputs["input_ids"].size()}, Train label size: {train_labels.size()}')

# --- validation split ---
val_inputs = tokenizer(
    df_v['essay'].tolist(),
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors='pt',
)
val_inputs = val_inputs.to(device)
val_labels = torch.tensor(df_v['empathy'].tolist(), dtype=torch.float32)
val_labels = val_labels.unsqueeze(1).to(device)
print(f'Validation input size: {val_inputs["input_ids"].size()}, Validation label size: {val_labels.size()}')

# Split the data into train and validation sets
#train_inputs, val_inputs, train_labels, val_labels = train_test_split(inputs, labels, test_size=0.2, random_state=42)

# Wrap each split as a dataset of (input_ids, attention_mask, label) triples
# and expose it through a DataLoader. Only the training loader shuffles;
# validation batches are served in file order.
batch_size = 8

train_data = TensorDataset(
    train_inputs['input_ids'],
    train_inputs['attention_mask'],
    train_labels,
)
val_data = TensorDataset(
    val_inputs['input_ids'],
    val_inputs['attention_mask'],
    val_labels,
)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Training objective and optimizer.
# The loss is mean squared error between the model's single output and the
# empathy score. The optimizer is AdamW over all model parameters with a
# learning rate of 2e-5 and bias correction switched off.
# NOTE(review): the AdamW class imported from `transformers` is marked
# deprecated upstream in favour of `torch.optim.AdamW` -- confirm against the
# installed transformers version before upgrading.
loss_fn = torch.nn.MSELoss()
optimizer = AdamW(
    model.parameters(),
    lr=2e-5,
    correct_bias=False,
)

# Train the model
# Fine-tune for a fixed number of epochs. After every epoch the model is
# evaluated on the validation loader, and its weights are checkpointed to
# 'emp_model_multi.pt' whenever the validation loss improves on the best so far.
#
# Epoch losses are per-sample means: each batch's (mean-reduced) MSE is weighted
# by its batch size and the total is divided by the number of samples seen.
# Averaging the per-batch means instead would over-weight the final, smaller
# batch and skew both the logged numbers and the best-checkpoint comparison.
num_epochs = 10
best_val_loss = float('inf')
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_samples = 0
    for batch in tqdm(train_loader, desc=f'Training epoch {epoch + 1}/{num_epochs}'):
        optimizer.zero_grad()
        # Each batch is an (input_ids, attention_mask, labels) triple; make sure
        # all three live on the model's device.
        input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)
        # Labels were built with a trailing singleton dim, and with num_labels=1
        # the logits come back as (batch, 1). Drop that dim on both so the loss
        # compares tensors of identical shape. The token tensors are passed
        # through untouched -- squeezing them would collapse the sequence axis
        # if the padded length were ever 1.
        labels = labels.squeeze(1)
        outputs = model(input_ids, attention_mask=attention_mask)[0].squeeze(1)
        loss = loss_fn(outputs, labels)

        loss.backward()
        optimizer.step()

        n_in_batch = labels.size(0)
        train_loss += loss.item() * n_in_batch
        train_samples += n_in_batch
    train_loss /= train_samples

    # Evaluate on the validation set
    model.eval()
    val_loss = 0.0
    val_samples = 0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f'Validation epoch {epoch + 1}/{num_epochs}'):
            input_ids, attention_mask, labels = (tensor.to(device) for tensor in batch)
            labels = labels.squeeze(1)
            outputs = model(input_ids, attention_mask=attention_mask)[0].squeeze(1)
            loss = loss_fn(outputs, labels)

            n_in_batch = labels.size(0)
            val_loss += loss.item() * n_in_batch
            val_samples += n_in_batch
    val_loss /= val_samples

    # Save the best model based on validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'emp_model_multi.pt')

    print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')


Using device: cuda


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Train input size: torch.Size([5580, 275]), Train label size: torch.Size([5580, 1])
Validation input size: torch.Size([810, 257]), Validation label size: torch.Size([810, 1])


Training epoch 1/10: 100%|██████████| 698/698 [01:14<00:00,  9.38it/s]
Validation epoch 1/10: 100%|██████████| 102/102 [00:02<00:00, 34.53it/s]


Epoch 1/10, Train Loss: 3.5524, Validation Loss: 3.4629


Training epoch 2/10: 100%|██████████| 698/698 [01:14<00:00,  9.37it/s]
Validation epoch 2/10: 100%|██████████| 102/102 [00:02<00:00, 34.53it/s]


Epoch 2/10, Train Loss: 3.3869, Validation Loss: 3.5797


Training epoch 3/10: 100%|██████████| 698/698 [01:14<00:00,  9.37it/s]
Validation epoch 3/10: 100%|██████████| 102/102 [00:03<00:00, 33.62it/s]


Epoch 3/10, Train Loss: 3.3849, Validation Loss: 3.4564


Training epoch 4/10: 100%|██████████| 698/698 [01:57<00:00,  5.94it/s]
Validation epoch 4/10: 100%|██████████| 102/102 [00:05<00:00, 17.67it/s]


Epoch 4/10, Train Loss: 3.3861, Validation Loss: 3.4634


Training epoch 5/10: 100%|██████████| 698/698 [02:16<00:00,  5.13it/s]
Validation epoch 5/10: 100%|██████████| 102/102 [00:02<00:00, 34.43it/s]


Epoch 5/10, Train Loss: 3.3719, Validation Loss: 3.4555


Training epoch 6/10: 100%|██████████| 698/698 [01:14<00:00,  9.33it/s]
Validation epoch 6/10: 100%|██████████| 102/102 [00:02<00:00, 34.34it/s]


Epoch 6/10, Train Loss: 3.3610, Validation Loss: 3.4765


Training epoch 7/10: 100%|██████████| 698/698 [01:14<00:00,  9.33it/s]
Validation epoch 7/10: 100%|██████████| 102/102 [00:02<00:00, 34.42it/s]


Epoch 7/10, Train Loss: 3.3631, Validation Loss: 3.4853


Training epoch 8/10: 100%|██████████| 698/698 [01:20<00:00,  8.67it/s]
Validation epoch 8/10: 100%|██████████| 102/102 [00:03<00:00, 28.23it/s]


Epoch 8/10, Train Loss: 3.3621, Validation Loss: 3.4645


Training epoch 9/10: 100%|██████████| 698/698 [01:28<00:00,  7.93it/s]
Validation epoch 9/10: 100%|██████████| 102/102 [00:02<00:00, 34.45it/s]


Epoch 9/10, Train Loss: 3.3549, Validation Loss: 3.5365


Training epoch 10/10: 100%|██████████| 698/698 [01:43<00:00,  6.72it/s]
Validation epoch 10/10: 100%|██████████| 102/102 [00:05<00:00, 18.03it/s]

Epoch 10/10, Train Loss: 3.3739, Validation Loss: 3.5670



