In [116]:
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
from transformers import Trainer, TrainingArguments
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import f1_score, classification_report
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
from transformers import AdamW
from tqdm import tqdm

In [117]:
language="en"
modelname="XLMRoberta"
input = "COOK"
test = "COOK"
seed = 5

In [118]:
if input == "gpt4":
  gpt4_flag=True
else:
  gpt4_flag=False

In [119]:
# JP encoding='euc-jp' other encoding normal CHECK ALWAYS DATASETS
df_train = pd.read_csv(f'cook_en_train.csv')
df_eval = pd.read_csv(f'cook_en_eval.csv')
df_test = pd.read_csv(f'cook_en_test.csv')

In [120]:
def get_category_mapping(language):
    if language in ['en', 'jp']:
        category_mapping = {
            'figurative': 'idiom',
            'literal': 'nonidiom'
        }
    elif language == 'tr':
        category_mapping = {
            'mecaz': 'idiom',
            'gerçek': 'nonidiom'
        }
    elif language == 'it':
        category_mapping = {
            'figurato': 'idiom',
            'letterale': 'nonidiom'
        }
    else:
        raise ValueError(f"Language '{language}' is not supported.")

    return category_mapping

In [121]:
category_mapping = get_category_mapping(language)
print(category_mapping)

{'figurative': 'idiom', 'literal': 'nonidiom'}


In [122]:
if gpt4_flag or language in ['en', 'jp']:
  df_train['category'] = df_train['category'].map(category_mapping)
df_train

Unnamed: 0,category,submission,idiom
0,nonidiom,"Anyway , Jack The Lad said , rehearsing , and ...",blow smoke
1,nonidiom,And he blows the smoke back into my mouth .,blow smoke
2,nonidiom,NO DRAUGHT COWL Prevent downdraught from blowi...,blow smoke
3,nonidiom,Mrs. Mounce blew out cigarette smoke provocati...,blow smoke
4,nonidiom,She blew smoke serenely as he flicked it at th...,blow smoke
...,...,...,...
1450,idiom,"This touched a raw nerve in Germany , the roma...",touch nerve
1451,idiom,We spilt a few beans and touched the odd ( fem...,touch nerve
1452,idiom,"Majority verdicts , a mechanical adjustment to...",touch nerve
1453,idiom,Other issues touch sensitive local nerves .,touch nerve


In [123]:
if gpt4_flag or language in ['en', 'jp']:
  df_eval['category'] = df_eval['category'].map(category_mapping)
df_eval

Unnamed: 0,category,submission,idiom
0,nonidiom,She drew the cigarette from her lips and blew ...,blow smoke
1,nonidiom,Then we spent five minutes blowing cigar smoke...,blow smoke
2,nonidiom,He blew smoke pleasurably from his nostrils .,blow smoke
3,nonidiom,He took a fresh cigarette offered by somebody ...,blow smoke
4,nonidiom,She inhaled deeply and threw her head back to ...,blow smoke
...,...,...,...
299,nonidiom,For the asymmetric T-section shown in figure 9...,take root
300,nonidiom,Ensure holes are deep and wide enough to take ...,take root
301,idiom,They also touched a nerve of public anxiety .,touch nerve
302,idiom,"He was , of course , touching a nerve deep wit...",touch nerve


In [124]:
if language in ['en', 'jp']:
  df_test['category'] = df_test['category'].map(category_mapping)

In [125]:
df_test

Unnamed: 0,category,submission,idiom
0,nonidiom,"He put his cigarette to his lips , drew in smo...",blow smoke
1,nonidiom,"She &apos;s lying on the bed , blowing smoke a...",blow smoke
2,nonidiom,The TV presenter sucked extravagantly on her c...,blow smoke
3,nonidiom,He blows cigarette smoke irritably across the ...,blow smoke
4,nonidiom,Test the alarm regularly by pressing the test ...,blow smoke
...,...,...,...
802,idiom,The death of Phyllis Henley touched these nerv...,touch nerve
803,idiom,Mikhail Gorbachev avoided touching sensitive n...,touch nerve
804,idiom,Jim Eggleton &apos;s murder touched a nerve th...,touch nerve
805,idiom,Salgado &apos;s photo-essay opens a window ont...,touch nerve


In [126]:
df_train['category'] = df_train['category'].str.lower()
df_eval['category'] = df_eval['category'].str.lower()
df_test['category'] = df_test['category'].str.lower()

In [127]:
df_train.head()

Unnamed: 0,category,submission,idiom
0,nonidiom,"Anyway , Jack The Lad said , rehearsing , and ...",blow smoke
1,nonidiom,And he blows the smoke back into my mouth .,blow smoke
2,nonidiom,NO DRAUGHT COWL Prevent downdraught from blowi...,blow smoke
3,nonidiom,Mrs. Mounce blew out cigarette smoke provocati...,blow smoke
4,nonidiom,She blew smoke serenely as he flicked it at th...,blow smoke


In [128]:
df_eval.head()

Unnamed: 0,category,submission,idiom
0,nonidiom,She drew the cigarette from her lips and blew ...,blow smoke
1,nonidiom,Then we spent five minutes blowing cigar smoke...,blow smoke
2,nonidiom,He blew smoke pleasurably from his nostrils .,blow smoke
3,nonidiom,He took a fresh cigarette offered by somebody ...,blow smoke
4,nonidiom,She inhaled deeply and threw her head back to ...,blow smoke


In [129]:
df_test.head()

Unnamed: 0,category,submission,idiom
0,nonidiom,"He put his cigarette to his lips , drew in smo...",blow smoke
1,nonidiom,"She &apos;s lying on the bed , blowing smoke a...",blow smoke
2,nonidiom,The TV presenter sucked extravagantly on her c...,blow smoke
3,nonidiom,He blows cigarette smoke irritably across the ...,blow smoke
4,nonidiom,Test the alarm regularly by pressing the test ...,blow smoke


In [130]:
# Encode the 'category' column
label_encoder = LabelEncoder()
df_train['category_encoded'] = label_encoder.fit_transform(df_train['category'])
df_eval['category_encoded'] = label_encoder.transform(df_eval['category'])
df_test['category_encoded'] = label_encoder.transform(df_test['category'])

In [131]:
df_train

Unnamed: 0,category,submission,idiom,category_encoded
0,nonidiom,"Anyway , Jack The Lad said , rehearsing , and ...",blow smoke,1
1,nonidiom,And he blows the smoke back into my mouth .,blow smoke,1
2,nonidiom,NO DRAUGHT COWL Prevent downdraught from blowi...,blow smoke,1
3,nonidiom,Mrs. Mounce blew out cigarette smoke provocati...,blow smoke,1
4,nonidiom,She blew smoke serenely as he flicked it at th...,blow smoke,1
...,...,...,...,...
1450,idiom,"This touched a raw nerve in Germany , the roma...",touch nerve,0
1451,idiom,We spilt a few beans and touched the odd ( fem...,touch nerve,0
1452,idiom,"Majority verdicts , a mechanical adjustment to...",touch nerve,0
1453,idiom,Other issues touch sensitive local nerves .,touch nerve,0


In [132]:
df_eval

Unnamed: 0,category,submission,idiom,category_encoded
0,nonidiom,She drew the cigarette from her lips and blew ...,blow smoke,1
1,nonidiom,Then we spent five minutes blowing cigar smoke...,blow smoke,1
2,nonidiom,He blew smoke pleasurably from his nostrils .,blow smoke,1
3,nonidiom,He took a fresh cigarette offered by somebody ...,blow smoke,1
4,nonidiom,She inhaled deeply and threw her head back to ...,blow smoke,1
...,...,...,...,...
299,nonidiom,For the asymmetric T-section shown in figure 9...,take root,1
300,nonidiom,Ensure holes are deep and wide enough to take ...,take root,1
301,idiom,They also touched a nerve of public anxiety .,touch nerve,0
302,idiom,"He was , of course , touching a nerve deep wit...",touch nerve,0


In [133]:
df_test

Unnamed: 0,category,submission,idiom,category_encoded
0,nonidiom,"He put his cigarette to his lips , drew in smo...",blow smoke,1
1,nonidiom,"She &apos;s lying on the bed , blowing smoke a...",blow smoke,1
2,nonidiom,The TV presenter sucked extravagantly on her c...,blow smoke,1
3,nonidiom,He blows cigarette smoke irritably across the ...,blow smoke,1
4,nonidiom,Test the alarm regularly by pressing the test ...,blow smoke,1
...,...,...,...,...
802,idiom,The death of Phyllis Henley touched these nerv...,touch nerve,0
803,idiom,Mikhail Gorbachev avoided touching sensitive n...,touch nerve,0
804,idiom,Jim Eggleton &apos;s murder touched a nerve th...,touch nerve,0
805,idiom,Salgado &apos;s photo-essay opens a window ont...,touch nerve,0


In [134]:
class TextDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


In [135]:
# Initialize tokenizer
tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-base')

# Tokenize the data
train_encodings = tokenizer(list(df_train['submission']), truncation=True, padding=True, return_tensors="pt")
val_encodings = tokenizer(list(df_eval['submission']), truncation=True, padding=True, return_tensors="pt")
test_encodings = tokenizer(list(df_test['submission']), truncation=True, padding=True, return_tensors="pt")
# Convert to torch tensors
train_labels = torch.tensor(df_train['category_encoded'].values)
val_labels = torch.tensor(df_eval['category_encoded'].values)
test_labels = torch.tensor(df_test['category_encoded'].values)

# Prepare to datasets
train_dataset = TextDataset(train_encodings, train_labels)
val_dataset = TextDataset(val_encodings, val_labels)
test_dataset = TextDataset(test_encodings, test_labels)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)



In [136]:
num_labels = len(label_encoder.classes_)
model = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=len(label_encoder.classes_))

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
optimizer = AdamW(model.parameters(), lr=5e-6)

epochs = 4
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Average loss across all batches
    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{epochs} | Train Loss: {avg_train_loss}")

    # Evaluation step
    model.eval()
    total_eval_accuracy = 0
    for batch in tqdm(val_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        accuracy = (predictions == batch['labels']).cpu().numpy().mean() * 100
        total_eval_accuracy += accuracy

    avg_val_accuracy = total_eval_accuracy / len(val_loader)
    print(f"Epoch {epoch + 1}/{epochs} | Validation Accuracy: {avg_val_accuracy:.2f}%")


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item['labels'] = torch.tensor(self.labels[idx])
100%|██████████| 182/182 [00:40<00:00,  4.54it/s]


Epoch 1/4 | Train Loss: 0.5108604456533442


100%|██████████| 38/38 [00:01<00:00, 23.46it/s]


Epoch 1/4 | Validation Accuracy: 77.96%


100%|██████████| 182/182 [00:39<00:00,  4.59it/s]


Epoch 2/4 | Train Loss: 0.36860833441900026


100%|██████████| 38/38 [00:01<00:00, 24.14it/s]


Epoch 2/4 | Validation Accuracy: 86.18%


100%|██████████| 182/182 [00:39<00:00,  4.59it/s]


Epoch 3/4 | Train Loss: 0.28993444482458164


100%|██████████| 38/38 [00:01<00:00, 23.98it/s]


Epoch 3/4 | Validation Accuracy: 86.84%


100%|██████████| 182/182 [00:39<00:00,  4.58it/s]


Epoch 4/4 | Train Loss: 0.2157039563939139


100%|██████████| 38/38 [00:01<00:00, 24.24it/s]

Epoch 4/4 | Validation Accuracy: 89.47%





In [137]:
# Ensure the model is in evaluation mode
model.eval()

# Move the model to the appropriate device
model.to(device)

# Initialize variables to track accuracy
total_correct = 0
total_predictions = 0

# Store true labels and predictions for F1 score calculation
all_labels = []
all_predictions = []

# No gradient updates needed for evaluation
with torch.no_grad():
    for batch in test_loader:
        # Move batch to the same device as the model
        batch = {k: v.to(device) for k, v in batch.items()}

        # Forward pass
        outputs = model(**batch)

        # Get predictions
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)

        # Update tracking variables
        total_correct += (predictions == batch['labels']).sum().item()
        total_predictions += predictions.size(0)

        # Store predictions and true labels
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(batch['labels'].cpu().numpy())

# Calculate accuracy
test_accuracy = total_correct / total_predictions
print(f'Test Accuracy: {test_accuracy:.4f}')

# Calculate F1 scores
f1_weighted = f1_score(all_labels, all_predictions, average='weighted')
f1_macro = f1_score(all_labels, all_predictions, average='macro')

print(f'Weighted F1 Score: {f1_weighted:.4f}')
print(f'Macro F1 Score: {f1_macro:.4f}')


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item['labels'] = torch.tensor(self.labels[idx])


Test Accuracy: 0.8947
Weighted F1 Score: 0.8934
Macro F1 Score: 0.8433


In [138]:
# Generate classification report
report = classification_report(all_labels, all_predictions, digits=4)
print(f'Classification Report for {modelname}, language {language.upper()}, Train {input.upper()}, Test {test.upper()}:')
print(report)

Classification Report for XLMRoberta, language EN, Train COOK, Test COOK:
              precision    recall  f1-score   support

           0     0.9250    0.9412    0.9330       629
           1     0.7784    0.7303    0.7536       178

    accuracy                         0.8947       807
   macro avg     0.8517    0.8358    0.8433       807
weighted avg     0.8927    0.8947    0.8934       807

