In [25]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import f1_score, classification_report
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
from transformers import AdamW
from tqdm import tqdm

In [26]:
# Experiment configuration read by the cells below.
language = "en"            # selects the raw-label vocabulary in get_category_mapping
modelname = "EnglishBert"  # only used in the final report header
# NOTE(review): `input` and `test` are ordinary variables here; `input` shadows
# the Python builtin of the same name for the rest of the notebook.
input = "gpt4"             # training-data source; "gpt4" switches gpt4_flag on
test = "COOK"              # test-set name, only used in the final report header
filtered = False           # True enables the per-(idiom, category) sub-sampling cell

In [27]:
# True exactly when the configured training source is the GPT-4 generated data.
gpt4_flag = bool(input == "gpt4")

In [28]:
# Original author's reminder: Japanese CSVs need encoding='euc-jp', the other
# languages use the default encoding. Always check that these paths match the
# datasets selected in the configuration cell.
df_train, df_eval, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        'train_gpt4_en_enhanced_prompting.csv',
        'eval_gpt4_en_enhanced_prompting.csv',
        'cook_en_test.csv',
    )
)

In [29]:
def get_category_mapping(language):
    """Return the table that normalises raw category labels for ``language``.

    Args:
        language: Language code; 'en', 'jp', 'tr' and 'it' are supported.

    Returns:
        A new dict mapping the language's figurative label to 'idiom' and its
        literal label to 'nonidiom'.

    Raises:
        ValueError: If ``language`` is not one of the supported codes.
    """
    # (language codes, (figurative label, literal label)); 'en' and 'jp' share
    # the English label names.
    raw_labels_by_language = (
        (('en', 'jp'), ('figurative', 'literal')),
        (('tr',), ('mecaz', 'gerçek')),
        (('it',), ('figurato', 'letterale')),
    )

    for language_codes, (figurative_label, literal_label) in raw_labels_by_language:
        if language in language_codes:
            return {figurative_label: 'idiom', literal_label: 'nonidiom'}

    raise ValueError(f"Language '{language}' is not supported.")

In [30]:
# Look up the label-normalisation table for the configured language and print it
# so the notebook records which raw labels become 'idiom' / 'nonidiom'.
category_mapping = get_category_mapping(language)
print(category_mapping)

{'figurative': 'idiom', 'literal': 'nonidiom'}


In [31]:
# Normalise the raw training labels to 'idiom' / 'nonidiom'.
# Series.map turns every value that is missing from the dict into NaN, so
# re-running this cell on already-normalised labels used to wipe the column
# silently. Identity entries for the target labels make the cell safe to run
# more than once; genuinely unknown labels still become NaN, as before.
if gpt4_flag or language in ['en', 'jp']:
  rerun_safe_mapping = {**{target: target for target in category_mapping.values()}, **category_mapping}
  df_train['category'] = df_train['category'].map(rerun_safe_mapping)
df_train

Unnamed: 0,submission,category,idiom,type
0,Would you have a word with me about the shift ...,idiom,have word,Enhanced-prompting
1,"After months of hesitation, he blew the whistl...",idiom,blow whistle,Enhanced-prompting
2,The old sailor believed that whistling on the ...,idiom,bring luck,Enhanced-prompting
3,Those who get the drift quicker tend to adapt ...,idiom,get drift,Enhanced-prompting
4,"During the long-drawn negotiation process, the...",idiom,move goalpost,Enhanced-prompting
...,...,...,...,...
3387,She needed to thread the needle quickly withou...,nonidiom,lose thread,Enhanced-prompting
3388,"After they make a new trail, volunteers mark i...",nonidiom,make mark,Enhanced-prompting
3389,"In the dance class, partners practiced their m...",nonidiom,hold sway,Enhanced-prompting
3390,He struggled to plug in the blender because he...,nonidiom,pull plug,Enhanced-prompting


In [32]:
# Normalise the raw evaluation labels to 'idiom' / 'nonidiom'.
# Series.map turns every value that is missing from the dict into NaN, so
# re-running this cell on already-normalised labels used to wipe the column
# silently. Identity entries for the target labels make the cell safe to run
# more than once; genuinely unknown labels still become NaN, as before.
if gpt4_flag or language in ['en', 'jp']:
  rerun_safe_mapping = {**{target: target for target in category_mapping.values()}, **category_mapping}
  df_eval['category'] = df_eval['category'].map(rerun_safe_mapping)
df_eval

Unnamed: 0,submission,category,idiom,type
0,"He didn't have misfortune, because the stolen ...",idiom,have misfortune,Enhanced-prompting
1,"She never lost the thread, despite the novel’s...",idiom,lose thread,Enhanced-prompting
2,"Given her temper, she'll likely hit the roof w...",idiom,hit roof,Enhanced-prompting
3,He did not pull punches when he expressed his ...,idiom,pull punch,Enhanced-prompting
4,"During our session, the topic of family expect...",idiom,touch nerve,Enhanced-prompting
...,...,...,...,...
843,"To fix the broken toy, he had to cut a replace...",nonidiom,cut figure,Enhanced-prompting
844,"As soon as I got outside, the gust of wind nea...",nonidiom,get wind,Enhanced-prompting
845,"In the commotion of the cardiac emergency, the...",nonidiom,lose heart,Enhanced-prompting
846,The wind was so strong it moved the goalpost s...,nonidiom,move goalpost,Enhanced-prompting


In [33]:
# Normalise the raw test labels to 'idiom' / 'nonidiom'. Unlike the train/eval
# cells this condition looks only at the language, not at gpt4_flag.
# Series.map turns every value that is missing from the dict into NaN, so
# re-running this cell on already-normalised labels used to wipe the column
# silently. Identity entries for the target labels make the cell safe to run
# more than once; genuinely unknown labels still become NaN, as before.
if language in ['en', 'jp']:
  rerun_safe_mapping = {**{target: target for target in category_mapping.values()}, **category_mapping}
  df_test['category'] = df_test['category'].map(rerun_safe_mapping)

In [34]:
# Inspect the test frame (its labels were mapped in the previous cell when the
# language is 'en' or 'jp').
df_test

Unnamed: 0,category,submission,idiom
0,nonidiom,"He put his cigarette to his lips , drew in smo...",blow smoke
1,nonidiom,"She &apos;s lying on the bed , blowing smoke a...",blow smoke
2,nonidiom,The TV presenter sucked extravagantly on her c...,blow smoke
3,nonidiom,He blows cigarette smoke irritably across the ...,blow smoke
4,nonidiom,Test the alarm regularly by pressing the test ...,blow smoke
...,...,...,...
802,idiom,The death of Phyllis Henley touched these nerv...,touch nerve
803,idiom,Mikhail Gorbachev avoided touching sensitive n...,touch nerve
804,idiom,Jim Eggleton &apos;s murder touched a nerve th...,touch nerve
805,idiom,Salgado &apos;s photo-essay opens a window ont...,touch nerve


In [35]:
# Optional down-sampling to a fixed number of sentences per (idiom, category)
# pair: 32 for training and 8 for evaluation. replace=False means pandas raises
# a ValueError if any group holds fewer rows than requested.
if filtered:
  # One seeded RandomState shared by every group makes the chosen subset
  # reproducible across kernel restarts without handing each group the same draw.
  sampling_rng = np.random.RandomState(42)
  df_train = df_train.groupby(['idiom', 'category']).apply(lambda x: x.sample(n=32, replace=False, random_state=sampling_rng)).reset_index(drop=True)
  df_eval = df_eval.groupby(['idiom', 'category']).apply(lambda x: x.sample(n=8, replace=False, random_state=sampling_rng)).reset_index(drop=True)

In [36]:
# Lower-case the labels of all three splits so they compare equal regardless of
# how the source files capitalised them.
for split_frame in (df_train, df_eval, df_test):
  split_frame['category'] = split_frame['category'].str.lower()

In [37]:
# Preview the first five training rows after the label clean-up.
df_train.head(n=5)

Unnamed: 0,submission,category,idiom,type
0,Would you have a word with me about the shift ...,idiom,have word,Enhanced-prompting
1,"After months of hesitation, he blew the whistl...",idiom,blow whistle,Enhanced-prompting
2,The old sailor believed that whistling on the ...,idiom,bring luck,Enhanced-prompting
3,Those who get the drift quicker tend to adapt ...,idiom,get drift,Enhanced-prompting
4,"During the long-drawn negotiation process, the...",idiom,move goalpost,Enhanced-prompting


In [38]:
# Preview the first five evaluation rows after the label clean-up.
df_eval.head(n=5)

Unnamed: 0,submission,category,idiom,type
0,"He didn't have misfortune, because the stolen ...",idiom,have misfortune,Enhanced-prompting
1,"She never lost the thread, despite the novel’s...",idiom,lose thread,Enhanced-prompting
2,"Given her temper, she'll likely hit the roof w...",idiom,hit roof,Enhanced-prompting
3,He did not pull punches when he expressed his ...,idiom,pull punch,Enhanced-prompting
4,"During our session, the topic of family expect...",idiom,touch nerve,Enhanced-prompting


In [39]:
# Preview the first five test rows after the label clean-up.
df_test.head(n=5)

Unnamed: 0,category,submission,idiom
0,nonidiom,"He put his cigarette to his lips , drew in smo...",blow smoke
1,nonidiom,"She &apos;s lying on the bed , blowing smoke a...",blow smoke
2,nonidiom,The TV presenter sucked extravagantly on her c...,blow smoke
3,nonidiom,He blows cigarette smoke irritably across the ...,blow smoke
4,nonidiom,Test the alarm regularly by pressing the test ...,blow smoke


In [40]:
# Turn the string labels into integer class ids. The encoder is fitted on the
# training labels only; the held-out splits reuse that fitted encoder, so a
# label that never occurs in training makes transform() raise.
label_encoder = LabelEncoder()
df_train['category_encoded'] = label_encoder.fit_transform(df_train['category'])
for held_out_frame in (df_eval, df_test):
    held_out_frame['category_encoded'] = label_encoder.transform(held_out_frame['category'])

In [41]:
# Inspect the training frame with the new 'category_encoded' column.
df_train

Unnamed: 0,submission,category,idiom,type,category_encoded
0,Would you have a word with me about the shift ...,idiom,have word,Enhanced-prompting,0
1,"After months of hesitation, he blew the whistl...",idiom,blow whistle,Enhanced-prompting,0
2,The old sailor believed that whistling on the ...,idiom,bring luck,Enhanced-prompting,0
3,Those who get the drift quicker tend to adapt ...,idiom,get drift,Enhanced-prompting,0
4,"During the long-drawn negotiation process, the...",idiom,move goalpost,Enhanced-prompting,0
...,...,...,...,...,...
3387,She needed to thread the needle quickly withou...,nonidiom,lose thread,Enhanced-prompting,1
3388,"After they make a new trail, volunteers mark i...",nonidiom,make mark,Enhanced-prompting,1
3389,"In the dance class, partners practiced their m...",nonidiom,hold sway,Enhanced-prompting,1
3390,He struggled to plug in the blender because he...,nonidiom,pull plug,Enhanced-prompting,1


In [42]:
# Inspect the evaluation frame with the new 'category_encoded' column.
df_eval

Unnamed: 0,submission,category,idiom,type,category_encoded
0,"He didn't have misfortune, because the stolen ...",idiom,have misfortune,Enhanced-prompting,0
1,"She never lost the thread, despite the novel’s...",idiom,lose thread,Enhanced-prompting,0
2,"Given her temper, she'll likely hit the roof w...",idiom,hit roof,Enhanced-prompting,0
3,He did not pull punches when he expressed his ...,idiom,pull punch,Enhanced-prompting,0
4,"During our session, the topic of family expect...",idiom,touch nerve,Enhanced-prompting,0
...,...,...,...,...,...
843,"To fix the broken toy, he had to cut a replace...",nonidiom,cut figure,Enhanced-prompting,1
844,"As soon as I got outside, the gust of wind nea...",nonidiom,get wind,Enhanced-prompting,1
845,"In the commotion of the cardiac emergency, the...",nonidiom,lose heart,Enhanced-prompting,1
846,The wind was so strong it moved the goalpost s...,nonidiom,move goalpost,Enhanced-prompting,1


In [43]:
# Inspect the test frame with the new 'category_encoded' column.
df_test

Unnamed: 0,category,submission,idiom,category_encoded
0,nonidiom,"He put his cigarette to his lips , drew in smo...",blow smoke,1
1,nonidiom,"She &apos;s lying on the bed , blowing smoke a...",blow smoke,1
2,nonidiom,The TV presenter sucked extravagantly on her c...,blow smoke,1
3,nonidiom,He blows cigarette smoke irritably across the ...,blow smoke,1
4,nonidiom,Test the alarm regularly by pressing the test ...,blow smoke,1
...,...,...,...,...
802,idiom,The death of Phyllis Henley touched these nerv...,touch nerve,0
803,idiom,Mikhail Gorbachev avoided touching sensitive n...,touch nerve,0
804,idiom,Jim Eggleton &apos;s murder touched a nerve th...,touch nerve,0
805,idiom,Salgado &apos;s photo-essay opens a window ont...,touch nerve,0


In [44]:
class TextDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with their labels.

    Args:
        encodings: Mapping (anything with ``.items()``) from field name to an
            indexable container holding one entry per example; the containers
            may be tensors or plain Python sequences.
        labels: Indexable container with one label per example (tensor or
            sequence). Its length defines the dataset length.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _as_new_tensor(value):
        """Return ``value`` as a tensor that does not share memory with its source."""
        # Calling torch.tensor() on an existing tensor emits a copy-construct
        # UserWarning for every item; clone().detach() is the recommended
        # equivalent and keeps the same "independent copy" semantics.
        if isinstance(value, torch.Tensor):
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return a dict with every encoding field for example ``idx`` plus 'labels'."""
        item = {key: self._as_new_tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self._as_new_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples, taken from the label container."""
        return len(self.labels)


In [None]:
# Tokenizer for the same checkpoint name that the model cell loads
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Options shared by all three splits: truncate, pad, and return PyTorch tensors
encode_options = {"truncation": True, "padding": True, "return_tensors": "pt"}
loader_batch_size = 8

# Training split -- the only loader that shuffles
train_encodings = tokenizer(list(df_train['submission']), **encode_options)
train_labels = torch.tensor(df_train['category_encoded'].values)
train_dataset = TextDataset(train_encodings, train_labels)
train_loader = DataLoader(train_dataset, batch_size=loader_batch_size, shuffle=True)

# Validation split -- fixed order
val_encodings = tokenizer(list(df_eval['submission']), **encode_options)
val_labels = torch.tensor(df_eval['category_encoded'].values)
val_dataset = TextDataset(val_encodings, val_labels)
val_loader = DataLoader(val_dataset, batch_size=loader_batch_size, shuffle=False)

# Test split -- fixed order
test_encodings = tokenizer(list(df_test['submission']), **encode_options)
test_labels = torch.tensor(df_test['category_encoded'].values)
test_dataset = TextDataset(test_encodings, test_labels)
test_loader = DataLoader(test_dataset, batch_size=loader_batch_size, shuffle=False)

In [None]:
# One output unit per class seen by the label encoder
num_labels = len(label_encoder.classes_)
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# transformers.AdamW is deprecated in favour of the PyTorch optimizer and is
# absent from recent transformers releases. eps=1e-6 and weight_decay=0.0 mirror
# the defaults of the deprecated class so the optimisation setup stays comparable
# (torch.optim.AdamW would otherwise use eps=1e-8, weight_decay=0.01).
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-6, eps=1e-6, weight_decay=0.0)

epochs = 4
for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    total_loss = 0
    for batch in tqdm(train_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        # The batch contains 'labels', so the model output carries the loss
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Average loss across all batches
    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{epochs} | Train Loss: {avg_train_loss}")

    # ---- Evaluation pass ----
    model.eval()
    # Count correct predictions and examples instead of averaging per-batch
    # accuracies: a shorter final batch would otherwise get the same weight as a
    # full one and skew the reported number. This matches how the test cell
    # computes its accuracy.
    total_eval_correct = 0
    total_eval_examples = 0
    for batch in tqdm(val_loader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        total_eval_correct += (predictions == batch['labels']).sum().item()
        total_eval_examples += predictions.size(0)

    avg_val_accuracy = total_eval_correct / total_eval_examples * 100
    print(f"Epoch {epoch + 1}/{epochs} | Validation Accuracy: {avg_val_accuracy:.2f}%")


In [None]:
# Put the model in evaluation mode on the target device before scoring
model.eval()
model.to(device)

# Running counts for accuracy, plus the full label/prediction lists that the
# F1 scores and the classification report are computed from
total_correct, total_predictions = 0, 0
all_labels, all_predictions = [], []

# Inference only: no gradients are needed
with torch.no_grad():
    for batch in test_loader:
        # Tensors must live on the same device as the model
        batch = {name: tensor.to(device) for name, tensor in batch.items()}

        outputs = model(**batch)
        logits = outputs.logits
        # Predicted class = index of the largest logit
        predictions = logits.argmax(dim=-1)

        total_correct += predictions.eq(batch['labels']).sum().item()
        total_predictions += len(predictions)

        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(batch['labels'].cpu().numpy())

# Accuracy over every test example
test_accuracy = total_correct / total_predictions
print(f'Test Accuracy: {test_accuracy:.4f}')

# F1 with two averaging schemes
f1_weighted, f1_macro = (
    f1_score(all_labels, all_predictions, average=averaging)
    for averaging in ('weighted', 'macro')
)

print(f'Weighted F1 Score: {f1_weighted:.4f}')
print(f'Macro F1 Score: {f1_macro:.4f}')


In [None]:
# Per-class precision / recall / F1 on the test set, preceded by a header line
# naming the model, language, training source and test set from the config cell
report = classification_report(all_labels, all_predictions, digits=4)
print(
    f'Classification Report for {modelname}, language {language.upper()}, Train {input.upper()}, Test {test.upper()}:',
    report,
    sep='\n',
)