In [1]:
!pip install transformers datasets torch evaluate

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m8

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, BertTokenizer, BertModel
from sklearn.utils.class_weight import compute_class_weight
from datasets import Dataset
import torch
import numpy

In [3]:
orientation_data = pd.read_csv("/content/orientation-lv-train.tsv", sep="\t")
print("Orientation Data Distribution:")
print(orientation_data['label'].value_counts())

Orientation Data Distribution:
label
1    628
0    170
Name: count, dtype: int64


In [5]:
orientation_train, orientation_validate = train_test_split(orientation_data, train_size=0.9, test_size=0.1, stratify=orientation_data['label'], random_state=42)

print("Orientation Train Data Distribution:")
print(orientation_train['label'].value_counts())
print("Orientation Validate Data Distribution:")
print(orientation_validate['label'].value_counts())



Orientation Train Data Distribution:
label
1    565
0    153
Name: count, dtype: int64
Orientation Validate Data Distribution:
label
1    63
0    17
Name: count, dtype: int64


In [11]:
model_name = "bert-base-multilingual-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model_lv= AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
model_en = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

def tokenize_en(data):
  return tokenizer(data['text_en'], padding=True, truncation=True, max_length=512)

def tokenize_lv(data):
  return tokenizer(data['text'], padding=True, truncation=True, max_length=512)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
dataset_training = Dataset.from_pandas(orientation_train)
dataset_validation = Dataset.from_pandas(orientation_validate)

dataset_training_lv = dataset_training.map(tokenize_lv, batched=True)
dataset_validation_lv = dataset_validation.map(tokenize_lv, batched=True)

dataset_training_en = dataset_training.map(tokenize_en, batched=True)
dataset_validation_en = dataset_validation.map(tokenize_en, batched=True)


Map:   0%|          | 0/718 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/718 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

In [16]:
training_arguments_lv = TrainingArguments(
    output_dir="./results_lv",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs_lv",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    logging_steps=50,
    save_total_limit=1,
)

training_arguments_en = TrainingArguments(
    output_dir="./results_en",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs_en",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    logging_steps=50,
    save_total_limit=1,
)

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    return classification_report(labels, preds, output_dict=True)

trainer_lv = Trainer(
    model=model_lv,
    args=training_arguments_lv,
    train_dataset=dataset_training_lv,
    eval_dataset=dataset_validation_lv,
    compute_metrics=compute_metrics,
)

trainer_en = Trainer(
    model=model_en,
    args=training_arguments_en,
    train_dataset=dataset_training_en,
    eval_dataset=dataset_validation_en,
    compute_metrics=compute_metrics,
)

trainer_lv.train()
predictions_lv = trainer_lv.predict(dataset_validation_lv)
result_lv = numpy.argmax(predictions_lv.predictions, axis=-1)
labels_lv = dataset_validation_lv['label']
print(classification_report(labels_lv, result_lv))

trainer_en.train()
predictions_en = trainer_en.predict(dataset_validation_en)
result_en = numpy.argmax(predictions_en.predictions, axis=-1)
labels_en = dataset_validation_en['label']
print(classification_report(labels_en, result_en))




Epoch,Training Loss,Validation Loss,0,1,Accuracy,Macro avg,Weighted avg
1,No log,0.496679,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17.0}","{'precision': 0.7875, 'recall': 1.0, 'f1-score': 0.8811188811188811, 'support': 63.0}",0.7875,"{'precision': 0.39375, 'recall': 0.5, 'f1-score': 0.4405594405594406, 'support': 80.0}","{'precision': 0.62015625, 'recall': 0.7875, 'f1-score': 0.6938811188811189, 'support': 80.0}"
2,0.504300,0.48918,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17.0}","{'precision': 0.7875, 'recall': 1.0, 'f1-score': 0.8811188811188811, 'support': 63.0}",0.7875,"{'precision': 0.39375, 'recall': 0.5, 'f1-score': 0.4405594405594406, 'support': 80.0}","{'precision': 0.62015625, 'recall': 0.7875, 'f1-score': 0.6938811188811189, 'support': 80.0}"
3,0.507200,0.411567,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17.0}","{'precision': 0.7848101265822784, 'recall': 0.9841269841269841, 'f1-score': 0.8732394366197183, 'support': 63.0}",0.775,"{'precision': 0.3924050632911392, 'recall': 0.49206349206349204, 'f1-score': 0.43661971830985913, 'support': 80.0}","{'precision': 0.6180379746835443, 'recall': 0.775, 'f1-score': 0.6876760563380281, 'support': 80.0}"


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 17.0}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7875, 'recall': 1.0, 'f1-score': 0.8811188811188811, 'support': 63.0}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.39375, 'recall': 0.5, 'f1-score': 0.4405594405594406, 'support': 80.0}" of type <class 'dict'> for key "eval/macro avg" as a scalar. This invocation of Tensorboard's writer.add_scalar() is

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00        17
           1       0.79      1.00      0.88        63

    accuracy                           0.79        80
   macro avg       0.39      0.50      0.44        80
weighted avg       0.62      0.79      0.69        80



Epoch,Training Loss,Validation Loss,0,1,Accuracy,Macro avg,Weighted avg
1,No log,0.389119,"{'precision': 0.6, 'recall': 0.7058823529411765, 'f1-score': 0.6486486486486487, 'support': 17.0}","{'precision': 0.9166666666666666, 'recall': 0.873015873015873, 'f1-score': 0.8943089430894309, 'support': 63.0}",0.8375,"{'precision': 0.7583333333333333, 'recall': 0.7894491129785248, 'f1-score': 0.7714787958690398, 'support': 80.0}","{'precision': 0.849375, 'recall': 0.8375, 'f1-score': 0.8421061305207648, 'support': 80.0}"
2,0.328400,0.376936,"{'precision': 0.6111111111111112, 'recall': 0.6470588235294118, 'f1-score': 0.6285714285714286, 'support': 17.0}","{'precision': 0.9032258064516129, 'recall': 0.8888888888888888, 'f1-score': 0.896, 'support': 63.0}",0.8375,"{'precision': 0.7571684587813621, 'recall': 0.7679738562091503, 'f1-score': 0.7622857142857142, 'support': 80.0}","{'precision': 0.8411514336917563, 'recall': 0.8375, 'f1-score': 0.8391714285714287, 'support': 80.0}"
3,0.221600,0.376222,"{'precision': 0.7272727272727273, 'recall': 0.47058823529411764, 'f1-score': 0.5714285714285714, 'support': 17.0}","{'precision': 0.8695652173913043, 'recall': 0.9523809523809523, 'f1-score': 0.9090909090909091, 'support': 63.0}",0.85,"{'precision': 0.7984189723320159, 'recall': 0.7114845938375349, 'f1-score': 0.7402597402597402, 'support': 80.0}","{'precision': 0.8393280632411066, 'recall': 0.85, 'f1-score': 0.8373376623376624, 'support': 80.0}"


Trainer is attempting to log a value of "{'precision': 0.6, 'recall': 0.7058823529411765, 'f1-score': 0.6486486486486487, 'support': 17.0}" of type <class 'dict'> for key "eval/0" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.9166666666666666, 'recall': 0.873015873015873, 'f1-score': 0.8943089430894309, 'support': 63.0}" of type <class 'dict'> for key "eval/1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.7583333333333333, 'recall': 0.7894491129785248, 'f1-score': 0.7714787958690398, 'support': 80.0}" of type <class 'dict'> for key "eval/macro avg" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.849375, 'recall': 0.8375, 'f1-score': 0

              precision    recall  f1-score   support

           0       0.73      0.47      0.57        17
           1       0.87      0.95      0.91        63

    accuracy                           0.85        80
   macro avg       0.80      0.71      0.74        80
weighted avg       0.84      0.85      0.84        80

