In [1]:
# !pip install adapter-transformers
# !pip install datasets
# !pip install evaluate

In [2]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


import os, sys
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"



import json
from transformers import AutoTokenizer, AdapterConfig, AutoAdapterModel, AutoConfig
from transformers import TrainingArguments, AdapterTrainer, EvalPrediction, TrainerCallback
from transformers import AutoModelForSequenceClassification, Trainer
from transformers import EarlyStoppingCallback, IntervalStrategy, DataCollatorForLanguageModeling
from datasets import Dataset, DatasetDict

from tqdm import tqdm

import numpy as np
from datasets import concatenate_datasets, load_metric
import evaluate
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import random
import torch

def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    Args:
        seed: Integer seed passed unchanged to every seeding call below.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one; this call is
    # silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)

set_seed(42)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
lang = "es"

# Read the tab-separated training file for `lang`, then discard its first column.
train_path = "../data/subtask_1/" + lang + "/train.tsv"
raw_df = pd.read_csv(train_path, sep='\t')
df = raw_df.drop(columns=raw_df.columns[0])

In [4]:
# Integer id assigned to each string label.
mapping = {
    "generated": 0,
    "human": 1,
}

# Encode the string labels as integers. The guard makes re-running this cell a
# no-op: mapping the already-encoded 0/1 values a second time would turn every
# label into NaN, because those integers are not keys of `mapping`.
if df["label"].isin(list(mapping)).any():
    df["label"] = df["label"].map(mapping)

In [5]:
# Preview the first rows of `df`.
df.head()

Unnamed: 0,text,label
0,Entrada en vigor. La presente Directiva entrar...,1
1,Preguntas: 1. ¿Cuáles son los principales argu...,0
2,¿Desea algo? Póngame una caja de madera. ¿Qué ...,0
3,"@victor28088 1665 Tweets no originales, que as...",1
4,De pequeño Dios me dio a elegir entre tener un...,1


In [6]:
# Show the distinct values present in the `label` column.
df.label.unique()

array([1, 0])

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 10% of `df` for testing, then 10% of the remaining rows for
# validation; both splits use the same fixed random state.
split_kwargs = dict(test_size=0.1, random_state=42)
train_and_valid, dataset_test_compl = train_test_split(df, **split_kwargs)
dataset_train_compl, dataset_valid_compl = train_test_split(train_and_valid, **split_kwargs)

In [8]:

# Checkpoints to fine-tune; uncomment entries to run additional models.
list_lm = [
#     "xlm-roberta-base", 
    "bert-base-multilingual-cased", 
#     "microsoft/deberta-v3-base",
#     "prajjwal1/bert-tiny",
#     "distilbert-base-cased",
#     "roberta-base-openai-detector",
#     "Hello-SimpleAI/chatgpt-detector-roberta"
]

# Directory that receives the final metrics CSV.
RESULT_DIR = "./result"

# Test-set metrics returned by `trainer.evaluate`, keyed by checkpoint name.
t_metrics = {}

# Create the output directory before any training starts, so a missing folder
# cannot make the final `to_csv` fail after the whole run has completed.
os.makedirs(RESULT_DIR, exist_ok=True)


def compute_metrics(pred):
    """Compute accuracy and macro-averaged precision/recall/F1.

    Args:
        pred: Object exposing `label_ids` (reference labels) and `predictions`
            (scores whose argmax over the last axis is the predicted class).

    Returns:
        Dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


def _tokenize_split(frame, encode_fn):
    """Turn a pandas split into a tokenized `Dataset` formatted as torch tensors.

    Args:
        frame: DataFrame with a "text" column and a "label" column.
        encode_fn: Batched function applied through `Dataset.map`.

    Returns:
        Dataset exposing only 'input_ids', 'attention_mask' and 'labels'.
    """
    split = Dataset.from_pandas(frame).rename_column("label", "labels")
    split = split.map(encode_fn, batched=True)
    split.set_format(type="torch", columns=['input_ids', 'attention_mask', 'labels'])
    return split


for language_model in list_lm:

    tokenizer = AutoTokenizer.from_pretrained(language_model)

    def encode_batch(batch):
        """Encodes a batch of input data using the model tokenizer."""
        return tokenizer(batch["text"], max_length=80, truncation=True, padding="max_length")

    # All three splits go through the same rename -> tokenize -> format steps.
    dataset_train = _tokenize_split(dataset_train_compl, encode_batch)
    dataset_valid = _tokenize_split(dataset_valid_compl, encode_batch)
    dataset_test = _tokenize_split(dataset_test_compl, encode_batch)

    # The classification head gets one output per distinct label value in `df`.
    model = AutoModelForSequenceClassification.from_pretrained(
        language_model,
        num_labels=len(df.label.unique()),
        ignore_mismatched_sizes=True,
    )

    # data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
    # A fresh callback per model so its patience state is not shared between runs.
    early_stop = EarlyStoppingCallback(early_stopping_patience=3)

    training_args = TrainingArguments(
        learning_rate=1e-6,
        num_train_epochs=10,
        seed = 42,
        output_dir="./training_output_mbert_subtask1",
        # label_names=["generated", "human"]
        per_device_train_batch_size=64,
        per_device_eval_batch_size=64,
        dataloader_num_workers=32,
        logging_steps=100,
        save_total_limit = 2,
        overwrite_output_dir=True,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        report_to='tensorboard',
        metric_for_best_model='f1'
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset_train,
        eval_dataset=dataset_valid,
        compute_metrics=compute_metrics,
        callbacks = [early_stop]
    )

    trainer.train()

    # Score the held-out test split and keep the result under the checkpoint name.
    t_metrics[language_model] = trainer.evaluate(dataset_test)
    print(t_metrics)

# One-row frame: each column is a checkpoint name, each cell its metrics dict.
pd.DataFrame([t_metrics]).to_csv(os.path.join(RESULT_DIR, "_subtask1_" + str(lang) + ".csv"))

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model ch

KeyboardInterrupt: 

## Modelling

In [None]:
# model = AutoModelForSequenceClassification.from_pretrained(language_model, num_labels=len(df.label.unique()), ignore_mismatched_sizes=True)
  
# def compute_metrics(pred):
#     labels = pred.label_ids
#     preds = pred.predictions.argmax(-1)
#     precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
#     acc = accuracy_score(labels, preds)
#     return {
#         'accuracy': acc,
#         'f1': f1,
#         'precision': precision,
#         'recall': recall
#     }

# # data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
# early_stop = EarlyStoppingCallback(3)

# training_args = TrainingArguments(
#     learning_rate=1e-6,
#     num_train_epochs=10,
#     seed = 42,
#     output_dir="./training_output_mbert_subtask1",
#     # label_names=["generated", "human"]
#     per_device_train_batch_size=64,
#     per_device_eval_batch_size=64,
#     dataloader_num_workers=32,
#     logging_steps=100,
#     save_total_limit = 2,
#     overwrite_output_dir=True,
#     evaluation_strategy="epoch",
#     save_strategy="epoch",
#     load_best_model_at_end=True,
#     report_to='tensorboard',
#     metric_for_best_model='f1'
# )

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=dataset_train,
#     eval_dataset=dataset_valid,
#     compute_metrics=compute_metrics,
#     callbacks = [early_stop]
# )

# trainer.train()

In [None]:
# t_metrics = trainer.evaluate(dataset_test)
# t_metrics