In [1]:
!pip install transformers datasets accelerate
from google.colab import drive
drive.mount('/content/drive')

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
Collecting requests (from transformers)
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
  import pandas as pd
  from datasets import Dataset, concatenate_datasets
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
  from transformers.optimization import Adafactor, AdafactorSchedule
  import numpy as np
  from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error
  import torch
  import gc
  import time
  output_model = 'mdeberta-reduced-finetuned-c4'

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

print(device)

cuda:0


In [4]:
label_names = ["human", "machine"] #0, 1
id2label = {idx:label for idx, label in enumerate(label_names)}
label2id = {v:k for k,v in id2label.items()}

In [5]:
def map_labels(example):
  label_name = example["label"]
  return {"label": label2id[label_name], "label_name": label_name}

In [6]:
tokenizer = AutoTokenizer.from_pretrained('microsoft/mdeberta-v3-base')
if tokenizer.pad_token is None:
  if tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token
  else:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]



In [None]:
model_name = 'microsoft/mdeberta-v3-base-c4'
start = time.time()
num_labels = len(label_names)
model = AutoModelForSequenceClassification.from_pretrained('microsoft/mdeberta-v3-base', num_labels=num_labels, label2id=label2id, id2label=id2label, ignore_mismatched_sizes=True)
model.resize_token_embeddings(len(tokenizer))
try:
  model.config.pad_token_id = tokenizer.get_vocab()[tokenizer.pad_token]
except:
  print("Warning: Exception occured while setting pad_token_id")
end = time.time()
print(f'{model_name} loading took {(end - start)/60} min')
print(f'{model_name} memory footprint {model.get_memory_footprint()/1024/1024/1024}')

pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/mdeberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


microsoft/mdeberta-v3-base-c4 loading took 0.27504371802012123 min
microsoft/mdeberta-v3-base-c4 memory footprint 1.0360860899090767


In [None]:
train = pd.read_csv('/content/drive/MyDrive/multitude_split/multic4.csv')
train = train[train.split == "train"]

In [None]:
# print number of labe 0, 1 for each language
print(train.groupby('language')['label'].value_counts())

language  label
en        1        24149
          0        22819
es        1         7973
          0         7935
ru        0         7999
          1         7908
Name: count, dtype: int64


In [None]:

train_en = train[train.language == "en"].groupby(['label']).apply(lambda x: x.sample(min(8000, len(x)), random_state = 42)).sample(frac=1., random_state = 0).reset_index(drop=True)
train_es = train[train.language == "es"]
train_ru = train[train.language == "ru"]
train = pd.concat([train_en, train_es, train_ru], ignore_index=True, copy=False).sample(frac=1., random_state = 42).reset_index(drop=True)

In [None]:
# print number of labe 0, 1 for each language
print(train.groupby('language')['label'].value_counts())

language  label
en        0        8000
          1        8000
es        1        7973
          0        7935
ru        0        7999
          1        7908
Name: count, dtype: int64


In [None]:
balance=False
train['label'] = ["human" if "human" in x else "machine" for x in train.multi_label]
if(balance):
  train = train.groupby(['label']).apply(lambda x: x.sample(train.label.value_counts().max(), replace=True, random_state = RANDOM_SEED)).sample(frac=1., random_state = RANDOM_SEED).reset_index(drop=True)

valid = train[-(len(train)//10):]
train = train[:-(len(train)//10)]

print(train.groupby('language')['multi_label'].value_counts())
print(train.label.value_counts())

language  multi_label     
en        human               7219
          gpt-3.5-turbo        935
          opt-66b              917
          text-davinci-003     906
          opt-iml-max-1.3b     905
          llama-65b            895
          vicuna-13b           886
          gpt-4                882
          alpaca-lora-30b      863
es        human               7115
          text-davinci-003     900
          opt-iml-max-1.3b     899
          llama-65b            896
          gpt-3.5-turbo        894
          opt-66b              892
          vicuna-13b           891
          gpt-4                887
          alpaca-lora-30b      878
ru        human               7235
          gpt-4                913
          gpt-3.5-turbo        912
          llama-65b            905
          alpaca-lora-30b      896
          text-davinci-003     892
          vicuna-13b           890
          opt-66b              878
          opt-iml-max-1.3b     853
Name: count, dtype: int64
la

In [None]:
train = Dataset.from_pandas(train, split='train')
valid = Dataset.from_pandas(valid, split='validation')
train = train.map(map_labels)
valid = valid.map(map_labels)

Map:   0%|          | 0/43034 [00:00<?, ? examples/s]

Map:   0%|          | 0/4781 [00:00<?, ? examples/s]

In [None]:
def tokenize_texts(examples):
  return tokenizer(examples["text"], truncation=True, max_length=512)

In [None]:
tokenized_train = train.map(tokenize_texts, batched=True)
tokenized_valid = valid.map(tokenize_texts, batched=True)

Map:   0%|          | 0/43034 [00:00<?, ? examples/s]

Map:   0%|          | 0/4781 [00:00<?, ? examples/s]

In [None]:
batch_size = 16
gradient_accumulation_steps=4
num_train_epochs = 10
learning_rate=2e-4
metric_for_best_model = 'MacroF1'
logging_steps = len(tokenized_train) // (batch_size * num_train_epochs)
logging_steps = round(2000 / (batch_size * gradient_accumulation_steps))

In [None]:
# ! pip install -U accelerate
# ! pip install -U transformers

In [None]:
import accelerate
import transformers

transformers.__version__, accelerate.__version__

('4.41.2', '0.31.0')

In [None]:
use_fp16 = True
if "mdeberta" in model_name: use_fp16 = False

args = TrainingArguments(
    output_dir='/content/drive/MyDrive/multitude_split/mdeberta-c4',
    evaluation_strategy = "steps",
    logging_steps = logging_steps, #50,
    save_strategy="steps",
    save_steps = logging_steps, #50,
    save_total_limit=5,
    load_best_model_at_end=True,
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=True,
    num_train_epochs=num_train_epochs,
    weight_decay=0.01,
    push_to_hub=False,
    report_to="none",
    metric_for_best_model = metric_for_best_model,
    fp16=use_fp16 #mdeberta not working with fp16
)



In [None]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"ACC": accuracy_score(labels, predictions), "MacroF1": f1_score(labels, predictions, average='macro'), "MAE": mean_absolute_error(labels, predictions)}

In [None]:
optimizer = Adafactor(model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
class MyAdafactorSchedule(AdafactorSchedule):
    def get_lr(self):
        opt = self.optimizer
        if "step" in opt.state[opt.param_groups[0]["params"][0]]:
            lrs = [opt._get_lr(group, opt.state[p]) for group in opt.param_groups for p in group["params"]]
        else:
            lrs = [args.learning_rate] #just to prevent error in some models (mdeberta), return fixed value according to set TrainingArguments
        return lrs #[lrs]
lr_scheduler = MyAdafactorSchedule(optimizer)

In [None]:
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks = [EarlyStoppingCallback(early_stopping_patience=10)],
    optimizers=(optimizer, lr_scheduler)
)

In [None]:
start = time.time()
trainer.train()
end = time.time()
print(f'{model_name} memory footprint {model.get_memory_footprint()/1024/1024/1024}')
print(f'{model_name} fine-tuning took {(end - start)/60} min')



Step,Training Loss,Validation Loss,Acc,Macrof1,Mae
31,0.6937,0.694353,0.494666,0.330954,0.505334
62,0.6716,0.570581,0.84543,0.845234,0.15457
93,0.4145,0.262107,0.935997,0.935669,0.064003
124,0.2596,0.200157,0.938297,0.937984,0.061703
155,0.1899,0.16489,0.938716,0.938404,0.061284
186,0.177,0.162965,0.939552,0.939252,0.060448
217,0.1632,0.116951,0.951893,0.951788,0.048107
248,0.1541,0.215512,0.942062,0.941788,0.057938
279,0.1416,0.093641,0.960259,0.960213,0.039741
310,0.1272,0.088965,0.963606,0.963605,0.036394




Step,Training Loss,Validation Loss,Acc,Macrof1,Mae
31,0.6937,0.694353,0.494666,0.330954,0.505334
62,0.6716,0.570581,0.84543,0.845234,0.15457
93,0.4145,0.262107,0.935997,0.935669,0.064003
124,0.2596,0.200157,0.938297,0.937984,0.061703
155,0.1899,0.16489,0.938716,0.938404,0.061284
186,0.177,0.162965,0.939552,0.939252,0.060448
217,0.1632,0.116951,0.951893,0.951788,0.048107
248,0.1541,0.215512,0.942062,0.941788,0.057938
279,0.1416,0.093641,0.960259,0.960213,0.039741
310,0.1272,0.088965,0.963606,0.963605,0.036394




microsoft/mdeberta-v3-base-c4 memory footprint 1.0360860899090767
microsoft/mdeberta-v3-base-c4 fine-tuning took 220.39182606538137 min


In [None]:
# get model from the trainer
model = trainer.model

In [None]:
import shutil
start = time.time()
shutil.rmtree(output_model, ignore_errors=True)
trainer.save_model()
end = time.time()
print(f'{output_model} saving took {(end - start)/60} min')

mdeberta-reduced-finetuned-c4 saving took 0.09313410520553589 min


In [7]:
test = pd.read_csv('/content/drive/MyDrive/multitude_split/multic4-new.csv')

test = test[test.split == "test"].reset_index(drop=True)
test['label'] = ["human" if "human" in x else "machine" for x in test.multi_label]

In [8]:
from sklearn.metrics import classification_report
from transformers import pipeline


start = time.time()
classifier = pipeline("text-classification", model='/content/drive/MyDrive/multitude_split/mdeberta-c4', device=device, torch_dtype=torch.float16, tokenizer=tokenizer)
end = time.time()
print(f"{output_model.split('/')[-1]} loading took {(end - start)/60} min")
print(f"{output_model.split('/')[-1]} memory footprint {classifier.model.get_memory_footprint()/1024/1024/1024} GB")


mdeberta-reduced-finetuned-c4 loading took 0.3954429030418396 min
mdeberta-reduced-finetuned-c4 memory footprint 0.5180449523031712 GB


In [9]:
from tqdm import tqdm
def predict(df):
  preds = ['unknown'] * len(df)
  scores = [0] * len(df)
  for index, row in tqdm(df.iterrows(), total=len(df)):
    tokenizer_kwargs = {'truncation':True,'max_length':512}
    pred = classifier(row['text'], **tokenizer_kwargs)
    preds[index] = pred[0]['label']
    scores[index] = pred[0]['score']
  return preds, scores

In [10]:
start = time.time()
preds = predict(test)
test['predictions'] = preds[0]
test['prediction_probs'] = preds[1]
end = time.time()
print(f"{output_model.split('/')[-1]} testing took {(end - start)/60} min")
print(f"{output_model.split('/')[-1]} memory footprint {classifier.model.get_memory_footprint()/1024/1024/1024} GB")

  0%|          | 9/51295 [00:01<1:46:38,  8.02it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 51295/51295 [23:23<00:00, 36.56it/s]

mdeberta-reduced-finetuned-c4 testing took 23.386586050192516 min
mdeberta-reduced-finetuned-c4 memory footprint 0.5180449523031712 GB





In [11]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

cr = classification_report(test['label'], test['predictions'], digits=4, zero_division=0)
cm = confusion_matrix(test['label'], test['predictions'])
TN = cm[0][0]
FN = cm[1][0]
TP = cm[1][1]
FP = cm[0][1]
# Fall out or false positive rate
FPR = FP/(FP+TN) if (FP+TN) > 0 else 0
# False negative rate
FNR = FN/(TP+FN) if (TP+FN) > 0 else 0

roc = roc_auc_score([label2id[x] for x in test['label']], [label2id[x] for x in test['predictions']])
roc_prob = roc_auc_score([label2id[x] for x in test['label']], test['prediction_probs'])

print(cm)
print(cr)
print(f"FPR: {FPR}")
print(f"FNR: {FNR}")
print(f"ROC: {roc}")
print(f"ROC_prob: {roc_prob}")

[[23476  1760]
 [  637 25422]]
              precision    recall  f1-score   support

       human     0.9736    0.9303    0.9514     25236
     machine     0.9353    0.9756    0.9550     26059

    accuracy                         0.9533     51295
   macro avg     0.9544    0.9529    0.9532     51295
weighted avg     0.9541    0.9533    0.9532     51295

FPR: 0.06974163892851482
FNR: 0.02444452972101769
ROC: 0.9529069156752338
ROC_prob: 0.23800324210339693


In [None]:
# save the results csv
test.to_csv('/content/drive/MyDrive/multitude_split/mdeberta-c4-results.csv', index=False)

In [None]:
# print cr in a table
pd.DataFrame(cr).transpose()

ValueError: DataFrame constructor not properly called!

In [None]:
# generate a classification report
print(classification_report(test['label'], test['predictions']))

              precision    recall  f1-score   support

       human       0.86      0.31      0.45      3236
     machine       0.92      0.99      0.96     26059

    accuracy                           0.92     29295
   macro avg       0.89      0.65      0.71     29295
weighted avg       0.91      0.92      0.90     29295



In [None]:
# calculate roc auc
from sklearn.metrics import roc_auc_score
roc_auc_score(test['label'], test['prediction_probs'])

0.9152464935161159

In [None]:
roc_auc_score(test['label'], test['predictions'])

0.6328468710657582

In [None]:
# convert the human machine label to 0 1
# test['label'] = test['label'].map({'human': 0, 'machine': 1})
test['predictions'] = test['predictions'].map({'human': 0, 'machine': 1})

In [None]:
# calculate false positive rate
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(test['label'], test['predictions'])
print(fpr)
print(tpr)

[0.         0.72682324 1.        ]
[0.         0.99251698 1.        ]


In [None]:
#

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
data = pd.read_csv('/content/drive/MyDrive/multitude_split/dataset_all.csv')
data.head()

Unnamed: 0,text,label,multi_label,split,language,length,source,word_count,unique_word_count,char_count,...,question_mark_count,exclamation_mark_count,flesch_reading_ease,gunning_fog_index,first_person_pronoun_count,person_entity_count,date_entity_count,uniqueness_bigram,uniqueness_trigram,syntax_variety
0,Der Ausbruch des Coronavirus hat die Entwicklu...,1,text-davinci-003,test,de,174,MULTITuDE_MassiveSumm_spiegel,199.0,118.0,1067.0,...,0.0,0.0,-272.02217,11.15603,0.0,0.0,0.0,0.90404,0.979695,12.0
1,Alex Azar was officially sworn in as the U.S. ...,1,text-davinci-003,train,en,57,MULTITuDE_MassiveSumm_voanews,70.0,54.0,311.0,...,0.0,1.0,-186.793214,8.714286,0.0,5.0,2.0,1.0,1.0,11.0
2,Європейський союз вимагає зупинити розтрату ко...,1,gpt-3.5-turbo,test,uk,105,MULTITuDE_MassiveSumm_interfax,130.0,82.0,691.0,...,0.0,0.0,-269.236538,11.015385,0.0,0.0,0.0,0.860465,0.929688,14.0
3,"Yesterday, hundreds of Zambian university stud...",1,text-davinci-003,train,en,254,MULTITuDE_MassiveSumm_voanews,292.0,149.0,1419.0,...,0.0,0.0,-231.229869,11.4401,1.0,1.0,1.0,0.876289,0.965517,13.0
4,"In a narrow and highly watched vote, the US Se...",1,gpt-4,train,en,416,MULTITuDE_MassiveSumm_voanews,476.0,242.0,2259.0,...,0.0,0.0,-224.855788,13.160504,1.0,2.0,2.0,0.871579,0.974684,15.0


In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]



In [None]:
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=300)

In [None]:
data_test = data[data["split"] == "test"]
data_val = data[data["split"] == "test"]
# select 100 samples from each language following same label distribution
data_val = data_val.groupby("language").apply(lambda x: x.sample(100)).reset_index(drop=True)
data_train = data[data["split"] == "train"]

train_en = data_train[data_train.language == "en"].groupby(['multi_label']).apply(lambda x: x.sample(min(1000, len(x)), random_state = 42)).sample(frac=1., random_state = 0).reset_index(drop=True)
train_es = data_train[data_train.language == "es"]
train_ru = data_train[data_train.language == "ru"]
data_train = pd.concat([train_en, train_es, train_ru], ignore_index=True, copy=False).sample(frac=1., random_state = 42).reset_index(drop=True)
data_train.describe()
# #print no. of each label for each language
# print(data_train.groupby(["language", "label"]).size())
# print(data_test.groupby(["language", "label"]).size())
# print(data_val.groupby(["language", "label"]).size())

data_test = data_test[["text", "label"]]
data_train = data_train[["text", "label"]]
data_val = data_val[["text", "label"]]


In [None]:
import datasets

data_train = datasets.Dataset.from_pandas(data_train)
data_test = datasets.Dataset.from_pandas(data_test)
data_val = datasets.Dataset.from_pandas(data_val)

tokenized_data_train = data_train.map(preprocess_function, batched=True)
tokenized_data_test = data_test.map(preprocess_function, batched=True)
tokenized_data_val = data_val.map(preprocess_function, batched=True)

Map:   0%|          | 0/26817 [00:00<?, ? examples/s]

Map:   0%|          | 0/29295 [00:00<?, ? examples/s]

Map:   0%|          | 0/1100 [00:00<?, ? examples/s]

In [None]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

In [None]:
import evaluate

accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")
recall = evaluate.load("recall")
precision = evaluate.load("precision")

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

In [None]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [None]:
from transformers import TFAutoModelForSequenceClassification, TrainingArguments, Trainer

model = TFAutoModelForSequenceClassification.from_pretrained(
    "intfloat/multilingual-e5-large", num_labels=2)

config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFXLMRobertaForSequenceClassification: ['embeddings.position_ids']
- This IS expected if you are initializing TFXLMRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLMRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFXLMRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for pred

In [None]:
# print the model summary
model.summary()

Model: "tfxlm_roberta_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 roberta (TFXLMRobertaMainL  multiple                  558840832 
 ayer)                                                           
                                                                 
 classifier (TFXLMRobertaCl  multiple                  1051650   
 assificationHead)                                               
                                                                 
Total params: 559892482 (2.09 GB)
Trainable params: 559892482 (2.09 GB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# print details of all layers
for layer in model.layers:
    print(layer.__class__.__name__)  # Print the class name of the layer

    print(layer.name)

TFXLMRobertaMainLayer
roberta
TFXLMRobertaClassificationHead
classifier


In [None]:
from transformers import create_optimizer
import tensorflow as tf

batch_size = 8
num_epochs = 3
batches_per_epoch = len(tokenized_data_train) // batch_size
total_train_steps = int(batches_per_epoch * num_epochs)
optimizer, schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

In [None]:
tf_train_set = model.prepare_tf_dataset(
    tokenized_data_train,
    shuffle=True,
    batch_size=8,
    collate_fn=data_collator,
)

tf_validation_set = model.prepare_tf_dataset(
    tokenized_data_val,
    shuffle=False,
    batch_size=8,
    collate_fn=data_collator,
)

In [None]:
model.compile(optimizer=optimizer)

In [None]:
from transformers.keras_callbacks import KerasMetricCallback

metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)

In [None]:
callbacks = [metric_callback]

In [None]:
model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)

Epoch 1/3


Cause: for/else statement not yet supported


Cause: for/else statement not yet supported
Epoch 2/3
Epoch 3/3


<tf_keras.src.callbacks.History at 0x7f4c00657f70>

In [None]:
# prompt: save the model

model.save("/content/drive/MyDrive/multitude_split/e5")


In [None]:
# # prompt: load the saved model
# from transformers import TFAutoModelForSequenceClassification, TrainingArguments, Trainer

# model = TFAutoModelForSequenceClassification.from_pretrained("/content/drive/MyDrive/multitude_split/ne1")


All model checkpoint layers were used when initializing TFDebertaV2ForSequenceClassification.

All the layers of TFDebertaV2ForSequenceClassification were initialized from the model checkpoint at /content/drive/MyDrive/multitude_split/ne1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDebertaV2ForSequenceClassification for predictions without further training.


In [None]:
model.summary()

Model: "tfxlm_roberta_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 roberta (TFXLMRobertaMainL  multiple                  558840832 
 ayer)                                                           
                                                                 
 classifier (TFXLMRobertaCl  multiple                  1051650   
 assificationHead)                                               
                                                                 
Total params: 559892482 (2.09 GB)
Trainable params: 559892482 (2.09 GB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# # print architecture of each layer
# for layer in model.layers:
#     print(layer.get_config())
#     print(layer.name)

In [None]:
tf_test_set = model.prepare_tf_dataset(
    tokenized_data_test,
    shuffle=False,
    batch_size=16,
    collate_fn=data_collator,
)
predictions = model.predict(tf_test_set)



In [None]:
predictions = np.argmax(predictions.logits, axis=-1)
true_labels = data_test["label"]

In [None]:
predictions = np.argmax(predictions.logits, axis=-1)

In [None]:
predictions

In [None]:
true_labels = data_test["label"]

In [None]:
from sklearn.metrics import classification_report

print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      3236
           1       0.89      1.00      0.94     26059

    accuracy                           0.89     29295
   macro avg       0.44      0.50      0.47     29295
weighted avg       0.79      0.89      0.84     29295



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(true_labels, predictions)

In [None]:
# roc_auc score
from sklearn.metrics import roc_auc_score

roc_auc_score(true_labels, predictions)

0.5

In [None]:
TN = cm[0, 0]
FP = cm[0, 1]
FPR = FP / (FP + TN)

In [None]:
FPR

1.0

In [None]:
FN = cm[1, 0]
TP = cm[1, 1]
FNR = FN / (FN + TP)
FNR

0.0

0.0

In [None]:
model