In [2]:
!pip install transformers datasets accelerate
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
from datasets import Dataset, concatenate_datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from transformers.optimization import Adafactor, AdafactorSchedule
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error
import torch
import gc
import time
output_model = 'xlm-roberta-large-ft'

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

print(device)

cuda:0


In [5]:
label_names = ["human", "machine"] #0, 1
id2label = {idx:label for idx, label in enumerate(label_names)}
label2id = {v:k for k,v in id2label.items()}

In [6]:
def map_labels(example):
  label_name = example["label"]
  return {"label": label2id[label_name], "label_name": label_name}

In [8]:
tokenizer = AutoTokenizer.from_pretrained('FacebookAI/xlm-roberta-large')
if tokenizer.pad_token is None:
  if tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token
  else:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

In [9]:
model_name = 'FacebookAI/xlm-roberta-large'
start = time.time()
num_labels = len(label_names)
model = AutoModelForSequenceClassification.from_pretrained('FacebookAI/xlm-roberta-large', num_labels=num_labels, label2id=label2id, id2label=id2label, ignore_mismatched_sizes=True)
model.resize_token_embeddings(len(tokenizer))
try:
  model.config.pad_token_id = tokenizer.get_vocab()[tokenizer.pad_token]
except:
  print("Warning: Exception occured while setting pad_token_id")
end = time.time()
print(f'{model_name} loading took {(end - start)/60} min')
print(f'{model_name} memory footprint {model.get_memory_footprint()/1024/1024/1024}')

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


FacebookAI/xlm-roberta-large loading took 0.6357929110527039 min
FacebookAI/xlm-roberta-large memory footprint 2.0857696905732155


In [10]:
train = pd.read_csv('/content/drive/MyDrive/multitude_split/dataset_all.csv')
train = train[train.split == "train"]

In [11]:
train_en = train[train.language == "en"].groupby(['multi_label']).apply(lambda x: x.sample(min(1000, len(x)), random_state = 42)).sample(frac=1., random_state = 0).reset_index(drop=True)
train_es = train[train.language == "es"]
train_ru = train[train.language == "ru"]
train = pd.concat([train_en, train_es, train_ru], ignore_index=True, copy=False).sample(frac=1., random_state = 42).reset_index(drop=True)

In [12]:
balance=False
train['label'] = ["human" if "human" in x else "machine" for x in train.multi_label]
if(balance):
  train = train.groupby(['label']).apply(lambda x: x.sample(train.label.value_counts().max(), replace=True, random_state = RANDOM_SEED)).sample(frac=1., random_state = RANDOM_SEED).reset_index(drop=True)

valid = train[-(len(train)//10):]
train = train[:-(len(train)//10)]

print(train.groupby('language')['multi_label'].value_counts())
print(train.label.value_counts())

language  multi_label     
en        llama-65b           915
          human               909
          alpaca-lora-30b     907
          opt-66b             903
          gpt-3.5-turbo       902
          vicuna-13b          894
          gpt-4               889
          opt-iml-max-1.3b    887
          text-davinci-003    886
es        gpt-4               918
          llama-65b           916
          vicuna-13b          903
          gpt-3.5-turbo       896
          opt-66b             896
          opt-iml-max-1.3b    893
          alpaca-lora-30b     887
          text-davinci-003    882
          human               854
ru        llama-65b           909
          gpt-3.5-turbo       898
          text-davinci-003    895
          alpaca-lora-30b     894
          human               892
          gpt-4               890
          vicuna-13b          883
          opt-66b             878
          opt-iml-max-1.3b    860
Name: count, dtype: int64
label
machine    21481
human 

In [13]:
train = Dataset.from_pandas(train, split='train')
valid = Dataset.from_pandas(valid, split='validation')
train = train.map(map_labels)
valid = valid.map(map_labels)

Map:   0%|          | 0/24136 [00:00<?, ? examples/s]

Map:   0%|          | 0/2681 [00:00<?, ? examples/s]

In [14]:
def tokenize_texts(examples):
  return tokenizer(examples["text"], truncation=True, max_length=512)

In [15]:
tokenized_train = train.map(tokenize_texts, batched=True)
tokenized_valid = valid.map(tokenize_texts, batched=True)

Map:   0%|          | 0/24136 [00:00<?, ? examples/s]

Map:   0%|          | 0/2681 [00:00<?, ? examples/s]

In [16]:
batch_size = 16
gradient_accumulation_steps=4
num_train_epochs = 10
learning_rate=2e-4
metric_for_best_model = 'MacroF1'
logging_steps = len(tokenized_train) // (batch_size * num_train_epochs)
logging_steps = round(2000 / (batch_size * gradient_accumulation_steps))

In [17]:
# ! pip install -U accelerate
# ! pip install -U transformers

In [18]:
import accelerate
import transformers

transformers.__version__, accelerate.__version__

('4.41.2', '0.31.0')

In [19]:
use_fp16 = True
if "mdeberta" in model_name: use_fp16 = False

args = TrainingArguments(
    output_dir='/content/drive/MyDrive/multitude_split/xlm-roberta',
    evaluation_strategy = "steps",
    logging_steps = logging_steps, #50,
    save_strategy="steps",
    save_steps = logging_steps, #50,
    save_total_limit=5,
    load_best_model_at_end=True,
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=True,
    num_train_epochs=num_train_epochs,
    weight_decay=0.01,
    push_to_hub=False,
    report_to="none",
    metric_for_best_model = metric_for_best_model,
    fp16=use_fp16 #mdeberta not working with fp16
)



In [20]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"ACC": accuracy_score(labels, predictions), "MacroF1": f1_score(labels, predictions, average='macro'), "MAE": mean_absolute_error(labels, predictions)}

In [21]:
optimizer = Adafactor(model.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
class MyAdafactorSchedule(AdafactorSchedule):
    def get_lr(self):
        opt = self.optimizer
        if "step" in opt.state[opt.param_groups[0]["params"][0]]:
            lrs = [opt._get_lr(group, opt.state[p]) for group in opt.param_groups for p in group["params"]]
        else:
            lrs = [args.learning_rate] #just to prevent error in some models (mdeberta), return fixed value according to set TrainingArguments
        return lrs #[lrs]
lr_scheduler = MyAdafactorSchedule(optimizer)

In [22]:
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks = [EarlyStoppingCallback(early_stopping_patience=10)],
    optimizers=(optimizer, lr_scheduler)
)

In [23]:
start = time.time()
trainer.train()
end = time.time()
print(f'{model_name} memory footprint {model.get_memory_footprint()/1024/1024/1024}')
print(f'{model_name} fine-tuning took {(end - start)/60} min')



Step,Training Loss,Validation Loss,Acc,Macrof1,Mae
31,0.71,0.425525,0.891831,0.478192,0.108169
62,0.3875,0.329806,0.895188,0.472348,0.104812
93,0.3317,0.321706,0.895188,0.472348,0.104812
124,0.3606,0.282956,0.895188,0.472348,0.104812
155,0.2929,0.225494,0.895188,0.472348,0.104812
186,0.2867,0.208704,0.895188,0.472348,0.104812
217,0.2454,0.196303,0.906005,0.571203,0.093995
248,0.2231,0.180079,0.913838,0.638287,0.086162
279,0.2428,0.4109,0.895188,0.472348,0.104812
310,0.2567,0.16905,0.932861,0.76442,0.067139




FacebookAI/xlm-roberta-large memory footprint 2.0857696905732155
FacebookAI/xlm-roberta-large fine-tuning took 142.3882975856463 min


In [24]:
import shutil
start = time.time()
shutil.rmtree(output_model, ignore_errors=True)
trainer.save_model()
end = time.time()
print(f'{output_model} saving took {(end - start)/60} min')

xlm-roberta-large-ft saving took 0.7460119684537252 min


In [25]:
test = pd.read_csv('/content/drive/MyDrive/multitude_split/dataset_all.csv')

test = test[test.split == "test"].reset_index(drop=True)
test['label'] = ["human" if "human" in x else "machine" for x in test.multi_label]

In [26]:
from sklearn.metrics import classification_report
from transformers import pipeline


start = time.time()
classifier = pipeline("text-classification", model='/content/drive/MyDrive/multitude_split/xlm-roberta', device=device, torch_dtype=torch.float16)
end = time.time()
print(f"{output_model.split('/')[-1]} loading took {(end - start)/60} min")
print(f"{output_model.split('/')[-1]} memory footprint {classifier.model.get_memory_footprint()/1024/1024/1024} GB")


xlm-roberta-large-ft loading took 0.25226621627807616 min
xlm-roberta-large-ft memory footprint 1.0428886748850346 GB


In [27]:
from tqdm import tqdm
def predict(df):
  preds = ['unknown'] * len(df)
  scores = [0] * len(df)
  for index, row in tqdm(df.iterrows(), total=len(df)):
    tokenizer_kwargs = {'truncation':True,'max_length':512}
    pred = classifier(row['text'], **tokenizer_kwargs)
    preds[index] = pred[0]['label']
    scores[index] = pred[0]['score']
  return preds, scores

In [28]:
start = time.time()
preds = predict(test)
test['predictions'] = preds[0]
test['prediction_probs'] = preds[1]
end = time.time()
print(f"{output_model.split('/')[-1]} testing took {(end - start)/60} min")
print(f"{output_model.split('/')[-1]} memory footprint {classifier.model.get_memory_footprint()/1024/1024/1024} GB")

  0%|          | 10/29295 [00:00<31:12, 15.64it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 29295/29295 [14:27<00:00, 33.75it/s]


xlm-roberta-large-ft testing took 14.46712209781011 min
xlm-roberta-large-ft memory footprint 1.0428886748850346 GB


In [29]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

cr = classification_report(test['label'], test['predictions'], digits=4, zero_division=0)
cm = confusion_matrix(test['label'], test['predictions'])
TN = cm[0][0]
FN = cm[1][0]
TP = cm[1][1]
FP = cm[0][1]
# Fall out or false positive rate
FPR = FP/(FP+TN) if (FP+TN) > 0 else 0
# False negative rate
FNR = FN/(TP+FN) if (TP+FN) > 0 else 0

roc = roc_auc_score([label2id[x] for x in test['label']], [label2id[x] for x in test['predictions']])
roc_prob = roc_auc_score([label2id[x] for x in test['label']], test['prediction_probs'])
print('Results bert-multi')
print(cm)
print(cr)
print(f"FPR: {FPR}")
print(f"FNR: {FNR}")
print(f"ROC: {roc}")
print(f"ROC_prob: {roc_prob}")

Results bert-multi
[[ 2065  1171]
 [  863 25196]]
              precision    recall  f1-score   support

       human     0.7053    0.6381    0.6700      3236
     machine     0.9556    0.9669    0.9612     26059

    accuracy                         0.9306     29295
   macro avg     0.8304    0.8025    0.8156     29295
weighted avg     0.9279    0.9306    0.9290     29295

FPR: 0.36186650185414093
FNR: 0.033117157220154264
ROC: 0.8025081704628524
ROC_prob: 0.8658012297472157


In [30]:
#save the results in a csv
test.to_csv('/content/drive/MyDrive/multitude_split/xlm-roberta-results.csv', index=False)

In [None]:
# generate a classification report
print(classification_report(test['label'], test['predictions']))

              precision    recall  f1-score   support

       human       0.54      0.61      0.57      3236
     machine       0.95      0.94      0.94     26059

    accuracy                           0.90     29295
   macro avg       0.75      0.77      0.76     29295
weighted avg       0.91      0.90      0.90     29295



In [None]:
# calculate roc auc
from sklearn.metrics import roc_auc_score
roc_auc_score(test['label'], test['prediction_probs'])

0.9152464935161159

In [None]:
roc_auc_score(test['label'], test['predictions'])

0.6328468710657582

In [None]:
# convert the human machine label to 0 1
# test['label'] = test['label'].map({'human': 0, 'machine': 1})
test['predictions'] = test['predictions'].map({'human': 0, 'machine': 1})

In [None]:
# calculate false positive rate
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(test['label'], test['predictions'])
print(fpr)
print(tpr)

[0.         0.72682324 1.        ]
[0.         0.99251698 1.        ]


In [None]:
#

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
data = pd.read_csv('/content/drive/MyDrive/multitude_split/dataset_all.csv')
data.head()

Unnamed: 0,text,label,multi_label,split,language,length,source,word_count,unique_word_count,char_count,...,question_mark_count,exclamation_mark_count,flesch_reading_ease,gunning_fog_index,first_person_pronoun_count,person_entity_count,date_entity_count,uniqueness_bigram,uniqueness_trigram,syntax_variety
0,Der Ausbruch des Coronavirus hat die Entwicklu...,1,text-davinci-003,test,de,174,MULTITuDE_MassiveSumm_spiegel,199.0,118.0,1067.0,...,0.0,0.0,-272.02217,11.15603,0.0,0.0,0.0,0.90404,0.979695,12.0
1,Alex Azar was officially sworn in as the U.S. ...,1,text-davinci-003,train,en,57,MULTITuDE_MassiveSumm_voanews,70.0,54.0,311.0,...,0.0,1.0,-186.793214,8.714286,0.0,5.0,2.0,1.0,1.0,11.0
2,Європейський союз вимагає зупинити розтрату ко...,1,gpt-3.5-turbo,test,uk,105,MULTITuDE_MassiveSumm_interfax,130.0,82.0,691.0,...,0.0,0.0,-269.236538,11.015385,0.0,0.0,0.0,0.860465,0.929688,14.0
3,"Yesterday, hundreds of Zambian university stud...",1,text-davinci-003,train,en,254,MULTITuDE_MassiveSumm_voanews,292.0,149.0,1419.0,...,0.0,0.0,-231.229869,11.4401,1.0,1.0,1.0,0.876289,0.965517,13.0
4,"In a narrow and highly watched vote, the US Se...",1,gpt-4,train,en,416,MULTITuDE_MassiveSumm_voanews,476.0,242.0,2259.0,...,0.0,0.0,-224.855788,13.160504,1.0,2.0,2.0,0.871579,0.974684,15.0


In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/mdeberta-v3-base")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]



In [None]:
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=300)

In [None]:
data_test = data[data["split"] == "test"]
data_val = data[data["split"] == "test"]
# select 100 samples from each language following same label distribution
data_val = data_val.groupby("language").apply(lambda x: x.sample(100)).reset_index(drop=True)
data_train = data[data["split"] == "train"]

train_en = data_train[data_train.language == "en"].groupby(['multi_label']).apply(lambda x: x.sample(min(1000, len(x)), random_state = 42)).sample(frac=1., random_state = 0).reset_index(drop=True)
train_es = data_train[data_train.language == "es"]
train_ru = data_train[data_train.language == "ru"]
data_train = pd.concat([train_en, train_es, train_ru], ignore_index=True, copy=False).sample(frac=1., random_state = 42).reset_index(drop=True)
data_train.describe()
# #print no. of each label for each language
# print(data_train.groupby(["language", "label"]).size())
# print(data_test.groupby(["language", "label"]).size())
# print(data_val.groupby(["language", "label"]).size())

data_test = data_test[["text", "label"]]
data_train = data_train[["text", "label"]]
data_val = data_val[["text", "label"]]


In [None]:
import datasets

data_train = datasets.Dataset.from_pandas(data_train)
data_test = datasets.Dataset.from_pandas(data_test)
data_val = datasets.Dataset.from_pandas(data_val)

tokenized_data_train = data_train.map(preprocess_function, batched=True)
tokenized_data_test = data_test.map(preprocess_function, batched=True)
tokenized_data_val = data_val.map(preprocess_function, batched=True)

Map:   0%|          | 0/26817 [00:00<?, ? examples/s]

Map:   0%|          | 0/29295 [00:00<?, ? examples/s]

Map:   0%|          | 0/1100 [00:00<?, ? examples/s]

In [None]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

In [None]:
import evaluate

accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")
recall = evaluate.load("recall")
precision = evaluate.load("precision")

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

In [None]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [None]:
from transformers import TFAutoModelForSequenceClassification, TrainingArguments, Trainer

model = TFAutoModelForSequenceClassification.from_pretrained(
    "intfloat/multilingual-e5-large", num_labels=2)

config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFXLMRobertaForSequenceClassification: ['embeddings.position_ids']
- This IS expected if you are initializing TFXLMRobertaForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLMRobertaForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFXLMRobertaForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for pred

In [None]:
# print the model summary
model.summary()

Model: "tfxlm_roberta_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 roberta (TFXLMRobertaMainL  multiple                  558840832 
 ayer)                                                           
                                                                 
 classifier (TFXLMRobertaCl  multiple                  1051650   
 assificationHead)                                               
                                                                 
Total params: 559892482 (2.09 GB)
Trainable params: 559892482 (2.09 GB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# print details of all layers
for layer in model.layers:
    print(layer.__class__.__name__)  # Print the class name of the layer

    print(layer.name)

TFXLMRobertaMainLayer
roberta
TFXLMRobertaClassificationHead
classifier


In [None]:
from transformers import create_optimizer
import tensorflow as tf

batch_size = 8
num_epochs = 3
batches_per_epoch = len(tokenized_data_train) // batch_size
total_train_steps = int(batches_per_epoch * num_epochs)
optimizer, schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

In [None]:
tf_train_set = model.prepare_tf_dataset(
    tokenized_data_train,
    shuffle=True,
    batch_size=8,
    collate_fn=data_collator,
)

tf_validation_set = model.prepare_tf_dataset(
    tokenized_data_val,
    shuffle=False,
    batch_size=8,
    collate_fn=data_collator,
)

In [None]:
model.compile(optimizer=optimizer)

In [None]:
from transformers.keras_callbacks import KerasMetricCallback

metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)

In [None]:
callbacks = [metric_callback]

In [None]:
model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=3, callbacks=callbacks)

Epoch 1/3


Cause: for/else statement not yet supported


Cause: for/else statement not yet supported
Epoch 2/3
Epoch 3/3


<tf_keras.src.callbacks.History at 0x7f4c00657f70>

In [None]:
# prompt: save the model

model.save("/content/drive/MyDrive/multitude_split/e5")


In [None]:
# # prompt: load the saved model
# from transformers import TFAutoModelForSequenceClassification, TrainingArguments, Trainer

# model = TFAutoModelForSequenceClassification.from_pretrained("/content/drive/MyDrive/multitude_split/ne1")


All model checkpoint layers were used when initializing TFDebertaV2ForSequenceClassification.

All the layers of TFDebertaV2ForSequenceClassification were initialized from the model checkpoint at /content/drive/MyDrive/multitude_split/ne1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDebertaV2ForSequenceClassification for predictions without further training.


In [None]:
model.summary()

Model: "tfxlm_roberta_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 roberta (TFXLMRobertaMainL  multiple                  558840832 
 ayer)                                                           
                                                                 
 classifier (TFXLMRobertaCl  multiple                  1051650   
 assificationHead)                                               
                                                                 
Total params: 559892482 (2.09 GB)
Trainable params: 559892482 (2.09 GB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# # print architecture of each layer
# for layer in model.layers:
#     print(layer.get_config())
#     print(layer.name)

In [None]:
tf_test_set = model.prepare_tf_dataset(
    tokenized_data_test,
    shuffle=False,
    batch_size=16,
    collate_fn=data_collator,
)
predictions = model.predict(tf_test_set)



In [None]:
predictions = np.argmax(predictions.logits, axis=-1)
true_labels = data_test["label"]

In [None]:
predictions = np.argmax(predictions.logits, axis=-1)

In [None]:
predictions

In [None]:
true_labels = data_test["label"]

In [None]:
from sklearn.metrics import classification_report

print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      3236
           1       0.89      1.00      0.94     26059

    accuracy                           0.89     29295
   macro avg       0.44      0.50      0.47     29295
weighted avg       0.79      0.89      0.84     29295



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(true_labels, predictions)

In [None]:
# roc_auc score
from sklearn.metrics import roc_auc_score

roc_auc_score(true_labels, predictions)

0.5

In [None]:
TN = cm[0, 0]
FP = cm[0, 1]
FPR = FP / (FP + TN)

In [None]:
FPR

1.0

In [None]:
FN = cm[1, 0]
TP = cm[1, 1]
FNR = FN / (FN + TP)
FNR

0.0

0.0

In [None]:
model