In [None]:
# !pip install numpy
# !pip install pandas
# !pip install scikit-learn
# !pip install torch
# !pip install transformers

from transformers import XLNetModel, XLNetTokenizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, hamming_loss, roc_auc_score, average_precision_score
from collections import defaultdict
from torch.cuda.amp import autocast, GradScaler
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import time

## Hyperparameters
# Token length every encoded comment is padded or truncated to.
MAX_LEN = 512

# Batch sizes for the three splits (kept as separate names so each can be
# tuned independently).
TRAIN_BATCH_SIZE = VALID_BATCH_SIZE = TEST_BATCH_SIZE = 8

# Optimisation schedule.
EPOCHS = 15
LEARNING_RATE = 1e-5

# Decision threshold intended for the sigmoid probabilities.
THRESHOLD = 0.5
## Dataset Class
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one comment per item.

    Text is read from the ``'comment'`` column of ``df`` and the label
    vector from the columns named in ``target_list``. Each item is a dict
    with the flattened ``input_ids``, ``attention_mask`` and
    ``token_type_ids`` tensors returned by the tokenizer, a float
    ``targets`` tensor, and the whitespace-normalised text under ``title``.
    """

    # Tokenizer outputs that are flattened and copied into every sample.
    _ENCODED_KEYS = ('input_ids', 'attention_mask', 'token_type_ids')

    def __init__(self, df, tokenizer, max_len, target_list):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.title = list(df['comment'])
        self.targets = self.df[target_list].values

    def __len__(self):
        return len(self.title)

    def __getitem__(self, index):
        # str() guards against non-string cells; split/join collapses every
        # run of whitespace into a single space.
        text = " ".join(str(self.title[index]).split())

        encoded = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # return_tensors='pt' yields a leading batch axis; flatten drops it.
        sample = {key: encoded[key].flatten() for key in self._ENCODED_KEYS}
        sample['targets'] = torch.FloatTensor(self.targets[index])
        sample['title'] = text
        return sample
    
## Data
file = '../data/hatespeech/hate_speech.csv'
df = pd.read_csv(file)

# Two-stage split with a fixed seed: hold out 20% of all rows for testing,
# then hold out 20% of the remainder for validation.
temp_df, test_df = train_test_split(df, test_size=0.20, shuffle=True, random_state=88)
train_df, val_df = train_test_split(temp_df, test_size=0.20, shuffle=True, random_state=88)

# Every column after the first is treated as a label column.
target_list = train_df.columns[1:].tolist()

## Tokenizer
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
train_dataset, valid_dataset, test_dataset = (
    CustomDataset(split_df, tokenizer, MAX_LEN, target_list)
    for split_df in (train_df, val_df, test_df)
)

## Data Loader
# Only the training loader shuffles; validation and test keep row order.
train_data_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0
)
val_data_loader = torch.utils.data.DataLoader(
    valid_dataset, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0
)
test_data_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False, num_workers=0
)

## Device
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

## Model
class XLNETWithCNN(nn.Module):
    """XLNet encoder followed by a single Conv1d/max-pool block and a linear head.

    The forward pass returns raw logits (no sigmoid), one per class.

    Args:
        num_classes: Number of output logits.
        max_len: Token length of every input sequence. The linear head is
            sized from it, so inputs must be padded/truncated to exactly
            this length. Defaults to the module-level ``MAX_LEN``.
        pretrained_name: Name or path handed to
            ``XLNetModel.from_pretrained``.
    """

    def __init__(self, num_classes, max_len=None, pretrained_name='xlnet-base-cased'):
        super().__init__()
        # Resolve the default lazily so the class can also be built with an
        # explicit length that differs from the global setting.
        self.max_len = MAX_LEN if max_len is None else max_len

        self.xlnet = XLNetModel.from_pretrained(pretrained_name)
        # Take the encoder width from its config instead of hard-coding 768,
        # so a different checkpoint size does not break the conv layer.
        hidden_size = self.xlnet.config.d_model
        self.drop = nn.Dropout(0.3)

        # CNN layer: kernel 3 with padding 1 keeps the sequence length.
        self.conv1 = nn.Conv1d(in_channels=hidden_size, out_channels=128, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        # Pooling with kernel 2 halves the sequence length (floor division).
        self.pool = nn.MaxPool1d(kernel_size=2)

        # Fully connected layer over the flattened (channels * pooled_len) map.
        self.fc = nn.Linear(128 * (self.max_len // 2), num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return logits of shape (batch_size, num_classes).

        Raises:
            ValueError: If the sequence length of ``input_ids`` differs from
                the ``max_len`` the head was built for.
        """
        if input_ids.size(1) != self.max_len:
            # Fail early with a readable message instead of a matmul shape
            # error inside the linear head.
            raise ValueError(
                f"expected sequences of length {self.max_len}, got {input_ids.size(1)}"
            )

        # Last hidden state from XLNet: (batch_size, sequence_length, hidden_size)
        outputs = self.xlnet(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        x = self.drop(outputs[0])

        # Conv1d expects channels first: (batch_size, hidden_size, sequence_length)
        x = x.permute(0, 2, 1)

        x = self.pool(self.relu(self.conv1(x)))

        # Flatten everything except the batch axis before the linear head.
        x = x.view(x.size(0), -1)
        return self.fc(x)
    

## Setting the model
# One output logit per label column; Module.to() moves the parameters in
# place and returns the same module.
model = XLNETWithCNN(num_classes=len(target_list)).to(device)

## Loss & Optimizer
def loss_fn(outputs, targets):
    """Return the mean binary cross-entropy between raw logits and targets.

    ``outputs`` are un-activated logits; the sigmoid is applied inside the
    loss. ``targets`` must have the same shape as ``outputs``.
    """
    criterion = torch.nn.BCEWithLogitsLoss()
    return criterion(outputs, targets)

# AdamW over every model parameter (encoder, conv block and linear head).
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=1e-4,
)


## Training function
def _optimizer_step(model, optimizer, scaler, max_norm=1.0):
    """Clip the accumulated gradients and apply one AMP-aware optimizer update."""
    # The accumulated gradients are still multiplied by the loss scale.
    # Unscale them first so max_norm is compared against their true size.
    scaler.unscale_(optimizer)
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)
    scaler.step(optimizer)
    scaler.update()
    optimizer.zero_grad()


def train_model(training_loader, model, optimizer, accumulation_steps=4):
    """Train ``model`` for one pass over ``training_loader``.

    Uses mixed precision and gradient accumulation: gradients from
    ``accumulation_steps`` consecutive batches are summed before a single
    clipped optimizer update. A trailing group with fewer batches still
    triggers an update at the end of the pass.

    Args:
        training_loader: Iterable of batches; each batch is a dict with
            ``input_ids``, ``attention_mask``, ``token_type_ids`` and
            ``targets`` tensors.
        model: Module called as ``model(ids, mask, token_type_ids)`` that
            returns logits.
        optimizer: Optimizer bound to ``model``'s parameters.
        accumulation_steps: Number of batches per optimizer update.

    Returns:
        Tuple ``(model, accuracy, mean_loss)`` where ``accuracy`` is the
        fraction of individual label decisions (sigmoid, rounded) that match
        the targets and ``mean_loss`` is the mean of the per-batch losses.

    Raises:
        ValueError: If ``accumulation_steps`` is smaller than 1 or the
            loader yields no batches.
    """
    if accumulation_steps < 1:
        raise ValueError("accumulation_steps must be a positive integer")

    losses = []
    correct_predictions = 0
    num_samples = 0
    pending = 0  # batches back-propagated since the last optimizer update

    # Set model to training mode (activate dropout, batch norm)
    model.train()

    # Mixed precision
    scaler = GradScaler()

    # Start from clean gradients in case the caller left some behind.
    optimizer.zero_grad()

    for data in training_loader:
        ids = data['input_ids'].to(device, dtype=torch.long)
        mask = data['attention_mask'].to(device, dtype=torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
        targets = data['targets'].to(device, dtype=torch.float)

        # Forward pass with mixed precision
        with autocast():
            outputs = model(ids, mask, token_type_ids)
            loss = loss_fn(outputs, targets)
        losses.append(loss.item())

        # Training accuracy: sigmoid, then round to a 0/1 decision per label
        predictions = torch.sigmoid(outputs).cpu().detach().numpy().round()
        labels = targets.cpu().detach().numpy()
        correct_predictions += np.sum(predictions == labels)
        num_samples += labels.size  # every label of every sample counts

        # Divide by the group size so the summed gradients approximate the
        # gradient of one large batch.
        scaler.scale(loss / accumulation_steps).backward()
        pending += 1

        if pending == accumulation_steps:
            _optimizer_step(model, optimizer, scaler)
            pending = 0

        # Clear GPU cache
        torch.cuda.empty_cache()

    if not losses:
        raise ValueError("training_loader yielded no batches")

    # Flush a trailing, incomplete accumulation group.
    if pending:
        _optimizer_step(model, optimizer, scaler)

    # Returning: trained model, model accuracy, mean loss
    return model, float(correct_predictions) / num_samples, np.mean(losses)


## Evaluator Function
def eval_model(validation_loader, model, threshold=0.5, target_list=None):
    """Evaluate ``model`` on ``validation_loader`` and print multi-label metrics.

    Args:
        validation_loader: Iterable of batches; each batch is a dict with
            ``input_ids``, ``attention_mask``, ``token_type_ids`` and
            ``targets`` tensors.
        model: Module called as ``model(ids, mask, token_type_ids)`` that
            returns logits.
        threshold: Probability at or above which a label counts as predicted.
        target_list: Optional label names for the classification report;
            numeric indices are shown when omitted.

    Returns:
        Tuple ``(weighted_f1, mean_loss)``.

    Raises:
        ValueError: If the loader yields no batches.
    """
    model.eval()
    batch_probs = []
    batch_targets = []
    losses = []

    with torch.no_grad():
        for data in validation_loader:
            ids = data['input_ids'].to(device, dtype=torch.long)
            mask = data['attention_mask'].to(device, dtype=torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            targets = data['targets'].to(device, dtype=torch.float)

            # Mixed precision forward pass
            with torch.cuda.amp.autocast():
                outputs = model(ids, mask, token_type_ids)
                loss = loss_fn(outputs, targets)
            losses.append(loss.item())

            # Cast to float32 before the sigmoid: half-precision
            # probabilities collapse many distinct scores onto the same
            # value, which coarsens the ranking metrics below.
            batch_probs.append(torch.sigmoid(outputs.float()).cpu().numpy())
            batch_targets.append(targets.cpu().numpy())

            # Clear GPU cache
            torch.cuda.empty_cache()

    if not losses:
        raise ValueError("validation_loader yielded no batches")

    final_probs = np.concatenate(batch_probs, axis=0)
    final_targets = np.concatenate(batch_targets, axis=0)
    # Threshold once, on the probabilities.
    final_outputs = (final_probs >= threshold).astype(int)

    # Calculating metrics. zero_division=0 gives the same values as the
    # default but without a warning for every label that has no predictions.
    acc = accuracy_score(final_targets, final_outputs)
    f1 = f1_score(final_targets, final_outputs, average='weighted', zero_division=0)
    precision = precision_score(final_targets, final_outputs, average='weighted', zero_division=0)
    recall = recall_score(final_targets, final_outputs, average='weighted', zero_division=0)
    hamming = hamming_loss(final_targets, final_outputs)

    auc_roc = roc_auc_score(final_targets, final_probs, average='weighted', multi_class='ovr')
    aupr = average_precision_score(final_targets, final_probs, average='weighted')

    average_loss = np.mean(losses)

    print(f"Accuracy: {acc}")
    print(f"F1 Score: {f1}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"Hamming Loss: {hamming}")
    print(f"Average Loss: {average_loss}")
    print(f"AUC-ROC: {auc_roc}")
    print(f"AUPR: {aupr}")
    print(
        "\nClassification Report:\n",
        classification_report(final_targets, final_outputs, target_names=target_list, zero_division=0),
    )

    print("\n\n")
    return f1, average_loss


## Training & Evaluation
# Single definition of the checkpoint path so saving and loading cannot drift apart.
BEST_MODEL_PATH = "hate_XLNET_CNN_8_MLTC_model_state.bin"

# recording starting time
start = time.time()

history = defaultdict(list)
# Start below any attainable F1 so the first epoch always writes a
# checkpoint; otherwise a run whose F1 stays at 0 would leave no file for
# the test phase to load.
best_f1 = float("-inf")

for epoch in range(1, EPOCHS+1):
    print(f'Epoch {epoch}/{EPOCHS}')
    model, train_acc, train_loss = train_model(train_data_loader, model, optimizer)
    # Pass the configured threshold and the label names so the report is
    # labelled with column names instead of indices.
    val_f1, val_loss = eval_model(
        val_data_loader, model, threshold=THRESHOLD, target_list=target_list
    )

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_f1'].append(val_f1)
    history['val_loss'].append(val_loss)
    # save the best model
    if val_f1 > best_f1:
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        best_f1 = val_f1

# recording end time
end = time.time()
print(f"Total training and validation time: {end - start} seconds")


## Testing
# Loading pretrained model (best model)
print("\n\nTesting\n\n")
model = XLNETWithCNN(num_classes=len(target_list))
# map_location lets a checkpoint written on the GPU be restored on a
# CPU-only machine as well.
model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=device))
model = model.to(device)

# recording starting time
start = time.time()
# Evaluate the model using the test data
eval_model(test_data_loader, model, threshold=THRESHOLD, target_list=target_list)
# recording end time
end = time.time()
print(f"Test-set evaluation time: {end - start} seconds")

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


Epoch 1/15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.15963396034570412
F1 Score: 0.2980211198671963
Precision: 0.358282262233836
Recall: 0.34493452791178497
Hamming Loss: 0.3063548551093035
Average Loss: 0.6061812190990138
AUC-ROC: 0.5742214680406673
AUPR: 0.3960735345044881

Classification Report:
               precision    recall  f1-score   support

           0       0.48      0.86      0.62       918
           1       0.59      0.15      0.23       703
           2       0.00      0.00      0.00       473
           3       0.00      0.00      0.00       316
           4       0.37      0.22      0.27       492

   micro avg       0.47      0.34      0.40      2902
   macro avg       0.29      0.24      0.22      2902
weighted avg       0.36      0.34      0.30      2902
 samples avg       0.40      0.33      0.34      2902




Epoch 2/15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1443823080833757
F1 Score: 0.3374599601316983
Precision: 0.36044778294727936
Recall: 0.34769124741557544
Hamming Loss: 0.2926283680732079
Average Loss: 0.603744692191845
AUC-ROC: 0.5989133717287708
AUPR: 0.4213995154526646

Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.72      0.60       918
           1       0.53      0.38      0.44       703
           2       0.00      0.00      0.00       473
           3       0.00      0.00      0.00       316
           4       0.41      0.17      0.25       492

   micro avg       0.51      0.35      0.41      2902
   macro avg       0.29      0.25      0.26      2902
weighted avg       0.36      0.35      0.34      2902
 samples avg       0.35      0.33      0.31      2902




Epoch 3/15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.1423487544483986
F1 Score: 0.2743146445991632
Precision: 0.386555519448305
Recall: 0.23949000689179875
Hamming Loss: 0.27341128622267413
Average Loss: 0.5775272525180646
AUC-ROC: 0.6468315345177721
AUPR: 0.47303570950628676

Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.34      0.44       918
           1       0.58      0.54      0.56       703
           2       0.33      0.00      0.00       473
           3       0.00      0.00      0.00       316
           4       0.00      0.00      0.00       492

   micro avg       0.59      0.24      0.34      2902
   macro avg       0.30      0.18      0.20      2902
weighted avg       0.39      0.24      0.27      2902
 samples avg       0.27      0.23      0.23      2902




Epoch 4/15
Accuracy: 0.2577529232333503
F1 Score: 0.47871409738595416
Precision: 0.6658182777905914
Recall: 0.5206753962784286
Hamming Loss: 0.24250127097102187
Average Loss: 0.5208352448009863

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5/15
Accuracy: 0.3274021352313167
F1 Score: 0.6330206489622804
Precision: 0.6447455251722308
Recall: 0.635079255685734
Hamming Loss: 0.20610066090493137
Average Loss: 0.4838030182006883
AUC-ROC: 0.8129783169838374
AUPR: 0.7113749978053436

Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.80      0.73       918
           1       0.74      0.78      0.76       703
           2       0.54      0.44      0.48       473
           3       0.62      0.54      0.58       316
           4       0.58      0.37      0.45       492

   micro avg       0.66      0.64      0.65      2902
   macro avg       0.63      0.59      0.60      2902
weighted avg       0.64      0.64      0.63      2902
 samples avg       0.65      0.65      0.62      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6/15
Accuracy: 0.3924758515505846
F1 Score: 0.6690455044671282
Precision: 0.7155284231847135
Recall: 0.6316333563059958
Hamming Loss: 0.18017285205897304
Average Loss: 0.4350611279165842
AUC-ROC: 0.8366600934280495
AUPR: 0.7422687166520601

Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.75      0.76       918
           1       0.82      0.75      0.78       703
           2       0.58      0.46      0.51       473
           3       0.77      0.58      0.66       316
           4       0.58      0.44      0.50       492

   micro avg       0.72      0.63      0.67      2902
   macro avg       0.70      0.60      0.64      2902
weighted avg       0.72      0.63      0.67      2902
 samples avg       0.71      0.65      0.65      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7/15
Accuracy: 0.3990849008642603
F1 Score: 0.6674249153217363
Precision: 0.7261648445948212
Recall: 0.6529979324603722
Hamming Loss: 0.17641077783426537
Average Loss: 0.4203185601447656
AUC-ROC: 0.8508851784656412
AUPR: 0.7697857909447913

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.72      0.77       918
           1       0.67      0.93      0.78       703
           2       0.74      0.33      0.45       473
           3       0.75      0.71      0.73       316
           4       0.61      0.41      0.49       492

   micro avg       0.72      0.65      0.69      2902
   macro avg       0.72      0.62      0.64      2902
weighted avg       0.73      0.65      0.67      2902
 samples avg       0.73      0.67      0.67      2902




Epoch 8/15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.43416370106761565
F1 Score: 0.7033549902984022
Precision: 0.7298966346323464
Recall: 0.687801516195727
Hamming Loss: 0.16329435688866295
Average Loss: 0.39125727892406587
AUC-ROC: 0.8616534734536546
AUPR: 0.7842663764168376

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.76      0.78       918
           1       0.77      0.87      0.82       703
           2       0.65      0.43      0.52       473
           3       0.80      0.81      0.81       316
           4       0.57      0.46      0.51       492

   micro avg       0.74      0.69      0.71      2902
   macro avg       0.72      0.67      0.69      2902
weighted avg       0.73      0.69      0.70      2902
 samples avg       0.75      0.70      0.70      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 9/15
Accuracy: 0.44636502287747837
F1 Score: 0.7155173655404174
Precision: 0.7475060038512237
Recall: 0.6950379048931771
Hamming Loss: 0.1559735638027453
Average Loss: 0.37923589615317865
AUC-ROC: 0.8658717665624641
AUPR: 0.7895253507112494

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.74      0.79       918
           1       0.79      0.89      0.84       703
           2       0.66      0.44      0.53       473
           3       0.82      0.83      0.82       316
           4       0.58      0.48      0.52       492

   micro avg       0.76      0.70      0.72      2902
   macro avg       0.73      0.68      0.70      2902
weighted avg       0.75      0.70      0.72      2902
 samples avg       0.77      0.71      0.71      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 10/15
Accuracy: 0.47839349262836806
F1 Score: 0.7281399118583372
Precision: 0.7719447443516259
Recall: 0.69710544452102
Hamming Loss: 0.1467208947635994
Average Loss: 0.37113169277828884
AUC-ROC: 0.8707920969682064
AUPR: 0.7981691513082106

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.76      0.80       918
           1       0.86      0.85      0.85       703
           2       0.62      0.52      0.57       473
           3       0.78      0.87      0.83       316
           4       0.66      0.41      0.51       492

   micro avg       0.78      0.70      0.74      2902
   macro avg       0.75      0.68      0.71      2902
weighted avg       0.77      0.70      0.73      2902
 samples avg       0.79      0.71      0.72      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 11/15
Accuracy: 0.4667005592272496
F1 Score: 0.7059863799597
Precision: 0.7723668433456542
Recall: 0.680565127498277
Hamming Loss: 0.14966954753431622
Average Loss: 0.36374418943272374
AUC-ROC: 0.8737259496411194
AUPR: 0.8025734554463915

Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.81      0.79       918
           1       0.85      0.85      0.85       703
           2       0.72      0.40      0.51       473
           3       0.81      0.89      0.84       316
           4       0.71      0.32      0.44       492

   micro avg       0.78      0.68      0.73      2902
   macro avg       0.77      0.65      0.69      2902
weighted avg       0.77      0.68      0.71      2902
 samples avg       0.78      0.70      0.71      2902




Epoch 12/15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.4753431621759024
F1 Score: 0.7370212804954993
Precision: 0.7525638456894201
Recall: 0.7291523087525844
Hamming Loss: 0.1480427046263345
Average Loss: 0.37000812178220205
AUC-ROC: 0.8767379076364816
AUPR: 0.8072167189424396

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.80      0.80       918
           1       0.86      0.86      0.86       703
           2       0.60      0.58      0.59       473
           3       0.77      0.90      0.83       316
           4       0.64      0.44      0.52       492

   micro avg       0.76      0.73      0.74      2902
   macro avg       0.73      0.72      0.72      2902
weighted avg       0.75      0.73      0.74      2902
 samples avg       0.78      0.74      0.73      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 13/15
Accuracy: 0.4941535332994408
F1 Score: 0.7439570618244767
Precision: 0.779261186642337
Recall: 0.7208821502412129
Hamming Loss: 0.14041687849517032
Average Loss: 0.3572882691897997
AUC-ROC: 0.8778966579417572
AUPR: 0.8072324138873542

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.76      0.81       918
           1       0.82      0.91      0.86       703
           2       0.70      0.45      0.55       473
           3       0.89      0.87      0.88       316
           4       0.59      0.54      0.56       492

   micro avg       0.79      0.72      0.75      2902
   macro avg       0.77      0.71      0.73      2902
weighted avg       0.78      0.72      0.74      2902
 samples avg       0.80      0.74      0.74      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 14/15
Accuracy: 0.4804270462633452
F1 Score: 0.7336636136653334
Precision: 0.7530969986398405
Recall: 0.7305306685044797
Hamming Loss: 0.14641586171835283
Average Loss: 0.3646102364106876
AUC-ROC: 0.8801626188189169
AUPR: 0.8114108545898627

Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.82      0.80       918
           1       0.83      0.90      0.86       703
           2       0.64      0.54      0.59       473
           3       0.78      0.90      0.83       316
           4       0.69      0.40      0.51       492

   micro avg       0.76      0.73      0.75      2902
   macro avg       0.74      0.71      0.72      2902
weighted avg       0.75      0.73      0.73      2902
 samples avg       0.79      0.74      0.73      2902




Epoch 15/15


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5043213014743264
F1 Score: 0.7538665803258737
Precision: 0.7783864598069593
Recall: 0.7356995175740868
Hamming Loss: 0.13645144890696492
Average Loss: 0.3528546757572065
AUC-ROC: 0.8827034536019018
AUPR: 0.8153693492292609

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.78      0.81       918
           1       0.83      0.91      0.87       703
           2       0.67      0.54      0.60       473
           3       0.87      0.89      0.88       316
           4       0.63      0.50      0.55       492

   micro avg       0.79      0.74      0.76      2902
   macro avg       0.77      0.72      0.74      2902
weighted avg       0.78      0.74      0.75      2902
 samples avg       0.80      0.75      0.74      2902






  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Total training and validation time: 15803.960444927216 seconds


Testing


Accuracy: 0.5091500610004067
F1 Score: 0.7491031202228938
Precision: 0.7715157987456632
Recall: 0.7307259425998874
Hamming Loss: 0.13769825132167549
Average Loss: 0.3536288188165658
AUC-ROC: 0.8831209607277343
AUPR: 0.8041854444080775

Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.78      0.81      1149
           1       0.85      0.89      0.87       861
           2       0.67      0.54      0.60       579
           3       0.85      0.87      0.86       386
           4       0.57      0.49      0.52       579

   micro avg       0.78      0.73      0.75      3554
   macro avg       0.76      0.71      0.73      3554
weighted avg       0.77      0.73      0.75      3554
 samples avg       0.79      0.73      0.73      3554




Test-set evaluation time: 123.64667296409607 seconds


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
