In [1]:
# Enable IPython's autoreload extension in mode 2 so edited project modules are re-imported automatically.
%load_ext autoreload
%autoreload 2

### Import

In [2]:
import sys
sys.path.append('..')
from tqdm import tqdm
import pandas as pd

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import pickle

import gensim.downloader
from gensim.models import FastText


from sklearn.metrics import classification_report

from constants import CATEGORIES



In [3]:
# CATEGORIES = ['overall_toxic']

#### GPU Usage

In [4]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
# Only ask for the GPU name when CUDA is actually selected: the query raises on CPU-only machines.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))

Using device: cuda
NVIDIA GeForce RTX 3070


### Import Data

In [5]:
# Load the preprocessed train / validation / test splits (paths are relative to the working directory).
df_train = pd.read_parquet('data/df_train_preprocessed.parquet')
df_val = pd.read_parquet('data/df_val_preprocessed.parquet')
df_test = pd.read_parquet('data/df_test_preprocessed.parquet')

In [6]:
# Names of the preprocessing variants. Each one is used as the suffix of a
# `comment_text_<variant>` column and as a key of the pickled feature dictionaries.
preprocess_types = ['baseline',
                    'word_tokenize_no_normalization',
                    'word_tokenize_simple_normalization',
                    'word_tokenize_normalization',
                    'word_tokenize_full_normalization',
                    'bpe_tokenize_no_dup_no_punc_normalization',
                    'bpe_tokenize_simple_dup_normalization',
                    'bpe_tokenize_full_normalization']

#### Utils

In [7]:
def itemget(model_vector, nb_feature: int, item:str):
    """Look up the embedding of ``item``, falling back to a zero vector.

    Args:
        model_vector: Object indexable by token (for instance the word
            vectors built elsewhere in this notebook, or a plain dict).
        nb_feature: Length of the zero vector returned for unknown tokens.
        item: Token to look up.

    Returns:
        ``model_vector[item]`` when the lookup succeeds, otherwise
        ``np.zeros(nb_feature)``.
    """
    try:
        return model_vector[item]
    except KeyError:
        # Out-of-vocabulary token. Only the missing-key case is handled so that
        # real failures (and Ctrl-C) are no longer silently turned into zeros.
        return np.zeros(nb_feature)

In [8]:
def mean(list_vectors, nb_feature: int) -> np.ndarray:
    """Average a collection of vectors element-wise.

    Args:
        list_vectors: Sized collection of equally long vectors.
        nb_feature: Length of the zero vector returned for an empty collection.

    Returns:
        The mean along axis 0, or ``np.zeros(nb_feature)`` when
        ``list_vectors`` is empty.
    """
    has_vectors = len(list_vectors) > 0
    if has_vectors:
        return np.mean(list_vectors, axis=0)
    # Nothing to average (e.g. an empty text): fall back to the zero vector.
    return np.zeros(nb_feature)

In [10]:
def vectorize_and_split(model_vector, nb_feature,preprocess_type: str) -> tuple:
    """Turn each text of the three splits into one averaged token vector.

    Reads the ``comment_text_<preprocess_type>`` column of the notebook-level
    ``df_train``, ``df_val`` and ``df_test`` frames. Every text is split on
    whitespace, each token is looked up with ``itemget`` and the token vectors
    are averaged with ``mean``.

    Args:
        model_vector: Token-indexable embedding object passed to ``itemget``.
        nb_feature: Vector length used for the zero-vector fallbacks.
        preprocess_type: Suffix selecting the text column.

    Returns:
        ``(X_train, X_valid, X_test, model_vector)`` where the first three
        items are 2-D arrays with one row per text.
    """
    text_column = f"comment_text_{preprocess_type}"

    def embed(text):
        # One vector per whitespace-separated token, then their average.
        token_vectors = [itemget(model_vector,nb_feature,token) for token in text.split()]
        return mean(token_vectors, nb_feature)

    train_matrix, valid_matrix, test_matrix = (
        np.vstack(frame[text_column].apply(embed).to_numpy())
        for frame in (df_train, df_val, df_test)
    )
    return (train_matrix, valid_matrix, test_matrix, model_vector)

In [11]:
def vectorize_split_and_pickle(model, vector, filename):
    """Vectorize every preprocessing variant and pickle the results.

    Args:
        model: Embedding object forwarded to ``vectorize_and_split``.
        vector: Forwarded to ``vectorize_and_split`` as its ``nb_feature``
            argument (the embedding dimensionality).
        filename: Base name of the output; the file is written to
            ``data/<filename>.pickle``.

    The pickled object is a dict keyed by preprocessing variant. Each value is
    the tuple returned by ``vectorize_and_split``, which also contains the
    embedding object itself.
    """
    features_by_variant = {}
    for variant in preprocess_types:
        features_by_variant[variant] = vectorize_and_split(model, vector, variant)

    target_path = f'data/{filename}.pickle'
    with open(target_path, 'wb') as handle:
        pickle.dump(features_by_variant, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [12]:
def input_generator(embeddings,normalization_type):
    """Return the train / validation / test matrices of one preprocessing variant.

    Args:
        embeddings: Mapping from variant name to a 4-item sequence
            ``(X_train, X_val, X_test, extra)``.
        normalization_type: Key selecting the variant.

    Returns:
        The first three items of the selected entry; the fourth is dropped.
    """
    entry = embeddings[normalization_type]
    # Unpacking enforces that the entry holds exactly four items.
    train_matrix, val_matrix, test_matrix, _unused = entry
    return train_matrix, val_matrix, test_matrix

In [29]:
def calculate_class_weights(df, categories, num_labels):
    """Compute inverse-frequency weights for the label columns.

    Args:
        df: Frame holding the label columns.
        categories: Names of the label columns to weight.
        num_labels: The weights are divided by ``num_labels + 1``.

    Returns:
        Array with one weight per category:
        ``len(df) / column_sum / (num_labels + 1)``.
    """
    positives_per_class = np.sum(df[categories].values, axis=0)
    inverse_frequency = len(df) / positives_per_class
    return inverse_frequency / (num_labels + 1)

In [13]:
# Embedding dimensionality: passed as vector_size when building the loaders and shown in the run labels.
dim = 200

In [14]:
class TextDataset(Dataset):
    """Dataset pairing precomputed text vectors with their label rows.

    ``vectors`` must expose ``.shape`` and integer indexing; ``labels`` must
    support integer indexing. ``model`` and ``vector_size`` are stored on the
    instance but not read by any method of this class.
    """

    def __init__(self, vectors, labels, model, vector_size):
        self.vectors, self.labels = vectors, labels
        self.model, self.vector_size = model, vector_size

    def __len__(self):
        # One sample per row of the vector matrix.
        n_samples = self.vectors.shape[0]
        return n_samples

    def __getitem__(self, idx):
        """Return ``(features, target)`` for sample ``idx`` as float32 tensors."""
        target_row = self.labels[idx]
        feature_row = self.vectors[idx]
        features = torch.tensor(feature_row, dtype=torch.float32)
        target = torch.tensor(target_row, dtype=torch.float32)
        return features, target


In [15]:
def prepare_data(vectors, labels, batch_size=1024, vector_size=200, model=None, shuffle=True):
    """Wrap vectors and labels in a ``TextDataset`` and build a ``DataLoader``.

    Args:
        vectors: Feature matrix, one row per sample.
        labels: Label rows aligned with ``vectors``.
        batch_size: Number of samples per batch.
        vector_size: Feature dimensionality; returned unchanged as ``input_dim``.
        model: Stored on the dataset; not used to build the loader.
        shuffle: Whether the loader reshuffles the samples on every pass.
            Defaults to ``True`` (the previous hard-coded behaviour); pass
            ``False`` for validation / test loaders to keep a stable order.

    Returns:
        ``(loader, input_dim)``.
    """
    dataset = TextDataset(vectors, labels, model, vector_size)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    input_dim = vector_size
    return loader, input_dim

In [16]:
def _append_non_toxic_column(label_matrix):
    """Append a column that is 1.0 for rows with no positive label, else 0.0."""
    non_toxic = (label_matrix.sum(axis=1) == 0).astype(float)
    return np.hstack([label_matrix, non_toxic.reshape(-1, 1)])


def evaluate_model(model, test_loader, device, categories):
    """Print a per-label classification report for ``model`` on ``test_loader``.

    Model outputs are thresholded at 0.5. An extra ``overall_non_toxic`` label
    is added to both predictions and targets; it is set for rows where no
    other label is positive.

    Args:
        model: Module returning one score per category for a batch of inputs.
        test_loader: Iterable of ``(inputs, targets)`` batches.
        device: Device the inputs are moved to before the forward pass.
        categories: Sequence of label names, in the order of the output columns.

    Returns:
        None; the report is printed.
    """
    model.eval()

    pred_batches = []
    true_batches = []

    with torch.no_grad():
        for inputs, targets in tqdm(test_loader):
            outputs = model(inputs.to(device))
            pred_batches.append(outputs.cpu().numpy())
            true_batches.append(targets.cpu().numpy())

    # Concatenate whole batches at once. Building a tensor from a Python list of
    # per-row ndarrays is slow and makes PyTorch emit a warning.
    y_true = np.concatenate(true_batches, axis=0)
    y_pred = np.concatenate(pred_batches, axis=0) > 0.5

    y_pred = _append_non_toxic_column(y_pred)
    y_true = _append_non_toxic_column(y_true)

    # list(...) lets callers pass any sequence of names, not only a list.
    print(classification_report(y_true, y_pred, target_names=list(categories) + ['overall_non_toxic']))

### Embeddings

#### Glove

In [17]:
# List the names of the pretrained models known to gensim's downloader.
print('\n'.join(gensim.downloader.info()['models'].keys()))
# One-off generation of the pickled GloVe features (left commented out, presumably to avoid recomputing it).
# NOTE(review): as written this would write data/glove-twitter-{dim}.pickle, whereas the loading cell
# reads data/glove_twitter_200d.pickle -- confirm which filename is intended.
# glove_vectors = gensim.downloader.load(f'glove-twitter-{dim}')
# vectorize_split_and_pickle(glove_vectors, dim, f'glove-twitter-{dim}')

fasttext-wiki-news-subwords-300
conceptnet-numberbatch-17-06-300
word2vec-ruscorpora-300
word2vec-google-news-300
glove-wiki-gigaword-50
glove-wiki-gigaword-100
glove-wiki-gigaword-200
glove-wiki-gigaword-300
glove-twitter-25
glove-twitter-50
glove-twitter-100
glove-twitter-200
__testing_word2vec-matrix-synopsis


#### FastText

In [18]:
def fasttext_model_generator(df_train: pd.DataFrame, normalization_type: str, vector_size = 200, window = 5, min_count = 1, workers = 4):
    """Train a gensim ``FastText`` model on one preprocessed text column.

    Args:
        df_train: Frame containing a ``comment_text_<normalization_type>`` column.
        normalization_type: Suffix selecting the text column.
        vector_size, window, min_count, workers: Forwarded unchanged to ``FastText``.

    Returns:
        The trained ``FastText`` model.
    """
    column_name = f'comment_text_{normalization_type}'
    # Each document becomes a list of whitespace-separated tokens.
    corpus = []
    for document in df_train[column_name]:
        corpus.append(document.split())

    training_options = dict(vector_size=vector_size, window=window, min_count=min_count, workers=workers)
    return FastText(sentences=corpus, **training_options)

# fasttext_vectors = fasttext_model_generator(df_train, 'baseline', dim).wv
# vectorize_split_and_pickle(fasttext_vectors, dim, f'fasttext_{dim}d')

In [19]:
# Load the precomputed GloVe features; expected to be a dict keyed by preprocessing variant
# (see input_generator for how the entries are unpacked).
# NOTE(review): the variable is named "w2v" although the file name and the run labels refer to GloVe.
# pickle.load can execute arbitrary code, so only load files produced by this project.
with open('data/glove_twitter_200d.pickle', 'rb') as file:
    Xs_w2v_pre_trained = pickle.load(file)

In [20]:
# Load the precomputed FastText features; expected to be a dict keyed by preprocessing variant.
# pickle.load can execute arbitrary code, so only load files produced by this project.
with open('data/fasttext_200d.pickle', 'rb') as file:
    Xs_fasttext_pre_trained= pickle.load(file)

In [37]:
# Feature sets compared by the grid search, in the order GloVe, FastText.
embeddings = [Xs_w2v_pre_trained, Xs_fasttext_pre_trained]

#### Création du modèle

In [22]:
class FNN(nn.Module):
    """Feed-forward classifier with a single hidden layer.

    Pipeline: linear -> batch norm -> ReLU -> linear -> sigmoid, so the
    output holds one value in (0, 1) per label.

    Args:
        input_dim: Size of each input vector.
        hidden_dim: Width of the hidden layer.
        num_labels: Number of output scores.
    """

    def __init__(self, input_dim, hidden_dim, num_labels):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc_out = nn.Linear(hidden_dim, num_labels)

    def forward(self, x):
        """Return per-label sigmoid scores for the batch ``x``."""
        hidden = torch.relu(self.bn1(self.fc1(x)))
        return torch.sigmoid(self.fc_out(hidden))

In [26]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)
    total_loss = 0.0
    with torch.set_grad_enabled(training):
        for inputs, labels in tqdm(loader):
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            if training:
                loss.backward()
                optimizer.step()
            # Weight by batch size so a smaller last batch does not skew the mean.
            total_loss += loss.item() * inputs.size(0)
    return total_loss / len(loader.dataset)


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` and print the train / validation loss after each epoch.

    Args:
        model: Module to optimise; it is left in eval mode on return.
        train_loader: Loader of ``(inputs, labels)`` batches used for updates.
        val_loader: Loader of ``(inputs, labels)`` batches used for validation only.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to the model's parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to. Defaults to the device of the
            model's parameters, so the function no longer depends on a
            notebook-level ``device`` variable.

    Returns:
        None; progress is printed.
    """
    if device is None:
        device = next(model.parameters()).device

    for epoch in range(num_epochs):
        train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss = _run_epoch(model, val_loader, criterion, device)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

#### Initialisation du modèle

In [27]:
# Grid explored by the training loop: every preprocessing variant x embedding x hidden size.
model_parameters = {
    'process_types': preprocess_types,
    'embeddings': embeddings,
    'hidden_dim': [32,64,128],
}

# Mini-batch size (same value as prepare_data's default) and number of epochs per configuration.
batch_size = 1024
epochs = 10

In [40]:
# Labels and class weights are the same for every configuration, so build them once.
y_train = df_train[CATEGORIES].values
y_val = df_val[CATEGORIES].values
y_test = df_test[CATEGORIES].values
# NOTE(review): calculate_class_weights already adds 1 to num_labels, so passing
# len(CATEGORIES) + 1 divides by len(CATEGORIES) + 2 -- confirm this scaling is intended.
class_weights = torch.tensor(calculate_class_weights(df_train, CATEGORIES, len(CATEGORIES) + 1)).to(device)

# Grid search: preprocessing variant x embedding x hidden size.
# The (misspelled) loop variable name is kept because notebook loop variables remain in the
# global namespace and other cells may read it.
for proceprocess_types in model_parameters['process_types']:
    for embedding in model_parameters['embeddings']:
        # Features and loaders depend only on (variant, embedding), not on the hidden size.
        X_train, X_val, X_test = input_generator(embedding, proceprocess_types)

        train_loader, input_dim = prepare_data(X_train, y_train, batch_size=batch_size, model=embedding, vector_size=dim)
        val_loader, _ = prepare_data(X_val, y_val, batch_size=batch_size, model=embedding, vector_size=dim)
        test_loader, _ = prepare_data(X_test, y_test, batch_size=batch_size, model=embedding, vector_size=dim)

        for hidden_dim in model_parameters['hidden_dim']:
            if embedding is Xs_w2v_pre_trained:
                print(f'Processing {proceprocess_types} with glove-twitter-{dim} and hidden dimmension {hidden_dim}')
            else:
                print(f'Processing {proceprocess_types} with fasttext-{dim} and hidden dim {hidden_dim}')

            # A fresh model, loss and optimizer for every configuration.
            model = FNN(input_dim, hidden_dim, len(CATEGORIES)).to(device)
            criterion = nn.BCELoss(weight=class_weights)
            optimizer = optim.Adam(model.parameters(), lr=0.001)

            train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=epochs)
            evaluate_model(model, test_loader, device, CATEGORIES)
            print('\n\n')

Processing baseline with glove-twitter-200 and hidden dimmension 32


  0%|          | 0/125 [00:00<?, ?it/s]

100%|██████████| 125/125 [00:02<00:00, 59.98it/s]
100%|██████████| 32/32 [00:00<00:00, 43.40it/s]


Epoch 1/10, Train Loss: 4.7827, Val Loss: 2.4324


100%|██████████| 125/125 [00:02<00:00, 60.63it/s]
100%|██████████| 32/32 [00:00<00:00, 66.13it/s]


Epoch 2/10, Train Loss: 1.5972, Val Loss: 1.0181


100%|██████████| 125/125 [00:02<00:00, 61.78it/s]
100%|██████████| 32/32 [00:00<00:00, 48.36it/s]


Epoch 3/10, Train Loss: 0.8193, Val Loss: 0.5148


100%|██████████| 125/125 [00:02<00:00, 62.19it/s]
100%|██████████| 32/32 [00:00<00:00, 67.74it/s]


Epoch 4/10, Train Loss: 0.5831, Val Loss: 0.5740


100%|██████████| 125/125 [00:02<00:00, 61.32it/s]
100%|██████████| 32/32 [00:00<00:00, 48.11it/s]


Epoch 5/10, Train Loss: 0.4844, Val Loss: 0.4600


100%|██████████| 125/125 [00:01<00:00, 62.70it/s]
100%|██████████| 32/32 [00:00<00:00, 69.92it/s]


Epoch 6/10, Train Loss: 0.4346, Val Loss: 0.3913


100%|██████████| 125/125 [00:01<00:00, 64.07it/s]
100%|██████████| 32/32 [00:00<00:00, 48.62it/s]


Epoch 7/10, Train Loss: 0.4068, Val Loss: 0.4247


100%|██████████| 125/125 [00:01<00:00, 63.18it/s]
100%|██████████| 32/32 [00:00<00:00, 69.21it/s]


Epoch 8/10, Train Loss: 0.3873, Val Loss: 0.3947


100%|██████████| 125/125 [00:01<00:00, 63.45it/s]
100%|██████████| 32/32 [00:00<00:00, 48.25it/s]


Epoch 9/10, Train Loss: 0.3746, Val Loss: 0.3661


100%|██████████| 125/125 [00:01<00:00, 62.89it/s]
100%|██████████| 32/32 [00:00<00:00, 68.77it/s]


Epoch 10/10, Train Loss: 0.3658, Val Loss: 0.3651


100%|██████████| 63/63 [00:00<00:00, 67.13it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.69      0.37      0.49      6090
     severe_toxic       0.35      0.06      0.10       367
          obscene       0.71      0.34      0.46      3691
           threat       0.00      0.00      0.00       211
           insult       0.66      0.29      0.40      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.94      0.98      0.96     57735

        micro avg       0.91      0.85      0.88     72233
        macro avg       0.48      0.29      0.34     72233
     weighted avg       0.88      0.85      0.85     72233
      samples avg       0.92      0.91      0.91     72233




Processing baseline with glove-twitter-200 and hidden dimmension 64


100%|██████████| 125/125 [00:02<00:00, 56.86it/s]
100%|██████████| 32/32 [00:00<00:00, 67.13it/s]


Epoch 1/10, Train Loss: 3.0494, Val Loss: 1.1909


100%|██████████| 125/125 [00:02<00:00, 60.65it/s]
100%|██████████| 32/32 [00:00<00:00, 67.28it/s]


Epoch 2/10, Train Loss: 0.8100, Val Loss: 0.6251


100%|██████████| 125/125 [00:02<00:00, 56.94it/s]
100%|██████████| 32/32 [00:00<00:00, 69.49it/s]


Epoch 3/10, Train Loss: 0.5228, Val Loss: 0.4331


100%|██████████| 125/125 [00:02<00:00, 62.37it/s]
100%|██████████| 32/32 [00:00<00:00, 69.84it/s]


Epoch 4/10, Train Loss: 0.4368, Val Loss: 0.4184


100%|██████████| 125/125 [00:02<00:00, 57.63it/s]
100%|██████████| 32/32 [00:00<00:00, 65.51it/s]


Epoch 5/10, Train Loss: 0.3971, Val Loss: 0.3803


100%|██████████| 125/125 [00:02<00:00, 62.44it/s]
100%|██████████| 32/32 [00:00<00:00, 68.58it/s]


Epoch 6/10, Train Loss: 0.3753, Val Loss: 0.3694


100%|██████████| 125/125 [00:02<00:00, 58.57it/s]
100%|██████████| 32/32 [00:00<00:00, 70.85it/s]


Epoch 7/10, Train Loss: 0.3612, Val Loss: 0.3681


100%|██████████| 125/125 [00:01<00:00, 62.97it/s]
100%|██████████| 32/32 [00:00<00:00, 73.13it/s]


Epoch 8/10, Train Loss: 0.3504, Val Loss: 0.3573


100%|██████████| 125/125 [00:02<00:00, 55.60it/s]
100%|██████████| 32/32 [00:00<00:00, 70.03it/s]


Epoch 9/10, Train Loss: 0.3437, Val Loss: 0.3615


100%|██████████| 125/125 [00:01<00:00, 63.56it/s]
100%|██████████| 32/32 [00:00<00:00, 68.07it/s]


Epoch 10/10, Train Loss: 0.3372, Val Loss: 0.3547


100%|██████████| 63/63 [00:01<00:00, 49.66it/s]


                   precision    recall  f1-score   support

            toxic       0.66      0.41      0.50      6090
     severe_toxic       0.36      0.09      0.14       367
          obscene       0.69      0.35      0.46      3691
           threat       1.00      0.00      0.01       211
           insult       0.66      0.32      0.43      3427
    identity_hate       0.65      0.06      0.10       712
overall_non_toxic       0.94      0.98      0.96     57735

        micro avg       0.91      0.85      0.88     72233
        macro avg       0.71      0.31      0.37     72233
     weighted avg       0.88      0.85      0.85     72233
      samples avg       0.92      0.91      0.91     72233




Processing baseline with glove-twitter-200 and hidden dimmension 128


100%|██████████| 125/125 [00:02<00:00, 60.77it/s]
100%|██████████| 32/32 [00:00<00:00, 69.36it/s]


Epoch 1/10, Train Loss: 2.2872, Val Loss: 0.6877


100%|██████████| 125/125 [00:02<00:00, 60.95it/s]
100%|██████████| 32/32 [00:00<00:00, 68.20it/s]


Epoch 2/10, Train Loss: 0.5772, Val Loss: 0.5148


100%|██████████| 125/125 [00:02<00:00, 56.64it/s]
100%|██████████| 32/32 [00:00<00:00, 70.11it/s]


Epoch 3/10, Train Loss: 0.4377, Val Loss: 0.4035


100%|██████████| 125/125 [00:02<00:00, 61.87it/s]
100%|██████████| 32/32 [00:00<00:00, 67.58it/s]


Epoch 4/10, Train Loss: 0.3914, Val Loss: 0.3788


100%|██████████| 125/125 [00:02<00:00, 54.84it/s]
100%|██████████| 32/32 [00:00<00:00, 67.76it/s]


Epoch 5/10, Train Loss: 0.3682, Val Loss: 0.3687


100%|██████████| 125/125 [00:02<00:00, 62.35it/s]
100%|██████████| 32/32 [00:00<00:00, 67.29it/s]


Epoch 6/10, Train Loss: 0.3531, Val Loss: 0.3712


100%|██████████| 125/125 [00:02<00:00, 56.23it/s]
100%|██████████| 32/32 [00:00<00:00, 69.65it/s]


Epoch 7/10, Train Loss: 0.3415, Val Loss: 0.3503


100%|██████████| 125/125 [00:01<00:00, 63.68it/s]
100%|██████████| 32/32 [00:00<00:00, 70.10it/s]


Epoch 8/10, Train Loss: 0.3340, Val Loss: 0.3548


100%|██████████| 125/125 [00:02<00:00, 58.17it/s]
100%|██████████| 32/32 [00:00<00:00, 71.46it/s]


Epoch 9/10, Train Loss: 0.3280, Val Loss: 0.3551


100%|██████████| 125/125 [00:01<00:00, 63.69it/s]
100%|██████████| 32/32 [00:00<00:00, 69.45it/s]


Epoch 10/10, Train Loss: 0.3221, Val Loss: 0.3487


100%|██████████| 63/63 [00:00<00:00, 68.66it/s]


                   precision    recall  f1-score   support

            toxic       0.60      0.48      0.53      6090
     severe_toxic       0.38      0.16      0.23       367
          obscene       0.66      0.40      0.50      3691
           threat       0.76      0.12      0.20       211
           insult       0.65      0.36      0.46      3427
    identity_hate       0.63      0.14      0.23       712
overall_non_toxic       0.94      0.97      0.96     57735

        micro avg       0.90      0.85      0.88     72233
        macro avg       0.66      0.38      0.44     72233
     weighted avg       0.88      0.85      0.86     72233
      samples avg       0.91      0.91      0.91     72233




Processing baseline with fasttext-200 and hidden dim 32


100%|██████████| 125/125 [00:02<00:00, 57.36it/s]
100%|██████████| 32/32 [00:00<00:00, 71.73it/s]


Epoch 1/10, Train Loss: 3.8678, Val Loss: 1.9680


100%|██████████| 125/125 [00:01<00:00, 64.12it/s]
100%|██████████| 32/32 [00:00<00:00, 70.82it/s]


Epoch 2/10, Train Loss: 1.2333, Val Loss: 0.9058


100%|██████████| 125/125 [00:01<00:00, 64.53it/s]
100%|██████████| 32/32 [00:00<00:00, 72.10it/s]


Epoch 3/10, Train Loss: 0.6739, Val Loss: 0.6468


100%|██████████| 125/125 [00:02<00:00, 57.63it/s]
100%|██████████| 32/32 [00:00<00:00, 70.01it/s]


Epoch 4/10, Train Loss: 0.5052, Val Loss: 0.4732


100%|██████████| 125/125 [00:01<00:00, 63.16it/s]
100%|██████████| 32/32 [00:00<00:00, 69.59it/s]


Epoch 5/10, Train Loss: 0.4347, Val Loss: 0.4124


100%|██████████| 125/125 [00:02<00:00, 56.30it/s]
100%|██████████| 32/32 [00:00<00:00, 66.94it/s]


Epoch 6/10, Train Loss: 0.3970, Val Loss: 0.3834


100%|██████████| 125/125 [00:02<00:00, 61.39it/s]
100%|██████████| 32/32 [00:00<00:00, 67.80it/s]


Epoch 7/10, Train Loss: 0.3753, Val Loss: 0.3551


100%|██████████| 125/125 [00:02<00:00, 56.47it/s]
100%|██████████| 32/32 [00:00<00:00, 67.82it/s]


Epoch 8/10, Train Loss: 0.3598, Val Loss: 0.3677


100%|██████████| 125/125 [00:01<00:00, 62.62it/s]
100%|██████████| 32/32 [00:00<00:00, 66.99it/s]


Epoch 9/10, Train Loss: 0.3485, Val Loss: 0.3577


100%|██████████| 125/125 [00:02<00:00, 56.21it/s]
100%|██████████| 32/32 [00:00<00:00, 65.61it/s]


Epoch 10/10, Train Loss: 0.3403, Val Loss: 0.3560


100%|██████████| 63/63 [00:01<00:00, 59.79it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.66      0.46      0.54      6090
     severe_toxic       0.31      0.08      0.13       367
          obscene       0.70      0.41      0.52      3691
           threat       0.00      0.00      0.00       211
           insult       0.65      0.35      0.45      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.94      0.97      0.96     57735

        micro avg       0.91      0.86      0.88     72233
        macro avg       0.47      0.33      0.37     72233
     weighted avg       0.88      0.86      0.86     72233
      samples avg       0.92      0.91      0.91     72233




Processing baseline with fasttext-200 and hidden dim 64


100%|██████████| 125/125 [00:02<00:00, 59.54it/s]
100%|██████████| 32/32 [00:00<00:00, 68.21it/s]


Epoch 1/10, Train Loss: 3.3883, Val Loss: 1.3791


100%|██████████| 125/125 [00:02<00:00, 55.70it/s]
100%|██████████| 32/32 [00:00<00:00, 71.27it/s]


Epoch 2/10, Train Loss: 0.8584, Val Loss: 0.5686


100%|██████████| 125/125 [00:01<00:00, 64.74it/s]
100%|██████████| 32/32 [00:00<00:00, 70.74it/s]


Epoch 3/10, Train Loss: 0.5332, Val Loss: 0.4747


100%|██████████| 125/125 [00:01<00:00, 63.85it/s]
100%|██████████| 32/32 [00:00<00:00, 71.64it/s]


Epoch 4/10, Train Loss: 0.4360, Val Loss: 0.4195


100%|██████████| 125/125 [00:02<00:00, 57.18it/s]
100%|██████████| 32/32 [00:00<00:00, 67.68it/s]


Epoch 5/10, Train Loss: 0.3926, Val Loss: 0.3772


100%|██████████| 125/125 [00:01<00:00, 64.09it/s]
100%|██████████| 32/32 [00:00<00:00, 71.36it/s]


Epoch 6/10, Train Loss: 0.3684, Val Loss: 0.3654


100%|██████████| 125/125 [00:02<00:00, 58.37it/s]
100%|██████████| 32/32 [00:00<00:00, 72.04it/s]


Epoch 7/10, Train Loss: 0.3531, Val Loss: 0.3505


100%|██████████| 125/125 [00:01<00:00, 65.31it/s]
100%|██████████| 32/32 [00:00<00:00, 72.07it/s]


Epoch 8/10, Train Loss: 0.3426, Val Loss: 0.3517


100%|██████████| 125/125 [00:01<00:00, 65.44it/s]
100%|██████████| 32/32 [00:00<00:00, 69.39it/s]


Epoch 9/10, Train Loss: 0.3340, Val Loss: 0.3432


100%|██████████| 125/125 [00:02<00:00, 57.82it/s]
100%|██████████| 32/32 [00:00<00:00, 64.89it/s]


Epoch 10/10, Train Loss: 0.3279, Val Loss: 0.3420


100%|██████████| 63/63 [00:00<00:00, 67.06it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.59      0.53      0.56      6090
     severe_toxic       0.31      0.13      0.19       367
          obscene       0.62      0.47      0.54      3691
           threat       0.00      0.00      0.00       211
           insult       0.60      0.39      0.47      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.95      0.96      0.96     57735

        micro avg       0.90      0.86      0.88     72233
        macro avg       0.44      0.36      0.39     72233
     weighted avg       0.87      0.86      0.86     72233
      samples avg       0.91      0.91      0.91     72233




Processing baseline with fasttext-200 and hidden dim 128


100%|██████████| 125/125 [00:01<00:00, 64.83it/s]
100%|██████████| 32/32 [00:00<00:00, 70.48it/s]


Epoch 1/10, Train Loss: 2.0421, Val Loss: 0.7326


100%|██████████| 125/125 [00:02<00:00, 55.96it/s]
100%|██████████| 32/32 [00:00<00:00, 66.66it/s]


Epoch 2/10, Train Loss: 0.5354, Val Loss: 0.4595


100%|██████████| 125/125 [00:02<00:00, 60.99it/s]
100%|██████████| 32/32 [00:00<00:00, 69.75it/s]


Epoch 3/10, Train Loss: 0.4161, Val Loss: 0.3958


100%|██████████| 125/125 [00:02<00:00, 55.69it/s]
100%|██████████| 32/32 [00:00<00:00, 68.08it/s]


Epoch 4/10, Train Loss: 0.3763, Val Loss: 0.3893


100%|██████████| 125/125 [00:02<00:00, 61.65it/s]
100%|██████████| 32/32 [00:00<00:00, 66.97it/s]


Epoch 5/10, Train Loss: 0.3539, Val Loss: 0.3674


100%|██████████| 125/125 [00:02<00:00, 54.10it/s]
100%|██████████| 32/32 [00:00<00:00, 62.99it/s]


Epoch 6/10, Train Loss: 0.3410, Val Loss: 0.3549


100%|██████████| 125/125 [00:01<00:00, 62.63it/s]
100%|██████████| 32/32 [00:00<00:00, 65.53it/s]


Epoch 7/10, Train Loss: 0.3306, Val Loss: 0.3416


100%|██████████| 125/125 [00:02<00:00, 52.04it/s]
100%|██████████| 32/32 [00:00<00:00, 65.98it/s]


Epoch 8/10, Train Loss: 0.3230, Val Loss: 0.3374


100%|██████████| 125/125 [00:02<00:00, 55.19it/s]
100%|██████████| 32/32 [00:00<00:00, 63.72it/s]


Epoch 9/10, Train Loss: 0.3184, Val Loss: 0.3400


100%|██████████| 125/125 [00:02<00:00, 52.91it/s]
100%|██████████| 32/32 [00:00<00:00, 61.40it/s]


Epoch 10/10, Train Loss: 0.3137, Val Loss: 0.3375


100%|██████████| 63/63 [00:00<00:00, 66.99it/s]


                   precision    recall  f1-score   support

            toxic       0.63      0.51      0.57      6090
     severe_toxic       0.29      0.15      0.20       367
          obscene       0.67      0.46      0.54      3691
           threat       0.71      0.02      0.05       211
           insult       0.63      0.39      0.48      3427
    identity_hate       0.33      0.00      0.01       712
overall_non_toxic       0.95      0.97      0.96     57735

        micro avg       0.90      0.86      0.88     72233
        macro avg       0.60      0.36      0.40     72233
     weighted avg       0.88      0.86      0.87     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_no_normalization with glove-twitter-200 and hidden dimmension 32


100%|██████████| 125/125 [00:02<00:00, 51.53it/s]
100%|██████████| 32/32 [00:00<00:00, 61.43it/s]


Epoch 1/10, Train Loss: 5.8514, Val Loss: 3.0696


100%|██████████| 125/125 [00:02<00:00, 60.97it/s]
100%|██████████| 32/32 [00:00<00:00, 67.75it/s]


Epoch 2/10, Train Loss: 1.8658, Val Loss: 1.1840


100%|██████████| 125/125 [00:02<00:00, 54.96it/s]
100%|██████████| 32/32 [00:00<00:00, 68.73it/s]


Epoch 3/10, Train Loss: 0.8700, Val Loss: 0.6587


100%|██████████| 125/125 [00:02<00:00, 62.29it/s]
100%|██████████| 32/32 [00:00<00:00, 67.29it/s]


Epoch 4/10, Train Loss: 0.5857, Val Loss: 0.4908


100%|██████████| 125/125 [00:02<00:00, 54.73it/s]
100%|██████████| 32/32 [00:00<00:00, 59.28it/s]


Epoch 5/10, Train Loss: 0.4704, Val Loss: 0.4630


100%|██████████| 125/125 [00:02<00:00, 58.54it/s]
100%|██████████| 32/32 [00:00<00:00, 61.28it/s]


Epoch 6/10, Train Loss: 0.4140, Val Loss: 0.3941


100%|██████████| 125/125 [00:02<00:00, 52.20it/s]
100%|██████████| 32/32 [00:00<00:00, 64.90it/s]


Epoch 7/10, Train Loss: 0.3813, Val Loss: 0.3951


100%|██████████| 125/125 [00:02<00:00, 58.11it/s]
100%|██████████| 32/32 [00:00<00:00, 65.58it/s]


Epoch 8/10, Train Loss: 0.3606, Val Loss: 0.4394


100%|██████████| 125/125 [00:02<00:00, 52.22it/s]
100%|██████████| 32/32 [00:00<00:00, 67.13it/s]


Epoch 9/10, Train Loss: 0.3452, Val Loss: 0.3562


100%|██████████| 125/125 [00:02<00:00, 62.10it/s]
100%|██████████| 32/32 [00:00<00:00, 68.38it/s]


Epoch 10/10, Train Loss: 0.3348, Val Loss: 0.3403


100%|██████████| 63/63 [00:01<00:00, 62.00it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.56      0.57      0.57      6090
     severe_toxic       0.39      0.03      0.06       367
          obscene       0.68      0.42      0.52      3691
           threat       0.00      0.00      0.00       211
           insult       0.66      0.37      0.48      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.95      0.95      0.95     57735

        micro avg       0.90      0.85      0.87     72233
        macro avg       0.46      0.34      0.37     72233
     weighted avg       0.88      0.85      0.86     72233
      samples avg       0.91      0.90      0.90     72233




Processing word_tokenize_no_normalization with glove-twitter-200 and hidden dimmension 64


100%|██████████| 125/125 [00:02<00:00, 48.76it/s]
100%|██████████| 32/32 [00:00<00:00, 58.92it/s]


Epoch 1/10, Train Loss: 3.7472, Val Loss: 1.5893


100%|██████████| 125/125 [00:02<00:00, 56.90it/s]
100%|██████████| 32/32 [00:00<00:00, 63.51it/s]


Epoch 2/10, Train Loss: 0.9202, Val Loss: 0.7612


100%|██████████| 125/125 [00:02<00:00, 53.77it/s]
100%|██████████| 32/32 [00:00<00:00, 58.00it/s]


Epoch 3/10, Train Loss: 0.5439, Val Loss: 0.4276


100%|██████████| 125/125 [00:02<00:00, 50.36it/s]
100%|██████████| 32/32 [00:00<00:00, 65.24it/s]


Epoch 4/10, Train Loss: 0.4317, Val Loss: 0.3790


100%|██████████| 125/125 [00:02<00:00, 55.88it/s]
100%|██████████| 32/32 [00:00<00:00, 53.25it/s]


Epoch 5/10, Train Loss: 0.3806, Val Loss: 0.3732


100%|██████████| 125/125 [00:02<00:00, 51.25it/s]
100%|██████████| 32/32 [00:00<00:00, 58.82it/s]


Epoch 6/10, Train Loss: 0.3539, Val Loss: 0.3621


100%|██████████| 125/125 [00:02<00:00, 55.22it/s]
100%|██████████| 32/32 [00:00<00:00, 60.97it/s]


Epoch 7/10, Train Loss: 0.3346, Val Loss: 0.3300


100%|██████████| 125/125 [00:02<00:00, 60.54it/s]
100%|██████████| 32/32 [00:00<00:00, 68.27it/s]


Epoch 8/10, Train Loss: 0.3226, Val Loss: 0.3287


100%|██████████| 125/125 [00:02<00:00, 54.40it/s]
100%|██████████| 32/32 [00:00<00:00, 59.82it/s]


Epoch 9/10, Train Loss: 0.3126, Val Loss: 0.3154


100%|██████████| 125/125 [00:02<00:00, 56.33it/s]
100%|██████████| 32/32 [00:00<00:00, 61.52it/s]


Epoch 10/10, Train Loss: 0.3032, Val Loss: 0.3160


100%|██████████| 63/63 [00:00<00:00, 63.10it/s]


                   precision    recall  f1-score   support

            toxic       0.57      0.59      0.58      6090
     severe_toxic       0.34      0.14      0.20       367
          obscene       0.71      0.42      0.53      3691
           threat       0.73      0.05      0.10       211
           insult       0.65      0.40      0.50      3427
    identity_hate       0.64      0.11      0.19       712
overall_non_toxic       0.95      0.95      0.95     57735

        micro avg       0.90      0.85      0.88     72233
        macro avg       0.66      0.38      0.43     72233
     weighted avg       0.89      0.85      0.87     72233
      samples avg       0.91      0.90      0.90     72233




Processing word_tokenize_no_normalization with glove-twitter-200 and hidden dimmension 128


100%|██████████| 125/125 [00:02<00:00, 50.60it/s]
100%|██████████| 32/32 [00:00<00:00, 64.25it/s]


Epoch 1/10, Train Loss: 2.2438, Val Loss: 0.7138


100%|██████████| 125/125 [00:02<00:00, 54.81it/s]
100%|██████████| 32/32 [00:00<00:00, 61.40it/s]


Epoch 2/10, Train Loss: 0.5368, Val Loss: 0.4569


100%|██████████| 125/125 [00:02<00:00, 48.55it/s]
100%|██████████| 32/32 [00:00<00:00, 64.53it/s]


Epoch 3/10, Train Loss: 0.4000, Val Loss: 0.3639


100%|██████████| 125/125 [00:02<00:00, 56.69it/s]
100%|██████████| 32/32 [00:00<00:00, 61.85it/s]


Epoch 4/10, Train Loss: 0.3535, Val Loss: 0.3986


100%|██████████| 125/125 [00:02<00:00, 49.52it/s]
100%|██████████| 32/32 [00:00<00:00, 61.13it/s]


Epoch 5/10, Train Loss: 0.3303, Val Loss: 0.3287


100%|██████████| 125/125 [00:02<00:00, 57.31it/s]
100%|██████████| 32/32 [00:00<00:00, 57.27it/s]


Epoch 6/10, Train Loss: 0.3155, Val Loss: 0.3144


100%|██████████| 125/125 [00:02<00:00, 52.88it/s]
100%|██████████| 32/32 [00:00<00:00, 58.39it/s]


Epoch 7/10, Train Loss: 0.3045, Val Loss: 0.3172


100%|██████████| 125/125 [00:02<00:00, 57.96it/s]
100%|██████████| 32/32 [00:00<00:00, 62.62it/s]


Epoch 8/10, Train Loss: 0.2963, Val Loss: 0.3128


100%|██████████| 125/125 [00:02<00:00, 50.11it/s]
100%|██████████| 32/32 [00:00<00:00, 60.38it/s]


Epoch 9/10, Train Loss: 0.2888, Val Loss: 0.3164


100%|██████████| 125/125 [00:02<00:00, 54.81it/s]
100%|██████████| 32/32 [00:00<00:00, 62.17it/s]


Epoch 10/10, Train Loss: 0.2843, Val Loss: 0.3075


100%|██████████| 63/63 [00:01<00:00, 46.27it/s]


                   precision    recall  f1-score   support

            toxic       0.56      0.62      0.59      6090
     severe_toxic       0.32      0.17      0.23       367
          obscene       0.66      0.46      0.54      3691
           threat       0.78      0.12      0.21       211
           insult       0.66      0.41      0.51      3427
    identity_hate       0.60      0.18      0.28       712
overall_non_toxic       0.96      0.95      0.95     57735

        micro avg       0.90      0.86      0.88     72233
        macro avg       0.65      0.42      0.47     72233
     weighted avg       0.89      0.86      0.87     72233
      samples avg       0.91      0.90      0.90     72233




Processing word_tokenize_no_normalization with fasttext-200 and hidden dim 32


100%|██████████| 125/125 [00:02<00:00, 55.41it/s]
100%|██████████| 32/32 [00:00<00:00, 63.15it/s]


Epoch 1/10, Train Loss: 4.4068, Val Loss: 2.1712


100%|██████████| 125/125 [00:02<00:00, 49.85it/s]
100%|██████████| 32/32 [00:00<00:00, 62.14it/s]


Epoch 2/10, Train Loss: 1.4466, Val Loss: 0.9196


100%|██████████| 125/125 [00:02<00:00, 57.01it/s]
100%|██████████| 32/32 [00:00<00:00, 55.55it/s]


Epoch 3/10, Train Loss: 0.7588, Val Loss: 0.6368


100%|██████████| 125/125 [00:02<00:00, 52.53it/s]
100%|██████████| 32/32 [00:00<00:00, 59.61it/s]


Epoch 4/10, Train Loss: 0.5469, Val Loss: 0.5543


100%|██████████| 125/125 [00:02<00:00, 56.73it/s]
100%|██████████| 32/32 [00:00<00:00, 62.19it/s]


Epoch 5/10, Train Loss: 0.4562, Val Loss: 0.4681


100%|██████████| 125/125 [00:02<00:00, 51.62it/s]
100%|██████████| 32/32 [00:00<00:00, 59.16it/s]


Epoch 6/10, Train Loss: 0.4106, Val Loss: 0.4150


100%|██████████| 125/125 [00:02<00:00, 56.61it/s]
100%|██████████| 32/32 [00:00<00:00, 62.59it/s]


Epoch 7/10, Train Loss: 0.3828, Val Loss: 0.3793


100%|██████████| 125/125 [00:02<00:00, 50.21it/s]
100%|██████████| 32/32 [00:00<00:00, 67.64it/s]


Epoch 8/10, Train Loss: 0.3642, Val Loss: 0.3602


100%|██████████| 125/125 [00:02<00:00, 50.70it/s]
100%|██████████| 32/32 [00:00<00:00, 53.81it/s]


Epoch 9/10, Train Loss: 0.3521, Val Loss: 0.3644


100%|██████████| 125/125 [00:02<00:00, 48.21it/s]
100%|██████████| 32/32 [00:00<00:00, 61.32it/s]


Epoch 10/10, Train Loss: 0.3418, Val Loss: 0.3561


100%|██████████| 63/63 [00:01<00:00, 62.41it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.61      0.52      0.56      6090
     severe_toxic       0.25      0.10      0.14       367
          obscene       0.68      0.44      0.53      3691
           threat       0.00      0.00      0.00       211
           insult       0.62      0.38      0.47      3427
    identity_hate       0.50      0.00      0.00       712
overall_non_toxic       0.95      0.97      0.96     57735

        micro avg       0.90      0.86      0.88     72233
        macro avg       0.52      0.34      0.38     72233
     weighted avg       0.88      0.86      0.86     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_no_normalization with fasttext-200 and hidden dim 64


100%|██████████| 125/125 [00:02<00:00, 57.76it/s]
100%|██████████| 32/32 [00:00<00:00, 68.17it/s]


Epoch 1/10, Train Loss: 3.5495, Val Loss: 1.4878


100%|██████████| 125/125 [00:02<00:00, 49.04it/s]
100%|██████████| 32/32 [00:00<00:00, 57.78it/s]


Epoch 2/10, Train Loss: 0.8932, Val Loss: 0.5495


100%|██████████| 125/125 [00:02<00:00, 56.95it/s]
100%|██████████| 32/32 [00:00<00:00, 65.34it/s]


Epoch 3/10, Train Loss: 0.5435, Val Loss: 0.4972


100%|██████████| 125/125 [00:02<00:00, 50.08it/s]
100%|██████████| 32/32 [00:00<00:00, 64.94it/s]


Epoch 4/10, Train Loss: 0.4391, Val Loss: 0.4275


100%|██████████| 125/125 [00:02<00:00, 55.31it/s]
100%|██████████| 32/32 [00:00<00:00, 60.12it/s]


Epoch 5/10, Train Loss: 0.3931, Val Loss: 0.4018


100%|██████████| 125/125 [00:02<00:00, 51.04it/s]
100%|██████████| 32/32 [00:00<00:00, 65.42it/s]


Epoch 6/10, Train Loss: 0.3662, Val Loss: 0.3676


100%|██████████| 125/125 [00:02<00:00, 58.88it/s]
100%|██████████| 32/32 [00:00<00:00, 64.29it/s]


Epoch 7/10, Train Loss: 0.3510, Val Loss: 0.3526


100%|██████████| 125/125 [00:02<00:00, 52.30it/s]
100%|██████████| 32/32 [00:00<00:00, 64.46it/s]


Epoch 8/10, Train Loss: 0.3391, Val Loss: 0.3621


100%|██████████| 125/125 [00:02<00:00, 58.47it/s]
100%|██████████| 32/32 [00:00<00:00, 65.18it/s]


Epoch 9/10, Train Loss: 0.3321, Val Loss: 0.3400


100%|██████████| 125/125 [00:02<00:00, 50.53it/s]
100%|██████████| 32/32 [00:00<00:00, 65.10it/s]


Epoch 10/10, Train Loss: 0.3253, Val Loss: 0.3421


100%|██████████| 63/63 [00:00<00:00, 64.97it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.61      0.53      0.57      6090
     severe_toxic       0.25      0.14      0.18       367
          obscene       0.66      0.47      0.55      3691
           threat       0.00      0.00      0.00       211
           insult       0.63      0.40      0.49      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.95      0.96      0.96     57735

        micro avg       0.90      0.86      0.88     72233
        macro avg       0.44      0.36      0.39     72233
     weighted avg       0.87      0.86      0.86     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_no_normalization with fasttext-200 and hidden dim 128


100%|██████████| 125/125 [00:02<00:00, 57.82it/s]
100%|██████████| 32/32 [00:00<00:00, 69.46it/s]


Epoch 1/10, Train Loss: 2.4305, Val Loss: 0.8152


100%|██████████| 125/125 [00:02<00:00, 55.63it/s]
100%|██████████| 32/32 [00:00<00:00, 66.83it/s]


Epoch 2/10, Train Loss: 0.5820, Val Loss: 0.5062


100%|██████████| 125/125 [00:02<00:00, 60.56it/s]
100%|██████████| 32/32 [00:00<00:00, 67.14it/s]


Epoch 3/10, Train Loss: 0.4300, Val Loss: 0.3838


100%|██████████| 125/125 [00:02<00:00, 59.39it/s]
100%|██████████| 32/32 [00:00<00:00, 66.95it/s]


Epoch 4/10, Train Loss: 0.3800, Val Loss: 0.3828


100%|██████████| 125/125 [00:02<00:00, 53.12it/s]
100%|██████████| 32/32 [00:00<00:00, 68.14it/s]


Epoch 5/10, Train Loss: 0.3555, Val Loss: 0.3607


100%|██████████| 125/125 [00:02<00:00, 58.31it/s]
100%|██████████| 32/32 [00:00<00:00, 66.40it/s]


Epoch 6/10, Train Loss: 0.3398, Val Loss: 0.3457


100%|██████████| 125/125 [00:02<00:00, 52.20it/s]
100%|██████████| 32/32 [00:00<00:00, 63.03it/s]


Epoch 7/10, Train Loss: 0.3295, Val Loss: 0.3424


100%|██████████| 125/125 [00:02<00:00, 56.86it/s]
100%|██████████| 32/32 [00:00<00:00, 65.53it/s]


Epoch 8/10, Train Loss: 0.3224, Val Loss: 0.3486


100%|██████████| 125/125 [00:02<00:00, 57.16it/s]
100%|██████████| 32/32 [00:00<00:00, 63.00it/s]


Epoch 9/10, Train Loss: 0.3158, Val Loss: 0.3453


100%|██████████| 125/125 [00:02<00:00, 56.91it/s]
100%|██████████| 32/32 [00:00<00:00, 42.89it/s]


Epoch 10/10, Train Loss: 0.3100, Val Loss: 0.3496


100%|██████████| 63/63 [00:01<00:00, 61.19it/s]


                   precision    recall  f1-score   support

            toxic       0.55      0.59      0.57      6090
     severe_toxic       0.35      0.16      0.22       367
          obscene       0.65      0.49      0.56      3691
           threat       0.50      0.00      0.01       211
           insult       0.60      0.43      0.50      3427
    identity_hate       0.50      0.01      0.02       712
overall_non_toxic       0.96      0.95      0.95     57735

        micro avg       0.89      0.86      0.87     72233
        macro avg       0.59      0.38      0.40     72233
     weighted avg       0.88      0.86      0.86     72233
      samples avg       0.91      0.90      0.90     72233




Processing word_tokenize_simple_normalization with glove-twitter-200 and hidden dimmension 32


100%|██████████| 125/125 [00:02<00:00, 59.04it/s]
100%|██████████| 32/32 [00:00<00:00, 63.99it/s]


Epoch 1/10, Train Loss: 4.8613, Val Loss: 2.6289


100%|██████████| 125/125 [00:02<00:00, 53.16it/s]
100%|██████████| 32/32 [00:00<00:00, 62.73it/s]


Epoch 2/10, Train Loss: 1.5171, Val Loss: 0.9462


100%|██████████| 125/125 [00:02<00:00, 58.07it/s]
100%|██████████| 32/32 [00:00<00:00, 64.26it/s]


Epoch 3/10, Train Loss: 0.7391, Val Loss: 0.5502


100%|██████████| 125/125 [00:02<00:00, 51.66it/s]
100%|██████████| 32/32 [00:00<00:00, 63.06it/s]


Epoch 4/10, Train Loss: 0.5091, Val Loss: 0.4036


100%|██████████| 125/125 [00:02<00:00, 58.40it/s]
100%|██████████| 32/32 [00:00<00:00, 64.20it/s]


Epoch 5/10, Train Loss: 0.4135, Val Loss: 0.3749


100%|██████████| 125/125 [00:02<00:00, 51.39it/s]
100%|██████████| 32/32 [00:00<00:00, 61.70it/s]


Epoch 6/10, Train Loss: 0.3642, Val Loss: 0.3586


100%|██████████| 125/125 [00:02<00:00, 57.05it/s]
100%|██████████| 32/32 [00:00<00:00, 62.27it/s]


Epoch 7/10, Train Loss: 0.3348, Val Loss: 0.3257


100%|██████████| 125/125 [00:02<00:00, 52.02it/s]
100%|██████████| 32/32 [00:00<00:00, 62.21it/s]


Epoch 8/10, Train Loss: 0.3155, Val Loss: 0.3089


100%|██████████| 125/125 [00:02<00:00, 58.48it/s]
100%|██████████| 32/32 [00:00<00:00, 61.83it/s]


Epoch 9/10, Train Loss: 0.3010, Val Loss: 0.3242


100%|██████████| 125/125 [00:02<00:00, 55.61it/s]
100%|██████████| 32/32 [00:00<00:00, 65.20it/s]


Epoch 10/10, Train Loss: 0.2903, Val Loss: 0.3106


100%|██████████| 63/63 [00:01<00:00, 60.73it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.67      0.52      0.59      6090
     severe_toxic       0.00      0.00      0.00       367
          obscene       0.73      0.47      0.57      3691
           threat       0.00      0.00      0.00       211
           insult       0.68      0.42      0.52      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.95      0.97      0.96     57735

        micro avg       0.91      0.87      0.89     72233
        macro avg       0.43      0.34      0.38     72233
     weighted avg       0.88      0.87      0.87     72233
      samples avg       0.93      0.92      0.92     72233




Processing word_tokenize_simple_normalization with glove-twitter-200 and hidden dimmension 64


100%|██████████| 125/125 [00:02<00:00, 56.83it/s]
100%|██████████| 32/32 [00:00<00:00, 63.63it/s]


Epoch 1/10, Train Loss: 2.6938, Val Loss: 0.9202


100%|██████████| 125/125 [00:02<00:00, 52.06it/s]
100%|██████████| 32/32 [00:00<00:00, 66.55it/s]


Epoch 2/10, Train Loss: 0.6826, Val Loss: 0.5257


100%|██████████| 125/125 [00:02<00:00, 59.84it/s]
100%|██████████| 32/32 [00:00<00:00, 65.46it/s]


Epoch 3/10, Train Loss: 0.4308, Val Loss: 0.4036


100%|██████████| 125/125 [00:02<00:00, 50.20it/s]
100%|██████████| 32/32 [00:00<00:00, 63.37it/s]


Epoch 4/10, Train Loss: 0.3528, Val Loss: 0.3488


100%|██████████| 125/125 [00:02<00:00, 59.25it/s]
100%|██████████| 32/32 [00:00<00:00, 59.77it/s]


Epoch 5/10, Train Loss: 0.3152, Val Loss: 0.3132


100%|██████████| 125/125 [00:02<00:00, 53.38it/s]
100%|██████████| 32/32 [00:00<00:00, 68.21it/s]


Epoch 6/10, Train Loss: 0.2923, Val Loss: 0.2994


100%|██████████| 125/125 [00:02<00:00, 62.38it/s]
100%|██████████| 32/32 [00:00<00:00, 68.19it/s]


Epoch 7/10, Train Loss: 0.2778, Val Loss: 0.2900


100%|██████████| 125/125 [00:02<00:00, 54.25it/s]
100%|██████████| 32/32 [00:00<00:00, 63.87it/s]


Epoch 8/10, Train Loss: 0.2665, Val Loss: 0.2868


100%|██████████| 125/125 [00:02<00:00, 56.74it/s]
100%|██████████| 32/32 [00:00<00:00, 65.40it/s]


Epoch 9/10, Train Loss: 0.2573, Val Loss: 0.2872


100%|██████████| 125/125 [00:02<00:00, 52.64it/s]
100%|██████████| 32/32 [00:00<00:00, 58.35it/s]


Epoch 10/10, Train Loss: 0.2499, Val Loss: 0.2740


100%|██████████| 63/63 [00:01<00:00, 62.02it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.61      0.65      0.63      6090
     severe_toxic       0.34      0.28      0.31       367
          obscene       0.70      0.54      0.61      3691
           threat       0.00      0.00      0.00       211
           insult       0.68      0.50      0.58      3427
    identity_hate       0.68      0.13      0.22       712
overall_non_toxic       0.96      0.96      0.96     57735

        micro avg       0.90      0.87      0.89     72233
        macro avg       0.57      0.44      0.47     72233
     weighted avg       0.90      0.87      0.88     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_simple_normalization with glove-twitter-200 and hidden dimmension 128


100%|██████████| 125/125 [00:02<00:00, 57.04it/s]
100%|██████████| 32/32 [00:00<00:00, 64.62it/s]


Epoch 1/10, Train Loss: 2.4470, Val Loss: 0.7580


100%|██████████| 125/125 [00:02<00:00, 54.88it/s]
100%|██████████| 32/32 [00:00<00:00, 66.52it/s]


Epoch 2/10, Train Loss: 0.5271, Val Loss: 0.4114


100%|██████████| 125/125 [00:02<00:00, 60.90it/s]
100%|██████████| 32/32 [00:00<00:00, 65.02it/s]


Epoch 3/10, Train Loss: 0.3690, Val Loss: 0.3148


100%|██████████| 125/125 [00:02<00:00, 61.07it/s]
100%|██████████| 32/32 [00:00<00:00, 44.02it/s]


Epoch 4/10, Train Loss: 0.3152, Val Loss: 0.3053


100%|██████████| 125/125 [00:02<00:00, 56.90it/s]
100%|██████████| 32/32 [00:00<00:00, 67.04it/s]


Epoch 5/10, Train Loss: 0.2880, Val Loss: 0.2841


100%|██████████| 125/125 [00:02<00:00, 57.63it/s]
100%|██████████| 32/32 [00:00<00:00, 38.09it/s]


Epoch 6/10, Train Loss: 0.2701, Val Loss: 0.3032


100%|██████████| 125/125 [00:02<00:00, 56.41it/s]
100%|██████████| 32/32 [00:00<00:00, 61.53it/s]


Epoch 7/10, Train Loss: 0.2594, Val Loss: 0.2730


100%|██████████| 125/125 [00:02<00:00, 53.51it/s]
100%|██████████| 32/32 [00:00<00:00, 64.74it/s]


Epoch 8/10, Train Loss: 0.2486, Val Loss: 0.2701


100%|██████████| 125/125 [00:02<00:00, 48.35it/s]
100%|██████████| 32/32 [00:00<00:00, 61.87it/s]


Epoch 9/10, Train Loss: 0.2403, Val Loss: 0.2753


100%|██████████| 125/125 [00:02<00:00, 53.81it/s]
100%|██████████| 32/32 [00:00<00:00, 49.99it/s]


Epoch 10/10, Train Loss: 0.2342, Val Loss: 0.2641


100%|██████████| 63/63 [00:01<00:00, 51.23it/s]


                   precision    recall  f1-score   support

            toxic       0.62      0.63      0.63      6090
     severe_toxic       0.32      0.32      0.32       367
          obscene       0.70      0.55      0.62      3691
           threat       0.69      0.14      0.23       211
           insult       0.68      0.50      0.57      3427
    identity_hate       0.65      0.27      0.38       712
overall_non_toxic       0.96      0.96      0.96     57735

        micro avg       0.90      0.88      0.89     72233
        macro avg       0.66      0.48      0.53     72233
     weighted avg       0.90      0.88      0.88     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_simple_normalization with fasttext-200 and hidden dim 32


100%|██████████| 125/125 [00:02<00:00, 45.15it/s]
100%|██████████| 32/32 [00:00<00:00, 62.03it/s]


Epoch 1/10, Train Loss: 4.6335, Val Loss: 2.1761


100%|██████████| 125/125 [00:02<00:00, 49.53it/s]
100%|██████████| 32/32 [00:00<00:00, 56.31it/s]


Epoch 2/10, Train Loss: 1.5223, Val Loss: 0.8917


100%|██████████| 125/125 [00:02<00:00, 46.34it/s]
100%|██████████| 32/32 [00:00<00:00, 59.26it/s]


Epoch 3/10, Train Loss: 0.7703, Val Loss: 0.6305


100%|██████████| 125/125 [00:02<00:00, 53.08it/s]
100%|██████████| 32/32 [00:00<00:00, 61.30it/s]


Epoch 4/10, Train Loss: 0.5437, Val Loss: 0.4887


100%|██████████| 125/125 [00:02<00:00, 45.77it/s]
100%|██████████| 32/32 [00:00<00:00, 58.74it/s]


Epoch 5/10, Train Loss: 0.4480, Val Loss: 0.4420


100%|██████████| 125/125 [00:02<00:00, 52.42it/s]
100%|██████████| 32/32 [00:00<00:00, 61.03it/s]


Epoch 6/10, Train Loss: 0.3998, Val Loss: 0.3824


100%|██████████| 125/125 [00:02<00:00, 44.90it/s]
100%|██████████| 32/32 [00:00<00:00, 47.30it/s]


Epoch 7/10, Train Loss: 0.3715, Val Loss: 0.3710


100%|██████████| 125/125 [00:02<00:00, 52.07it/s]
100%|██████████| 32/32 [00:00<00:00, 59.49it/s]


Epoch 8/10, Train Loss: 0.3514, Val Loss: 0.3489


100%|██████████| 125/125 [00:02<00:00, 44.98it/s]
100%|██████████| 32/32 [00:00<00:00, 60.72it/s]


Epoch 9/10, Train Loss: 0.3382, Val Loss: 0.3381


100%|██████████| 125/125 [00:02<00:00, 51.98it/s]
100%|██████████| 32/32 [00:00<00:00, 58.92it/s]


Epoch 10/10, Train Loss: 0.3277, Val Loss: 0.3964


100%|██████████| 63/63 [00:01<00:00, 53.26it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.61      0.54      0.57      6090
     severe_toxic       0.31      0.09      0.14       367
          obscene       0.65      0.49      0.56      3691
           threat       0.00      0.00      0.00       211
           insult       0.63      0.43      0.51      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.95      0.96      0.96     57735

        micro avg       0.90      0.86      0.88     72233
        macro avg       0.45      0.36      0.39     72233
     weighted avg       0.88      0.86      0.87     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_simple_normalization with fasttext-200 and hidden dim 64


100%|██████████| 125/125 [00:02<00:00, 44.43it/s]
100%|██████████| 32/32 [00:00<00:00, 49.25it/s]


Epoch 1/10, Train Loss: 3.1090, Val Loss: 1.1734


100%|██████████| 125/125 [00:02<00:00, 46.67it/s]
100%|██████████| 32/32 [00:00<00:00, 51.97it/s]


Epoch 2/10, Train Loss: 0.8049, Val Loss: 0.6327


100%|██████████| 125/125 [00:02<00:00, 47.01it/s]
100%|██████████| 32/32 [00:00<00:00, 54.27it/s]


Epoch 3/10, Train Loss: 0.5082, Val Loss: 0.4482


100%|██████████| 125/125 [00:03<00:00, 41.20it/s]
100%|██████████| 32/32 [00:00<00:00, 49.06it/s]


Epoch 4/10, Train Loss: 0.4160, Val Loss: 0.4290


100%|██████████| 125/125 [00:02<00:00, 47.30it/s]
100%|██████████| 32/32 [00:00<00:00, 50.04it/s]


Epoch 5/10, Train Loss: 0.3723, Val Loss: 0.3511


100%|██████████| 125/125 [00:03<00:00, 39.64it/s]
100%|██████████| 32/32 [00:00<00:00, 58.86it/s]


Epoch 6/10, Train Loss: 0.3471, Val Loss: 0.3455


100%|██████████| 125/125 [00:02<00:00, 51.71it/s]
100%|██████████| 32/32 [00:00<00:00, 60.58it/s]


Epoch 7/10, Train Loss: 0.3317, Val Loss: 0.3324


100%|██████████| 125/125 [00:02<00:00, 48.13it/s]
100%|██████████| 32/32 [00:00<00:00, 56.92it/s]


Epoch 8/10, Train Loss: 0.3206, Val Loss: 0.3463


100%|██████████| 125/125 [00:02<00:00, 45.17it/s]
100%|██████████| 32/32 [00:00<00:00, 57.54it/s]


Epoch 9/10, Train Loss: 0.3124, Val Loss: 0.3214


100%|██████████| 125/125 [00:02<00:00, 54.70it/s]
100%|██████████| 32/32 [00:00<00:00, 59.84it/s]


Epoch 10/10, Train Loss: 0.3067, Val Loss: 0.3221


100%|██████████| 63/63 [00:01<00:00, 57.32it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.60      0.55      0.58      6090
     severe_toxic       0.29      0.15      0.20       367
          obscene       0.68      0.52      0.59      3691
           threat       0.00      0.00      0.00       211
           insult       0.65      0.44      0.52      3427
    identity_hate       0.50      0.01      0.01       712
overall_non_toxic       0.95      0.96      0.96     57735

        micro avg       0.90      0.86      0.88     72233
        macro avg       0.52      0.38      0.41     72233
     weighted avg       0.88      0.86      0.87     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_simple_normalization with fasttext-200 and hidden dim 128


100%|██████████| 125/125 [00:02<00:00, 42.99it/s]
100%|██████████| 32/32 [00:00<00:00, 62.13it/s]


Epoch 1/10, Train Loss: 2.1646, Val Loss: 0.7689


100%|██████████| 125/125 [00:02<00:00, 54.66it/s]
100%|██████████| 32/32 [00:00<00:00, 61.20it/s]


Epoch 2/10, Train Loss: 0.5428, Val Loss: 0.4472


100%|██████████| 125/125 [00:02<00:00, 49.23it/s]
100%|██████████| 32/32 [00:00<00:00, 59.94it/s]


Epoch 3/10, Train Loss: 0.4066, Val Loss: 0.3623


100%|██████████| 125/125 [00:02<00:00, 56.57it/s]
100%|██████████| 32/32 [00:00<00:00, 62.83it/s]


Epoch 4/10, Train Loss: 0.3599, Val Loss: 0.3727


100%|██████████| 125/125 [00:02<00:00, 51.48it/s]
100%|██████████| 32/32 [00:00<00:00, 66.30it/s]


Epoch 5/10, Train Loss: 0.3365, Val Loss: 0.3338


100%|██████████| 125/125 [00:02<00:00, 60.24it/s]
100%|██████████| 32/32 [00:00<00:00, 66.95it/s]


Epoch 6/10, Train Loss: 0.3210, Val Loss: 0.3316


100%|██████████| 125/125 [00:02<00:00, 53.72it/s]
100%|██████████| 32/32 [00:00<00:00, 66.70it/s]


Epoch 7/10, Train Loss: 0.3116, Val Loss: 0.3202


100%|██████████| 125/125 [00:02<00:00, 59.44it/s]
100%|██████████| 32/32 [00:00<00:00, 64.83it/s]


Epoch 8/10, Train Loss: 0.3031, Val Loss: 0.3247


100%|██████████| 125/125 [00:02<00:00, 52.54it/s]
100%|██████████| 32/32 [00:00<00:00, 61.78it/s]


Epoch 9/10, Train Loss: 0.2972, Val Loss: 0.3147


100%|██████████| 125/125 [00:02<00:00, 59.79it/s]
100%|██████████| 32/32 [00:00<00:00, 62.56it/s]


Epoch 10/10, Train Loss: 0.2921, Val Loss: 0.3346


100%|██████████| 63/63 [00:01<00:00, 48.87it/s]


                   precision    recall  f1-score   support

            toxic       0.55      0.63      0.59      6090
     severe_toxic       0.30      0.24      0.27       367
          obscene       0.63      0.57      0.60      3691
           threat       0.78      0.09      0.15       211
           insult       0.65      0.45      0.53      3427
    identity_hate       0.71      0.01      0.03       712
overall_non_toxic       0.96      0.95      0.95     57735

        micro avg       0.89      0.86      0.88     72233
        macro avg       0.66      0.42      0.45     72233
     weighted avg       0.89      0.86      0.87     72233
      samples avg       0.91      0.90      0.90     72233




Processing word_tokenize_normalization with glove-twitter-200 and hidden dimmension 32


100%|██████████| 125/125 [00:02<00:00, 57.94it/s]
100%|██████████| 32/32 [00:00<00:00, 61.91it/s]


Epoch 1/10, Train Loss: 5.1456, Val Loss: 2.5171


100%|██████████| 125/125 [00:02<00:00, 48.60it/s]
100%|██████████| 32/32 [00:00<00:00, 63.51it/s]


Epoch 2/10, Train Loss: 1.6806, Val Loss: 1.0550


100%|██████████| 125/125 [00:02<00:00, 54.40it/s]
100%|██████████| 32/32 [00:00<00:00, 62.63it/s]


Epoch 3/10, Train Loss: 0.8165, Val Loss: 0.6017


100%|██████████| 125/125 [00:02<00:00, 49.74it/s]
100%|██████████| 32/32 [00:00<00:00, 62.68it/s]


Epoch 4/10, Train Loss: 0.5507, Val Loss: 0.4452


100%|██████████| 125/125 [00:02<00:00, 55.87it/s]
100%|██████████| 32/32 [00:00<00:00, 61.57it/s]


Epoch 5/10, Train Loss: 0.4403, Val Loss: 0.4240


100%|██████████| 125/125 [00:02<00:00, 46.97it/s]
100%|██████████| 32/32 [00:00<00:00, 50.81it/s]


Epoch 6/10, Train Loss: 0.3828, Val Loss: 0.3609


100%|██████████| 125/125 [00:02<00:00, 53.69it/s]
100%|██████████| 32/32 [00:00<00:00, 64.70it/s]


Epoch 7/10, Train Loss: 0.3487, Val Loss: 0.3445


100%|██████████| 125/125 [00:02<00:00, 48.67it/s]
100%|██████████| 32/32 [00:00<00:00, 61.37it/s]


Epoch 8/10, Train Loss: 0.3256, Val Loss: 0.3203


100%|██████████| 125/125 [00:02<00:00, 53.08it/s]
100%|██████████| 32/32 [00:00<00:00, 62.05it/s]


Epoch 9/10, Train Loss: 0.3097, Val Loss: 0.3085


100%|██████████| 125/125 [00:02<00:00, 52.40it/s]
100%|██████████| 32/32 [00:00<00:00, 65.63it/s]


Epoch 10/10, Train Loss: 0.2974, Val Loss: 0.3002


100%|██████████| 63/63 [00:00<00:00, 63.59it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.65      0.59      0.62      6090
     severe_toxic       0.30      0.05      0.08       367
          obscene       0.68      0.53      0.60      3691
           threat       0.00      0.00      0.00       211
           insult       0.64      0.47      0.54      3427
    identity_hate       0.89      0.03      0.06       712
overall_non_toxic       0.96      0.97      0.96     57735

        micro avg       0.91      0.87      0.89     72233
        macro avg       0.59      0.38      0.41     72233
     weighted avg       0.89      0.87      0.88     72233
      samples avg       0.92      0.92      0.92     72233




Processing word_tokenize_normalization with glove-twitter-200 and hidden dimmension 64


100%|██████████| 125/125 [00:02<00:00, 60.06it/s]
100%|██████████| 32/32 [00:00<00:00, 67.00it/s]


Epoch 1/10, Train Loss: 3.4783, Val Loss: 1.2381


100%|██████████| 125/125 [00:02<00:00, 53.01it/s]
100%|██████████| 32/32 [00:00<00:00, 67.84it/s]


Epoch 2/10, Train Loss: 0.8309, Val Loss: 0.5612


100%|██████████| 125/125 [00:02<00:00, 60.31it/s]
100%|██████████| 32/32 [00:00<00:00, 58.81it/s]


Epoch 3/10, Train Loss: 0.4880, Val Loss: 0.4896


100%|██████████| 125/125 [00:02<00:00, 54.97it/s]
100%|██████████| 32/32 [00:00<00:00, 61.04it/s]


Epoch 4/10, Train Loss: 0.3857, Val Loss: 0.3510


100%|██████████| 125/125 [00:02<00:00, 52.43it/s]
100%|██████████| 32/32 [00:00<00:00, 62.52it/s]


Epoch 5/10, Train Loss: 0.3396, Val Loss: 0.3263


100%|██████████| 125/125 [00:02<00:00, 52.37it/s]
100%|██████████| 32/32 [00:00<00:00, 58.21it/s]


Epoch 6/10, Train Loss: 0.3125, Val Loss: 0.3054


100%|██████████| 125/125 [00:02<00:00, 43.18it/s]
100%|██████████| 32/32 [00:00<00:00, 59.10it/s]


Epoch 7/10, Train Loss: 0.2948, Val Loss: 0.2956


100%|██████████| 125/125 [00:02<00:00, 55.76it/s]
100%|██████████| 32/32 [00:00<00:00, 60.27it/s]


Epoch 8/10, Train Loss: 0.2813, Val Loss: 0.2957


100%|██████████| 125/125 [00:02<00:00, 51.24it/s]
100%|██████████| 32/32 [00:00<00:00, 56.87it/s]


Epoch 9/10, Train Loss: 0.2717, Val Loss: 0.2851


100%|██████████| 125/125 [00:02<00:00, 51.21it/s]
100%|██████████| 32/32 [00:00<00:00, 36.39it/s]


Epoch 10/10, Train Loss: 0.2632, Val Loss: 0.2813


100%|██████████| 63/63 [00:01<00:00, 58.94it/s]


                   precision    recall  f1-score   support

            toxic       0.60      0.63      0.62      6090
     severe_toxic       0.34      0.19      0.24       367
          obscene       0.69      0.55      0.61      3691
           threat       0.45      0.06      0.11       211
           insult       0.66      0.48      0.56      3427
    identity_hate       0.69      0.12      0.21       712
overall_non_toxic       0.96      0.96      0.96     57735

        micro avg       0.90      0.87      0.89     72233
        macro avg       0.63      0.43      0.47     72233
     weighted avg       0.89      0.87      0.88     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_normalization with glove-twitter-200 and hidden dimmension 128


100%|██████████| 125/125 [00:02<00:00, 54.50it/s]
100%|██████████| 32/32 [00:00<00:00, 61.62it/s]


Epoch 1/10, Train Loss: 2.4291, Val Loss: 0.6614


100%|██████████| 125/125 [00:02<00:00, 46.90it/s]
100%|██████████| 32/32 [00:00<00:00, 59.90it/s]


Epoch 2/10, Train Loss: 0.5312, Val Loss: 0.4262


100%|██████████| 125/125 [00:02<00:00, 49.41it/s]
100%|██████████| 32/32 [00:00<00:00, 56.58it/s]


Epoch 3/10, Train Loss: 0.3754, Val Loss: 0.3356


100%|██████████| 125/125 [00:02<00:00, 48.62it/s]
100%|██████████| 32/32 [00:00<00:00, 62.66it/s]


Epoch 4/10, Train Loss: 0.3220, Val Loss: 0.3002


100%|██████████| 125/125 [00:02<00:00, 56.88it/s]
100%|██████████| 32/32 [00:00<00:00, 63.03it/s]


Epoch 5/10, Train Loss: 0.2945, Val Loss: 0.3098


100%|██████████| 125/125 [00:02<00:00, 49.52it/s]
100%|██████████| 32/32 [00:00<00:00, 59.52it/s]


Epoch 6/10, Train Loss: 0.2785, Val Loss: 0.2879


100%|██████████| 125/125 [00:02<00:00, 58.11it/s]
100%|██████████| 32/32 [00:00<00:00, 62.17it/s]


Epoch 7/10, Train Loss: 0.2650, Val Loss: 0.2797


100%|██████████| 125/125 [00:02<00:00, 45.82it/s]
100%|██████████| 32/32 [00:00<00:00, 56.34it/s]


Epoch 8/10, Train Loss: 0.2553, Val Loss: 0.2821


100%|██████████| 125/125 [00:02<00:00, 50.48it/s]
100%|██████████| 32/32 [00:00<00:00, 53.38it/s]


Epoch 9/10, Train Loss: 0.2489, Val Loss: 0.2793


100%|██████████| 125/125 [00:02<00:00, 46.44it/s]
100%|██████████| 32/32 [00:00<00:00, 61.52it/s]


Epoch 10/10, Train Loss: 0.2422, Val Loss: 0.2701


100%|██████████| 63/63 [00:01<00:00, 58.72it/s]


                   precision    recall  f1-score   support

            toxic       0.61      0.65      0.63      6090
     severe_toxic       0.35      0.22      0.27       367
          obscene       0.69      0.57      0.62      3691
           threat       0.48      0.15      0.23       211
           insult       0.68      0.48      0.57      3427
    identity_hate       0.72      0.23      0.35       712
overall_non_toxic       0.96      0.96      0.96     57735

        micro avg       0.90      0.88      0.89     72233
        macro avg       0.64      0.47      0.52     72233
     weighted avg       0.90      0.88      0.88     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_normalization with fasttext-200 and hidden dim 32


100%|██████████| 125/125 [00:02<00:00, 55.42it/s]
100%|██████████| 32/32 [00:00<00:00, 62.06it/s]


Epoch 1/10, Train Loss: 4.1580, Val Loss: 1.9874


100%|██████████| 125/125 [00:02<00:00, 45.18it/s]
100%|██████████| 32/32 [00:00<00:00, 61.51it/s]


Epoch 2/10, Train Loss: 1.3787, Val Loss: 0.8771


100%|██████████| 125/125 [00:02<00:00, 54.00it/s]
100%|██████████| 32/32 [00:00<00:00, 52.00it/s]


Epoch 3/10, Train Loss: 0.7349, Val Loss: 0.6246


100%|██████████| 125/125 [00:02<00:00, 46.17it/s]
100%|██████████| 32/32 [00:00<00:00, 60.91it/s]


Epoch 4/10, Train Loss: 0.5354, Val Loss: 0.4209


100%|██████████| 125/125 [00:02<00:00, 53.37it/s]
100%|██████████| 32/32 [00:00<00:00, 60.20it/s]


Epoch 5/10, Train Loss: 0.4488, Val Loss: 0.3970


100%|██████████| 125/125 [00:02<00:00, 49.09it/s]
100%|██████████| 32/32 [00:00<00:00, 61.26it/s]


Epoch 6/10, Train Loss: 0.3999, Val Loss: 0.3930


100%|██████████| 125/125 [00:02<00:00, 54.81it/s]
100%|██████████| 32/32 [00:00<00:00, 58.96it/s]


Epoch 7/10, Train Loss: 0.3722, Val Loss: 0.3537


100%|██████████| 125/125 [00:02<00:00, 45.18it/s]
100%|██████████| 32/32 [00:00<00:00, 57.72it/s]


Epoch 8/10, Train Loss: 0.3524, Val Loss: 0.3734


100%|██████████| 125/125 [00:02<00:00, 55.84it/s]
100%|██████████| 32/32 [00:00<00:00, 61.82it/s]


Epoch 9/10, Train Loss: 0.3401, Val Loss: 0.3812


100%|██████████| 125/125 [00:02<00:00, 51.83it/s]
100%|██████████| 32/32 [00:00<00:00, 61.28it/s]


Epoch 10/10, Train Loss: 0.3304, Val Loss: 0.3691


100%|██████████| 63/63 [00:01<00:00, 58.17it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.61      0.57      0.59      6090
     severe_toxic       0.07      0.00      0.01       367
          obscene       0.63      0.58      0.60      3691
           threat       0.00      0.00      0.00       211
           insult       0.61      0.45      0.52      3427
    identity_hate       0.00      0.00      0.00       712
overall_non_toxic       0.95      0.96      0.96     57735

        micro avg       0.90      0.87      0.88     72233
        macro avg       0.41      0.37      0.38     72233
     weighted avg       0.88      0.87      0.87     72233
      samples avg       0.92      0.91      0.91     72233




Processing word_tokenize_normalization with fasttext-200 and hidden dim 64


100%|██████████| 125/125 [00:02<00:00, 56.04it/s]
100%|██████████| 32/32 [00:00<00:00, 57.41it/s]


Epoch 1/10, Train Loss: 3.5337, Val Loss: 1.2211


100%|██████████| 125/125 [00:02<00:00, 47.95it/s]
100%|██████████| 32/32 [00:00<00:00, 62.93it/s]


Epoch 2/10, Train Loss: 0.8703, Val Loss: 0.6805


100%|██████████| 125/125 [00:02<00:00, 51.65it/s]
100%|██████████| 32/32 [00:00<00:00, 53.10it/s]


Epoch 3/10, Train Loss: 0.5251, Val Loss: 0.3788


100%|██████████| 125/125 [00:02<00:00, 53.55it/s]
100%|██████████| 32/32 [00:00<00:00, 36.67it/s]


Epoch 4/10, Train Loss: 0.4218, Val Loss: 0.4127


100%|██████████| 125/125 [00:02<00:00, 53.47it/s]
100%|██████████| 32/32 [00:00<00:00, 61.44it/s]


Epoch 5/10, Train Loss: 0.3772, Val Loss: 0.3548


100%|██████████| 125/125 [00:02<00:00, 53.85it/s]
100%|██████████| 32/32 [00:00<00:00, 39.42it/s]


Epoch 6/10, Train Loss: 0.3504, Val Loss: 0.3492


100%|██████████| 125/125 [00:02<00:00, 57.31it/s]
100%|██████████| 32/32 [00:00<00:00, 63.16it/s]


Epoch 7/10, Train Loss: 0.3355, Val Loss: 0.3469


100%|██████████| 125/125 [00:02<00:00, 56.76it/s]
100%|██████████| 32/32 [00:00<00:00, 60.82it/s]


Epoch 8/10, Train Loss: 0.3240, Val Loss: 0.3222


100%|██████████| 125/125 [00:02<00:00, 48.27it/s]
100%|██████████| 32/32 [00:00<00:00, 60.42it/s]


Epoch 9/10, Train Loss: 0.3163, Val Loss: 0.3454


100%|██████████| 125/125 [00:02<00:00, 55.06it/s]
100%|██████████| 32/32 [00:00<00:00, 61.72it/s]


Epoch 10/10, Train Loss: 0.3109, Val Loss: 0.3257


100%|██████████| 63/63 [00:00<00:00, 64.42it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                   precision    recall  f1-score   support

            toxic       0.73      0.44      0.55      6090
     severe_toxic       0.27      0.14      0.19       367
          obscene       0.78      0.44      0.56      3691
           threat       0.00      0.00      0.00       211
           insult       0.68      0.36      0.47      3427
    identity_hate       0.74      0.04      0.07       712
overall_non_toxic       0.94      0.98      0.96     57735

        micro avg       0.92      0.86      0.89     72233
        macro avg       0.59      0.34      0.40     72233
     weighted avg       0.89      0.86      0.87     72233
      samples avg       0.93      0.92      0.92     72233




Processing word_tokenize_normalization with fasttext-200 and hidden dim 128


100%|██████████| 125/125 [00:02<00:00, 48.20it/s]
100%|██████████| 32/32 [00:00<00:00, 58.64it/s]


Epoch 1/10, Train Loss: 2.1101, Val Loss: 0.6066


 14%|█▍        | 18/125 [00:00<00:01, 55.27it/s]


KeyboardInterrupt: 