In [1]:
'''Import libraries'''
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import wandb
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import os
from torch.utils.data import Dataset
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
from imblearn.under_sampling import RandomUnderSampler
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizerFast, BertForSequenceClassification, AdamW, Trainer, TrainingArguments
from tqdm import tqdm
from torch.nn import functional as F
import torch.nn as nn

wandb.login()

  from .autonotebook import tqdm as notebook_tqdm
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33malberto-rodero557[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
'''Variables and parameters'''

SAMPLES_TO_TRAIN=10000
DIMENSIONS=200

N_LABELS=2
MAX_LEN = 256
EPOCHS=50
PATIENCE=10
LEARNING_RATE=.00005
WEIGHT_DECAY=.01
BATCH_SIZE=16
METRIC_FOR_BEST_MODEL='eval_loss'
if METRIC_FOR_BEST_MODEL=='eval_loss':
    GREATER_IS_BETTER = False
else:
    GREATER_IS_BETTER = True

In [3]:
'''Preparing dataset'''

df = pd.read_json(os.getcwd()+'/datasets/subtaskA_train_monolingual.jsonl', lines=True)
df = df[['text', 'label']]

df=df.sample(round(SAMPLES_TO_TRAIN))
# test_train_df=df.sample(round(SAMPLES_TO_TRAIN*.2))

# df = pd.read_json(os.getcwd()+'/datasets/subtaskA_dev_monolingual.jsonl', lines=True)
# df = df[['text', 'label']]

# val_df= df.sample(round(SAMPLES_TO_TRAIN*.2))
# test_dev_df= df.sample(round(SAMPLES_TO_TRAIN*.2))

# we balance the training set
print(f'Dataset size before balancing: {df.shape}')
counts = df['label'].value_counts()
sampler = RandomUnderSampler(random_state=42)
x_text, y = sampler.fit_resample(df[['text']], df['label'])

print(f'Dataset size after balancing: {x_text.shape}')
print(f'Entried dropped: {df.shape[0]-x_text.shape[0]}')

# Create a new balanced DataFrame
df = pd.DataFrame({'text': x_text['text'], 'label': y})

# Print the balanced DataFrame
print("\nBalanced DataFrame:")
print(df['label'].value_counts())

Dataset size before balancing: (10000, 2)
Dataset size after balancing: (9382, 1)
Entried dropped: 618

Balanced DataFrame:
label
0    4691
1    4691
Name: count, dtype: int64


In [4]:
'''loading glove'''
embeddings_index={}
with open('../0 playground and indoor/OtherData/glove.6B.200d.txt','r',encoding='utf-8') as f:
    for line in f:
        values=line.split()
        word=values[0]
        vectors=np.asarray(values[1:],'float32')
        embeddings_index[word]=vectors
f.close()
print('Found %s word vectors.' % len(embeddings_index))


Found 400000 word vectors.


In [5]:
'''glove building'''

from nltk.tokenize import word_tokenize
from tqdm import tqdm 

def sent2vec(s):
    """ Function Creates a normalized vector for the whole sentence"""
    words = str(s).lower()
    words = word_tokenize(words)
    words = [w for w in words if w.isalpha()]
    M = []
    for w in words:
        try:
            M.append(embeddings_index[w])
        except:
            continue
    M = np.array(M)
    v = M.sum(axis=0)
    if type(v) != np.ndarray:
        return np.zeros(200)
    return v / np.sqrt((v ** 2).sum())

print('Training df:')
df_x = np.array([sent2vec(x) for x in tqdm(df['text'])])
print(df_x.shape)
train_y=df['label']


Training df:


100%|██████████| 9382/9382 [00:14<00:00, 665.06it/s]

(9382, 200)





In [6]:
'''Preparing for training'''

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Initialize the StandardScaler
scaler = StandardScaler()
# Fit the scaler to the training data and transform the data
train_x = scaler.fit_transform(df_x)

import pickle

# Save the trained scaler
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


In [7]:
'''metrics'''

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    auc = roc_auc_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'auc': auc,
        'precision': precision,
        'recall': recall,
    }

In [8]:
class Data(Dataset):
    def __init__(self, X_train, y_train):
        self.X = torch.from_numpy(X_train.astype(np.float32))
        self.y = torch.from_numpy(y_train).type(torch.LongTensor)
        self.len = self.X.shape[0]

    def __getitem__(self, index):
        return {'input_ids': self.X[index], 'labels': self.y[index]}

    def __len__(self):
        return self.len

X_train, X_test, y_train, y_test = train_test_split(train_x, train_y.values, test_size=0.2, random_state=42)
traindata = Data(X_train, y_train)
testdata = Data(X_test, y_test)

In [20]:
# number of features (len of X cols)
input_dim = train_x.shape[-1]

# number of classes (unique of y)
output_dim = 2

class RNNModel(nn.Module):
    def __init__(self):
        super(RNNModel, self).__init__()
        
        self.lstm1 = nn.LSTM(input_dim, 512, batch_first=True)
        self.ln1 = nn.LayerNorm(512)
        self.dropout1 = nn.Dropout(0.2)
        
        self.lstm2 = nn.LSTM(512, 512, batch_first=True)
        self.ln2 = nn.LayerNorm(512)
        self.dropout2 = nn.Dropout(0.2)
        
        self.fc = nn.Linear(512, output_dim)
        
        self.loss = nn.CrossEntropyLoss()

    def forward(self, input_ids, labels=None):
        # print(f"Input shape: {input_ids.shape}")
        
        x, _ = self.lstm1(input_ids)
        # print(f"After LSTM1: {x.shape}")

        x = self.ln1(x)
        x = self.dropout1(x)
        
        # print(f"Before LSTM2: {x.shape}")
        
        x, _ = self.lstm2(x)
        # print(f"After LSTM2: {x.shape}")

        x = self.ln2(x)
        x = self.dropout2(x)
        
        x = self.fc(x)
        # print(f"Output shape: {x.shape}")
        
        outputs = (x,)
        if labels is not None:
            loss = self.loss(x, labels)
            outputs = (loss,) + outputs
            
        return (outputs if len(outputs) > 1 else outputs[0])

# Instantiate the model with appropriate dimensions
model = RNNModel()

In [21]:
from transformers import EarlyStoppingCallback

model = RNNModel()

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    warmup_steps=500,
    weight_decay=WEIGHT_DECAY,
    metric_for_best_model=METRIC_FOR_BEST_MODEL,
    greater_is_better=GREATER_IS_BETTER,
    logging_dir='./logs',
    logging_steps=15000,
    do_train=True,
    do_eval=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    push_to_hub=False,
    logging_first_step=False,
    load_best_model_at_end=True,
    save_total_limit=2,
    report_to="wandb"
)

# Create trainer
trainer = Trainer(
    model=model, 
    args=training_args, 
    train_dataset=traindata,
    eval_dataset=testdata, 
    compute_metrics=compute_metrics,# training dataset
    callbacks=[EarlyStoppingCallback(early_stopping_patience=PATIENCE)]
)

# Train the model
# print(trainer.evaluate())

trainer.train()

print(trainer.evaluate())

  2%|▏         | 454/23500 [00:01<01:21, 284.24it/s]
[A
[A

[A[A                                           
                                                    
  2%|▏         | 470/23500 [00:01<01:21, 284.24it/s]
  2%|▏         | 511/23500 [00:02<01:55, 198.62it/s]

{'eval_loss': 0.604365885257721, 'eval_accuracy': 0.6723494938732019, 'eval_f1': 0.6332737030411449, 'eval_auc': 0.6710493444436116, 'eval_precision': 0.7070572569906791, 'eval_recall': 0.5734341252699784, 'eval_runtime': 0.1557, 'eval_samples_per_second': 12058.45, 'eval_steps_per_second': 758.07, 'epoch': 1.0}


  4%|▍         | 916/23500 [00:03<01:19, 285.07it/s]
[A
[A

[A[A                                           
                                                    
  4%|▍         | 940/23500 [00:03<01:19, 285.07it/s]
  4%|▍         | 973/23500 [00:03<01:53, 199.33it/s]

{'eval_loss': 0.5453975796699524, 'eval_accuracy': 0.7187000532765051, 'eval_f1': 0.7121046892039259, 'eval_auc': 0.7185223920256727, 'eval_precision': 0.7191629955947136, 'eval_recall': 0.7051835853131749, 'eval_runtime': 0.1583, 'eval_samples_per_second': 11859.757, 'eval_steps_per_second': 745.579, 'epoch': 2.0}


  6%|▌         | 1396/23500 [00:05<01:16, 289.32it/s]
[A
[A

[A[A                                           
                                                     
  6%|▌         | 1410/23500 [00:05<01:16, 289.32it/s]
  6%|▌         | 1425/23500 [00:05<02:01, 181.77it/s]

{'eval_loss': 0.5118682980537415, 'eval_accuracy': 0.742141715503463, 'eval_f1': 0.7392241379310346, 'eval_auc': 0.7421243524492804, 'eval_precision': 0.7376344086021506, 'eval_recall': 0.7408207343412527, 'eval_runtime': 0.1567, 'eval_samples_per_second': 11980.972, 'eval_steps_per_second': 753.199, 'epoch': 3.0}


  8%|▊         | 1859/23500 [00:07<01:16, 282.26it/s]
[A
[A

[A[A                                           
                                                     
  8%|▊         | 1880/23500 [00:07<01:16, 282.26it/s]
  8%|▊         | 1916/23500 [00:07<01:48, 198.97it/s]

{'eval_loss': 0.4929945766925812, 'eval_accuracy': 0.7575919019712307, 'eval_f1': 0.7501372872048325, 'eval_auc': 0.7573288774122044, 'eval_precision': 0.7631284916201118, 'eval_recall': 0.7375809935205183, 'eval_runtime': 0.159, 'eval_samples_per_second': 11807.357, 'eval_steps_per_second': 742.285, 'epoch': 4.0}


 10%|▉         | 2322/23500 [00:09<01:13, 286.97it/s]
[A
[A

[A[A                                           
                                                     
 10%|█         | 2350/23500 [00:09<01:13, 286.97it/s]
 10%|█         | 2379/23500 [00:09<01:46, 198.68it/s]

{'eval_loss': 0.475911021232605, 'eval_accuracy': 0.7757059136920618, 'eval_f1': 0.7700709994538504, 'eval_auc': 0.7755170753532146, 'eval_precision': 0.7790055248618785, 'eval_recall': 0.7613390928725702, 'eval_runtime': 0.1612, 'eval_samples_per_second': 11644.865, 'eval_steps_per_second': 732.069, 'epoch': 5.0}


 12%|█▏        | 2820/23500 [00:11<01:11, 290.17it/s]
[A
[A

[A[A                                           
                                                     
 12%|█▏        | 2820/23500 [00:11<01:11, 290.17it/s]
 12%|█▏        | 2850/23500 [00:11<01:55, 178.09it/s]

{'eval_loss': 0.46301543712615967, 'eval_accuracy': 0.783697389451252, 'eval_f1': 0.7769230769230769, 'eval_auc': 0.7834318995805257, 'eval_precision': 0.79082774049217, 'eval_recall': 0.7634989200863931, 'eval_runtime': 0.1647, 'eval_samples_per_second': 11397.945, 'eval_steps_per_second': 716.546, 'epoch': 6.0}


 14%|█▍        | 3289/23500 [00:12<01:10, 286.11it/s]
[A
[A

[A[A                                           
                                                     
 14%|█▍        | 3290/23500 [00:13<01:10, 286.11it/s]
 14%|█▍        | 3318/23500 [00:13<01:50, 182.79it/s]

{'eval_loss': 0.4575531482696533, 'eval_accuracy': 0.7895578050079914, 'eval_f1': 0.7821290678433536, 'eval_auc': 0.7892436743861754, 'eval_precision': 0.7993235625704622, 'eval_recall': 0.765658747300216, 'eval_runtime': 0.1478, 'eval_samples_per_second': 12696.729, 'eval_steps_per_second': 798.196, 'epoch': 7.0}


 16%|█▌        | 3733/23500 [00:14<01:08, 289.72it/s]
[A
[A

[A[A                                           
                                                     
 16%|█▌        | 3760/23500 [00:14<01:08, 289.72it/s]
 16%|█▌        | 3791/23500 [00:15<01:36, 204.43it/s]

{'eval_loss': 0.4661900997161865, 'eval_accuracy': 0.7799680340969632, 'eval_f1': 0.7744401966138722, 'eval_auc': 0.779779951988699, 'eval_precision': 0.7834254143646409, 'eval_recall': 0.765658747300216, 'eval_runtime': 0.1531, 'eval_samples_per_second': 12260.093, 'eval_steps_per_second': 770.746, 'epoch': 8.0}


 18%|█▊        | 4204/23500 [00:16<01:05, 292.94it/s]
[A
[A

[A[A                                           
                                                     
 18%|█▊        | 4230/23500 [00:16<01:05, 292.94it/s]
 18%|█▊        | 4262/23500 [00:16<01:34, 203.49it/s]

{'eval_loss': 0.46777409315109253, 'eval_accuracy': 0.7842301545018647, 'eval_f1': 0.7768595041322315, 'eval_auc': 0.7839292730398602, 'eval_precision': 0.7930258717660292, 'eval_recall': 0.7613390928725702, 'eval_runtime': 0.1568, 'eval_samples_per_second': 11970.552, 'eval_steps_per_second': 752.544, 'epoch': 9.0}


 20%|█▉        | 4671/23500 [00:18<01:05, 288.23it/s]
[A
[A

[A[A                                           
                                                     
 20%|██        | 4700/23500 [00:18<01:05, 288.23it/s]
 20%|██        | 4729/23500 [00:18<01:33, 201.40it/s]

{'eval_loss': 0.4467627704143524, 'eval_accuracy': 0.8002131060202451, 'eval_f1': 0.7929320817228052, 'eval_auc': 0.7998866715268457, 'eval_precision': 0.8112994350282486, 'eval_recall': 0.775377969762419, 'eval_runtime': 0.1552, 'eval_samples_per_second': 12096.618, 'eval_steps_per_second': 760.469, 'epoch': 10.0}


 22%|██▏       | 5160/23500 [00:20<01:04, 284.06it/s]
[A
[A

[A[A                                           
                                                     
 22%|██▏       | 5170/23500 [00:20<01:04, 284.06it/s]
 22%|██▏       | 5189/23500 [00:20<01:43, 177.33it/s]

{'eval_loss': 0.4522332549095154, 'eval_accuracy': 0.8082045817794352, 'eval_f1': 0.7986577181208053, 'eval_auc': 0.8077163290659145, 'eval_precision': 0.8283062645011601, 'eval_recall': 0.7710583153347732, 'eval_runtime': 0.1539, 'eval_samples_per_second': 12193.802, 'eval_steps_per_second': 766.579, 'epoch': 11.0}


 24%|██▍       | 5629/23500 [00:22<01:02, 285.82it/s]
[A
[A

[A[A                                           
                                                     
 24%|██▍       | 5640/23500 [00:22<01:02, 285.82it/s]
 24%|██▍       | 5686/23500 [00:22<01:29, 199.63it/s]

{'eval_loss': 0.46159839630126953, 'eval_accuracy': 0.8039424613745338, 'eval_f1': 0.7997823721436345, 'eval_auc': 0.8038083136314395, 'eval_precision': 0.805921052631579, 'eval_recall': 0.7937365010799136, 'eval_runtime': 0.1591, 'eval_samples_per_second': 11795.416, 'eval_steps_per_second': 741.534, 'epoch': 12.0}


 26%|██▌       | 6093/23500 [00:23<01:01, 284.82it/s]
[A
[A

[A[A                                           
                                                     
 26%|██▌       | 6110/23500 [00:24<01:01, 284.82it/s]
 26%|██▌       | 6149/23500 [00:24<01:27, 198.51it/s]

{'eval_loss': 0.4600223898887634, 'eval_accuracy': 0.7996803409696324, 'eval_f1': 0.7954298150163222, 'eval_auc': 0.7995454369959551, 'eval_precision': 0.8015350877192983, 'eval_recall': 0.7894168466522679, 'eval_runtime': 0.1569, 'eval_samples_per_second': 11962.239, 'eval_steps_per_second': 752.021, 'epoch': 13.0}


 28%|██▊       | 6555/23500 [00:25<00:59, 283.76it/s]
[A
[A

[A[A                                           
                                                     
 28%|██▊       | 6580/23500 [00:25<00:59, 283.76it/s]
 28%|██▊       | 6611/23500 [00:26<01:25, 198.54it/s]

{'eval_loss': 0.4642864465713501, 'eval_accuracy': 0.8007458710708577, 'eval_f1': 0.7924528301886792, 'eval_auc': 0.8003556560900995, 'eval_precision': 0.815068493150685, 'eval_recall': 0.7710583153347732, 'eval_runtime': 0.1549, 'eval_samples_per_second': 12120.605, 'eval_steps_per_second': 761.977, 'epoch': 14.0}


 30%|██▉       | 7046/23500 [00:27<00:57, 284.50it/s]
[A
[A

[A[A                                           
                                                     
 30%|███       | 7050/23500 [00:27<00:57, 284.50it/s]
 30%|███       | 7075/23500 [00:27<01:32, 178.49it/s]

{'eval_loss': 0.4454619586467743, 'eval_accuracy': 0.8092701118806607, 'eval_f1': 0.8017718715393134, 'eval_auc': 0.8089097982571489, 'eval_precision': 0.8227272727272728, 'eval_recall': 0.7818574514038877, 'eval_runtime': 0.1553, 'eval_samples_per_second': 12089.8, 'eval_steps_per_second': 760.041, 'epoch': 15.0}


 32%|███▏      | 7514/23500 [00:29<00:55, 288.52it/s]
[A
[A

[A[A                                           
                                                     
 32%|███▏      | 7520/23500 [00:29<00:55, 288.52it/s]
 32%|███▏      | 7543/23500 [00:29<01:28, 180.93it/s]

{'eval_loss': 0.46293017268180847, 'eval_accuracy': 0.8076718167288226, 'eval_f1': 0.7977591036414565, 'eval_auc': 0.8071621778144183, 'eval_precision': 0.8288707799767171, 'eval_recall': 0.7688984881209503, 'eval_runtime': 0.1588, 'eval_samples_per_second': 11822.126, 'eval_steps_per_second': 743.213, 'epoch': 16.0}


 34%|███▍      | 7982/23500 [00:31<00:54, 285.45it/s]
[A
[A

[A[A                                           
                                                     
 34%|███▍      | 7990/23500 [00:31<00:54, 285.45it/s]
 34%|███▍      | 8011/23500 [00:31<01:27, 177.90it/s]

{'eval_loss': 0.45777833461761475, 'eval_accuracy': 0.8087373468300479, 'eval_f1': 0.8026388125343595, 'eval_auc': 0.8084692025899757, 'eval_precision': 0.8174692049272116, 'eval_recall': 0.7883369330453563, 'eval_runtime': 0.1583, 'eval_samples_per_second': 11857.167, 'eval_steps_per_second': 745.416, 'epoch': 17.0}


 36%|███▌      | 8451/23500 [00:33<00:51, 291.57it/s]
[A
[A

[A[A                                           
                                                     
 36%|███▌      | 8460/23500 [00:33<00:51, 291.57it/s]
 36%|███▌      | 8481/23500 [00:33<01:22, 182.34it/s]

{'eval_loss': 0.464586079120636, 'eval_accuracy': 0.8071390516782099, 'eval_f1': 0.8017524644030667, 'eval_auc': 0.8069203044198104, 'eval_precision': 0.8133333333333334, 'eval_recall': 0.7904967602591793, 'eval_runtime': 0.1567, 'eval_samples_per_second': 11977.563, 'eval_steps_per_second': 752.985, 'epoch': 18.0}


 38%|███▊      | 8926/23500 [00:35<00:50, 287.66it/s]
[A
[A

[A[A                                           
                                                     
 38%|███▊      | 8930/23500 [00:35<00:50, 287.66it/s]
 38%|███▊      | 8955/23500 [00:35<01:20, 180.95it/s]

{'eval_loss': 0.4774799346923828, 'eval_accuracy': 0.8012786361214704, 'eval_f1': 0.7931225734886301, 'eval_auc': 0.8008956128935553, 'eval_precision': 0.8152793614595211, 'eval_recall': 0.7721382289416847, 'eval_runtime': 0.1529, 'eval_samples_per_second': 12276.592, 'eval_steps_per_second': 771.784, 'epoch': 19.0}


 40%|███▉      | 9397/23500 [00:36<00:48, 288.44it/s]
[A
[A

[A[A                                           
                                                     
 40%|████      | 9400/23500 [00:37<00:48, 288.44it/s]
 40%|████      | 9426/23500 [00:37<01:16, 183.16it/s]

{'eval_loss': 0.46631118655204773, 'eval_accuracy': 0.8060735215769845, 'eval_f1': 0.8023887079261671, 'eval_auc': 0.8059681408452624, 'eval_precision': 0.8067685589519651, 'eval_recall': 0.7980561555075594, 'eval_runtime': 0.1539, 'eval_samples_per_second': 12198.355, 'eval_steps_per_second': 766.865, 'epoch': 20.0}


 42%|████▏     | 9862/23500 [00:38<00:47, 286.13it/s]
[A
[A

                                                     
 42%|████▏     | 9870/23500 [00:38<00:47, 286.13it/s]
 42%|████▏     | 9891/23500 [00:38<01:15, 180.72it/s]

{'eval_loss': 0.4738948345184326, 'eval_accuracy': 0.798614810868407, 'eval_f1': 0.7916207276736493, 'eval_auc': 0.7983093844605996, 'eval_precision': 0.8085585585585585, 'eval_recall': 0.775377969762419, 'eval_runtime': 0.1538, 'eval_samples_per_second': 12206.601, 'eval_steps_per_second': 767.384, 'epoch': 21.0}


 44%|████▍     | 10325/23500 [00:40<00:45, 287.60it/s]
[A
[A

[A[A                                           
                                                      
 44%|████▍     | 10340/23500 [00:40<00:45, 287.60it/s]
 44%|████▍     | 10354/23500 [00:40<01:13, 179.03it/s]

{'eval_loss': 0.4900762736797333, 'eval_accuracy': 0.8002131060202451, 'eval_f1': 0.800213106020245, 'eval_auc': 0.8003550883121779, 'eval_precision': 0.7896950578338591, 'eval_recall': 0.8110151187904968, 'eval_runtime': 0.1601, 'eval_samples_per_second': 11726.264, 'eval_steps_per_second': 737.187, 'epoch': 22.0}


 46%|████▌     | 10793/23500 [00:42<00:44, 286.89it/s]
[A
[A

[A[A                                           
                                                      
 46%|████▌     | 10810/23500 [00:42<00:44, 286.89it/s]
 46%|████▌     | 10850/23500 [00:42<01:02, 201.15it/s]

{'eval_loss': 0.4829609990119934, 'eval_accuracy': 0.8018114011720832, 'eval_f1': 0.8002148227712137, 'eval_auc': 0.8018472086901818, 'eval_precision': 0.7959401709401709, 'eval_recall': 0.8045356371490281, 'eval_runtime': 0.1513, 'eval_samples_per_second': 12407.971, 'eval_steps_per_second': 780.043, 'epoch': 23.0}


 48%|████▊     | 11257/23500 [00:44<00:42, 289.60it/s]
[A
[A

[A[A                                           
                                                      
 48%|████▊     | 11280/23500 [00:44<00:42, 289.60it/s]
 48%|████▊     | 11315/23500 [00:44<01:00, 200.99it/s]

{'eval_loss': 0.4860065281391144, 'eval_accuracy': 0.8066062866275973, 'eval_f1': 0.8034650785056848, 'eval_auc': 0.8065364865447989, 'eval_precision': 0.8056460369163952, 'eval_recall': 0.8012958963282938, 'eval_runtime': 0.1571, 'eval_samples_per_second': 11944.073, 'eval_steps_per_second': 750.879, 'epoch': 24.0}


 50%|████▉     | 11748/23500 [00:46<00:41, 285.81it/s]
[A
[A

[A[A                                           
                                                      
 50%|█████     | 11750/23500 [00:46<00:41, 285.81it/s]
[A
 50%|█████     | 11750/23500 [00:46<00:46, 253.63it/s]


{'eval_loss': 0.48666319251060486, 'eval_accuracy': 0.8050079914757592, 'eval_f1': 0.8042780748663102, 'eval_auc': 0.8051011439589565, 'eval_precision': 0.7966101694915254, 'eval_recall': 0.8120950323974082, 'eval_runtime': 0.1557, 'eval_samples_per_second': 12056.825, 'eval_steps_per_second': 757.968, 'epoch': 25.0}
{'train_runtime': 46.3268, 'train_samples_per_second': 8100.065, 'train_steps_per_second': 507.266, 'train_loss': 0.3763123753324468, 'epoch': 25.0}


100%|██████████| 118/118 [00:00<00:00, 719.10it/s]

{'eval_loss': 0.4454619586467743, 'eval_accuracy': 0.8092701118806607, 'eval_f1': 0.8017718715393134, 'eval_auc': 0.8089097982571489, 'eval_precision': 0.8227272727272728, 'eval_recall': 0.7818574514038877, 'eval_runtime': 0.1656, 'eval_samples_per_second': 11334.586, 'eval_steps_per_second': 712.563, 'epoch': 25.0}





In [None]:
# first LSTM
# 'eval_loss': 0.4162973463535309, 'eval_accuracy': 0.8286620835536753, 'eval_f1': 0.8205980066445182
