In [1]:
import sys
sys.path.append('../')

In [2]:
import random

In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, auc, roc_auc_score
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [4]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset 

In [5]:
from mlpack.utils import to_device, to_fp16
from mlpack.trainer import TrainArgs, BaseTrainer
from mlpack.imbalanced_sampler import ImbalancedDatasetSampler

# Data

In [6]:
df = pd.read_csv('../datasets/ENEL/df_BoxCox.csv', sep='\t', index_col=0)
df.head()

Unnamed: 0_level_0,2014-01-05 00:00:00,2014-01-06 00:00:00,2014-01-07 00:00:00,2014-01-08 00:00:00,2014-01-09 00:00:00,2014-01-10 00:00:00,2014-01-11 00:00:00,2014-01-12 00:00:00,2014-01-13 00:00:00,2014-01-14 00:00:00,...,2016-10-22 00:00:00,2016-10-23 00:00:00,2016-10-24 00:00:00,2016-10-25 00:00:00,2016-10-26 00:00:00,2016-10-27 00:00:00,2016-10-28 00:00:00,2016-10-29 00:00:00,2016-10-30 00:00:00,flag
CONS_NO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0387DD8A07E07FDA6271170F86AD9151,-0.828836,-0.824899,-0.815932,-0.822209,-0.807259,-0.791015,-0.794555,-0.817321,-0.826747,-0.812172,...,0.516633,0.582324,0.689155,0.788221,0.332229,0.657189,0.68974,0.60419,0.789638,1
01D6177B5D4FFE0CABA9EF17DAFC2B84,-0.828836,-0.824899,-0.815932,-0.822209,-0.807259,-0.791015,-0.794555,-0.817321,-0.826747,-0.812172,...,-1.459474,-1.458461,-1.463758,-1.465554,-1.466755,-1.466997,-1.468293,-1.463369,-1.468039,1
4B75AC4F2D8434CFF62DB64D0BB43103,-0.828836,-0.824899,-0.815932,-0.822209,-0.807259,-0.791015,-0.794555,-0.817321,-0.826747,-0.812172,...,0.839572,1.195724,1.314021,1.253836,1.377947,1.252855,1.216697,1.209608,1.144558,1
B32AC8CC6D5D805AC053557AB05F5343,-0.828836,-0.824899,-0.815932,-0.822209,-0.807259,-0.791015,-0.794555,-0.817321,-0.826747,-0.812172,...,0.960309,1.063827,1.288434,1.137711,1.30905,1.317606,1.191288,1.343474,1.159274,1
EDFC78B07BA2908B3395C4EB2304665E,0.974797,1.189786,1.165372,1.030638,1.032385,1.057883,1.100623,1.119949,0.982739,1.092215,...,1.040139,0.762741,0.725347,0.443744,0.413011,0.458308,0.52866,0.893334,0.84906,1


In [43]:
df_train, df_valid = train_test_split(df, random_state=1, test_size=0.5, shuffle=True)

In [44]:
data_train = df_train.values
data_valid = df_valid.values

In [45]:
x_train = data_train[:,:-1]
y_train = data_train[:,-1]

x_valid = data_valid[:,:-1]
y_valid = data_valid[:,-1]

In [46]:
# x_train = np.concatenate(([2*[0] for _ in range(len(x_train))], x_train), axis=1)
# x_valid = np.concatenate(([2*[0] for _ in range(len(x_valid))], x_valid), axis=1)

In [47]:
x_train.shape, x_valid.shape

((21186, 1029), (21186, 1029))

In [48]:
x_train = x_train.reshape(len(x_train), -1, 7)
x_train = np.expand_dims(x_train, 1)

x_valid = x_valid.reshape(len(x_valid), -1, 7)
x_valid = np.expand_dims(x_valid, 1)

In [49]:
x_train = x_train.transpose(0, 1, 3, 2)

In [50]:
x_valid = x_valid.transpose(0, 1, 3, 2)

In [51]:
len(x_valid), len(x_train)

(21186, 21186)

In [52]:
np.unique(y_train), np.unique(y_valid)

(array([0., 1.]), array([0., 1.]))

# Dataset

In [53]:
class ENELDataset(Dataset):
    def __init__(self, x, y):
        self.x, self.y = x, y
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, idx):
        x, y = self.x[idx], self.y[idx]        
        return x.astype(np.float32), y.astype(np.int64)

In [54]:
ds_train = ENELDataset(x_train, y_train)
ds_valid = ENELDataset(x_valid, y_valid)

In [55]:
class MySampler(ImbalancedDatasetSampler):
    
    def _get_label(self, dataset, idx):
        _, y = dataset[idx]
        return y

In [56]:
dl_train = DataLoader(ds_train, 
                      sampler=MySampler(ds_train),
                      batch_size=64, pin_memory=True, num_workers=8)
dl_valid = DataLoader(ds_valid,
                      batch_size=64, pin_memory=True, num_workers=8)

# Conv

In [57]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [58]:
from torch.nn import functional as F

In [59]:
class GELU(nn.Module):
    r"""Applies the Gaussian Error Linear Units function:
    .. math::
        \text{GELU}(x) = x * \Phi(x)
    where :math:`\Phi(x)` is the Cumulative Distribution Function for Gaussian Distribution.
    Shape:
        - Input: :math:`(N, *)` where `*` means, any number of additional
          dimensions
        - Output: :math:`(N, *)`, same shape as the input
    .. image:: scripts/activation_images/GELU.png
    Examples::
        >>> m = nn.GELU()
        >>> input = torch.randn(2)
        >>> output = m(input)
    """
    def forward(self, input):
        return F.gelu(input)

In [62]:
class SelfAttentionConv(nn.Module):
    
    def __init__(self, *args, **kwargs):
        super().__init__()
        
        self.query = nn.Conv2d(*args, **kwargs)
        self.key = nn.Conv2d(*args, **kwargs)
        self.value = nn.Conv2d(*args, **kwargs)
        
        self.d = self.query.out_channels**(0.5)
        
    def forward(self, *args, **kwargs):
        query = self.query(*args, **kwargs)
        key = self.key(*args, **kwargs)
        value = self.value(*args, **kwargs)
        
        attention = torch.matmul(query, key.transpose(-2, -1))
        attention = attention / self.d
        attention = nn.Softmax(dim=-1)(attention)
        attention = torch.matmul(attention, value)
        return attention

In [63]:
class MySelfConvModel(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            self.block(1, 90),
            self.block(90,90),
            self.block(90,90),
            self.block(90,90),
#             self.block(90,90),
            nn.MaxPool2d(3)
        )
        self.classifier = nn.Sequential(
            nn.Linear(90 * 2 * 49, 90),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(90,2)
        )
        
    def block(self, in_channels, out_channels, dropout=0.5):
        return nn.Sequential(
            SelfAttentionConv(in_channels, out_channels, kernel_size=3, padding=1),  # n x out_channels x m x out_features
            nn.ReLU(),
            nn.Dropout(dropout),
        )
    
    def forward(self, x):
        o = self.net(x)
        o = o.view(x.shape[0], -1)
        o = self.classifier(o)
        return o

In [64]:
class CompleteModel(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.conv = MySelfConvModel()
        self.wide = WideLinear()
        self.classifier = nn.Linear(60 + 90, 2)
        self.dropout = nn.Dropout(0.4)
        
    def forward(self, x):
        conv = self.conv(x)
        wide = self.wide(x)
        o = torch.cat((wide, conv), dim=1)
        o = self.dropout(o)
        o = self.classifier(o)
        return o

In [65]:
class MixedConv(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            self.conv_block(1, 90),
#             self.conv_block(90,90),
            self.conv_block(90,90),
            self.attention_block(90,90),
            nn.MaxPool2d(3)
        )
        self.classifier = nn.Sequential(
            nn.Linear(90 * 2 * 49, 90),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(90,2)
        )
        
    def attention_block(self, in_channels, out_channels, dropout=0.4):
        return nn.Sequential(
            SelfAttentionConv(in_channels, out_channels, kernel_size=3, padding=1),  # n x out_channels x m x out_features
            nn.ReLU(),
            nn.Dropout(dropout),
        )
    
    def conv_block(self, in_channels, out_channels, dropout=0.4):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),  # n x out_channels x m x out_features
            nn.ReLU(),
            nn.Dropout(dropout),
        )
    
    def forward(self, x):
        o = self.net(x)
        o = o.view(x.shape[0], -1)
        o = self.classifier(o)
        return o

In [66]:
model = MySelfConvModel()

In [67]:
model.to(device)

MySelfConvModel(
  (net): Sequential(
    (0): Sequential(
      (0): SelfAttentionConv(
        (query): Conv2d(1, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (key): Conv2d(1, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (value): Conv2d(1, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (1): ReLU()
      (2): Dropout(p=0.5, inplace=False)
    )
    (1): Sequential(
      (0): SelfAttentionConv(
        (query): Conv2d(90, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (key): Conv2d(90, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (value): Conv2d(90, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (1): ReLU()
      (2): Dropout(p=0.5, inplace=False)
    )
    (2): Sequential(
      (0): SelfAttentionConv(
        (query): Conv2d(90, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (key): Conv2d(90, 90, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        

In [68]:
x, y = next(iter(dl_train))

In [69]:
with torch.no_grad():
    o = model(x.to(device))
o.shape

torch.Size([64, 2])

# Eval

In [70]:
import numpy as np

def precision_at_k(y_true,class_probs,k,threshold=0.5,class_of_interest=1,isSorted=False):

    if (not isSorted):

        # Selecting the probs on the class of interest (coi)
        coi_probs = class_probs[:,class_of_interest]

        # print('Selecionando as probs da classe de interesse:',class_of_interest)
        # print(coi_probs)

        # print('Ordem de entrada dos targets')
        # print(y_true)

        # Sorting the coi probs and selecting the top k
        # print('Ordenando as probs e os targets correspondentes:')
        sorted_coi_probs = np.sort(coi_probs)[::-1]
        sorted_y = y_true[np.argsort(coi_probs)[::-1]]
        
        # Calculating the precision for 
        # print(sorted_coi_probs)
        # print(sorted_y)

    else:
        # Code otimization so the algorithm doesn't have to sort the data twice
        sorted_coi_probs = class_probs
        sorted_y = y_true


    # Selecting the top k probs and their respective labels
    sorted_coi_probs = sorted_coi_probs[:k]
    sorted_y = sorted_y[:k]

    # print('Selecionando as top',k,'probs')
    # print(sorted_coi_probs)
    # print(sorted_y)

    # Atributing class based on the threshold
    sorted_predicted_classes = np.where(sorted_coi_probs>threshold,
                                        float(class_of_interest),
                                        0.0)

    # print('Atribuindo a classe baseada no threshold de ',threshold)
#     print(sorted_predicted_classes)

    # print('Precisão em K:')

    # print(k)

    precisionK = np.sum(sorted_predicted_classes == sorted_y)/k  

    return precisionK

def map_at_N(y_true,class_probs,N,thrs=0.5,class_of_interest=1):

    # Calls the precision at k function with the values already sorted and calculates the average precision
    # weighted by the number of positive classes inside the sample

    # Declaring the list to hold the precisions
    pks = []

    # Selecting the probs on the class of interest (coi)
    coi_probs = class_probs[:,class_of_interest]

    # Sorting the class of interest and its respective label
    sorted_coi_probs = np.sort(coi_probs)[::-1]
    sorted_y = y_true[np.argsort(coi_probs)[::-1]]

    # Selecting the top N scores 
    sorted_coi_probs = sorted_coi_probs[:N]
    sorted_y = sorted_y[:N]


    ## TESTANDO APENAS ##
    sorted_y[-1] = 1 

    # Identifying the positions of the class of interest inside the top N
    top_coi_indexes = np.argwhere(sorted_y>0)

    for value in top_coi_indexes:
        
        # Adjusting the index
        limite = value[0] + 1
        
        pks.append(
                    precision_at_k(sorted_y[:limite],
                    sorted_coi_probs[:limite],
                    limite,threshold=thrs,isSorted=True)
                    )

    pks = np.array(pks)
    
    # print(pks)
    # print(sorted_coi_probs)
    # print(sorted_y)
    return pks.mean()

In [71]:
class MyTrainer(BaseTrainer):
    
    @staticmethod
    def dataloader_generator(dataloader):
        for x, y in dataloader:
#             x = x.unsqueeze(-1)
            x, y = to_device(x, y, device=device)
            yield {
                'inputs': {
                    'x': x
                },
                'targets': {
                    'y': y
                }
            }
        
    @staticmethod
    def loss_from_model(model_output, targets, loss_fn=None):
        y = targets['y']
        return loss_fn(model_output, y)
    
    def evaluate_fn(self, model, dataloader, loss_fn):
        model.eval()
        losses = []
        preds = []
        trues = []
        probs = []
        dl_gen = self.dataloader_generator(dataloader)
        for batch in self.tqdm(dl_gen, leave=False, desc='Eval...', total=len(dataloader)):
            inputs = batch['inputs']
            targets = batch['targets']

            with torch.no_grad():
                o = model(**inputs)

            loss = self.loss_from_model(o, targets, loss_fn)

            y = targets['y']
            
            probs += torch.softmax(o, dim=-1).detach().cpu().numpy().tolist()
            preds += o.argmax(1).detach().cpu().numpy().tolist()
            trues += y.detach().cpu().numpy().tolist()
            losses.append(loss.item())

        acc = accuracy_score(trues, preds)
        f1 = f1_score(trues, preds)
        conf = confusion_matrix(trues, preds)
        map100 = map_at_N(np.array(trues), np.array(probs), 100)
        map200 = map_at_N(np.array(trues), np.array(probs), 200)
#         return trues, preds, probs
        roc = roc_auc_score(np.array(trues), np.array(probs)[:,1])

        print('--- Validation ---')
        print(f'F1 = {f1}\t Acc = {acc}')
        print(f'MAP@100 = {map100}')
        print(f'MAP@200 = {map200}')
        print(f'ROC = {roc}')
        print(conf)
        return np.array(losses).mean(), map100

# Optim

In [72]:
import math
import torch
from torch.optim.optimizer import Optimizer, required


class RAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, degenerated_to_sgd=True):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                "Invalid beta parameter at index 1: {}".format(betas[1]))

        self.degenerated_to_sgd = degenerated_to_sgd
        if isinstance(params, (list, tuple)) and len(params) > 0 and isinstance(params[0], dict):
            for param in params:
                if 'betas' in param and (param['betas'][0] != betas[0] or param['betas'][1] != betas[1]):
                    param['buffer'] = [[None, None, None] for _ in range(10)]
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, buffer=[
                        [None, None, None] for _ in range(10)])
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError(
                        'RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(
                        p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = group['buffer'][int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * \
                        state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (
                            N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    elif self.degenerated_to_sgd:
                        step_size = 1.0 / (1 - beta1 ** state['step'])
                    else:
                        step_size = -1
                    buffered[2] = step_size

                # more conservative since it's an approximated value
                if N_sma >= 5:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(-group['weight_decay']
                                         * group['lr'], p_data_fp32)
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size *
                                         group['lr'], exp_avg, denom)
                    p.data.copy_(p_data_fp32)
                elif step_size > 0:
                    if group['weight_decay'] != 0:
                        p_data_fp32.add_(-group['weight_decay']
                                         * group['lr'], p_data_fp32)
                    p_data_fp32.add_(-step_size * group['lr'], exp_avg)
                    p.data.copy_(p_data_fp32)

        return loss

In [73]:
loss_fn = nn.CrossEntropyLoss(torch.tensor([1., 1.]).to(device))
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=2e-5)
# optimizer = torch.optim.SGD(model.parameters(), lr=5e-4, weight_decay=2e-4)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, 5e-4, 1e-3, step_size_up=3*len(dl_train), cycle_momentum=False)

In [74]:
args = TrainArgs(100, 'enel_selfatt_conv_boxcox_split80.ckp')

In [75]:
trainer = MyTrainer(grad_steps=1)

In [76]:
model.load_state_dict(torch.load('enel_selfatt_conv_boxcox_split50.ckp'), strict=False)

<All keys matched successfully>

In [40]:
# optimizer.load_state_dict(torch.load('enel_selfatt_conv_10_boxcox_optimizer.ckp'))

In [77]:
trainer.evaluate_fn(model, dl_valid, loss_fn)

HBox(children=(IntProgress(value=0, description='Eval...', max=332, style=ProgressStyle(description_width='ini…

--- Validation ---
F1 = 0.4344885883347422	 Acc = 0.905267629566695
MAP@100 = 0.9919810663885326
MAP@200 = 0.9625359408012584
ROC = 0.7888609094529886
[[18408   952]
 [ 1055   771]]


(0.30554942591452455, 0.9919810663885326)

In [42]:
trainer.train(args, model, dl_train, dl_valid, optimizer, loss_fn, scheduler=scheduler)

HBox(children=(IntProgress(value=0, description='Training...', style=ProgressStyle(description_width='initial'…

HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

-- Train Loss 0.5963239251442676


HBox(children=(IntProgress(value=0, description='Eval...', max=133, style=ProgressStyle(description_width='ini…

--- Validation ---
F1 = 0.30082543564659125	 Acc = 0.7301474926253687
MAP@100 = 0.7369067196199709
MAP@200 = 0.6735970384084653
ROC = 0.7750521960837545
[[5696 2040]
 [ 247  492]]
---Valid
Loss 0.6112719352980306
Metric 0.7369067196199709
Saved new checkpoint


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

-- Train Loss 0.5370396554470063


HBox(children=(IntProgress(value=0, description='Eval...', max=133, style=ProgressStyle(description_width='ini…

--- Validation ---
F1 = 0.34945631796025495	 Acc = 0.7952802359882006
MAP@100 = 0.8358511404922284
MAP@200 = 0.7831746323508135
ROC = 0.7939919054089416
[[6274 1462]
 [ 273  466]]
---Valid
Loss 0.5396848748949238
Metric 0.8358511404922284
Saved new checkpoint


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

-- Train Loss 0.5029909178895771


HBox(children=(IntProgress(value=0, description='Eval...', max=133, style=ProgressStyle(description_width='ini…

--- Validation ---
F1 = 0.37488869100623323	 Acc = 0.8343362831858407
MAP@100 = 0.8467411214210699
MAP@200 = 0.8014969838286127
ROC = 0.8007679506250237
[[6650 1086]
 [ 318  421]]
---Valid
Loss 0.47484045884663
Metric 0.8467411214210699
Saved new checkpoint


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

-- Train Loss 0.47330415732455705


HBox(children=(IntProgress(value=0, description='Eval...', max=133, style=ProgressStyle(description_width='ini…

--- Validation ---
F1 = 0.377814845704754	 Acc = 0.823952802359882
MAP@100 = 0.8665521766776687
MAP@200 = 0.8264995672785402
ROC = 0.807388492092923
[[6530 1206]
 [ 286  453]]
---Valid
Loss 0.4362959982757282
Metric 0.8665521766776687
Saved new checkpoint


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

-- Train Loss 0.42448259355886925


HBox(children=(IntProgress(value=0, description='Eval...', max=133, style=ProgressStyle(description_width='ini…

--- Validation ---
F1 = 0.37835534725181086	 Acc = 0.8278466076696165
MAP@100 = 0.871309688653996
MAP@200 = 0.834986385521709
ROC = 0.8136115281977797
[[6572 1164]
 [ 295  444]]
---Valid
Loss 0.43927088379859924
Metric 0.871309688653996
Saved new checkpoint


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

-- Train Loss 0.39533989496950833


HBox(children=(IntProgress(value=0, description='Eval...', max=133, style=ProgressStyle(description_width='ini…

--- Validation ---
F1 = 0.4154447702834799	 Acc = 0.8588790560471976
MAP@100 = 0.9054059952239062
MAP@200 = 0.8646331665164813
ROC = 0.8075156588251263
[[6854  882]
 [ 314  425]]
---Valid
Loss 0.382755560534341
Metric 0.9054059952239062
Saved new checkpoint


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

KeyboardInterrupt: 