<a href="https://colab.research.google.com/github/edypidy/SkyElephant-not-a-FlyingElephant/blob/main/CustomModel/BiconFTTransformer_BaseLine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sun Nov 27 19:10:07 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from google.colab import drive
drive.mount('/content/drive')

import os
import sys

PATH = os.path.join(os.getcwd(), '/drive/MyDrive/DataScience_Midterm_Project')
sys.path.append(PATH)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install einops

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [64]:
import warnings
warnings.filterwarnings("ignore")

from tqdm.auto import tqdm
import random

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from einops import repeat


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix, make_scorer
from sklearn.preprocessing import StandardScaler

In [5]:
def seed_everything(seed):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(42)

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DATASET

In [55]:
# train_df = pd.read_csv(f'{PATH}/train/train.csv')
# test_df = pd.read_csv(f'{PATH}/test_without_labels/test_without_labels.csv')
train_df = pd.read_csv('/content/drive/MyDrive/DataScience_Midterm_Project/train/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/DataScience_Midterm_Project/test_without_labels/test_without_labels.csv')

num_features = ['Age', 'RestingBP', 'MaxHR', 'Oldpeak']
cat_features = ['Sex', 'FastingBS', 'ExerciseAngina', 'ChestPainType', 'RestingECG', 'ST_Slope']
target = ['HeartDisease', 'Cholesterol']

In [56]:
def class_mapping(df, feature):
    class_map = {cls:val for val,cls in enumerate(df[feature].unique())}
    print(f'{feature} => {class_map}')
    return df[feature].map(class_map), class_map

categories = []
for feature in cat_features:
    train_df[feature], class_map = class_mapping(train_df, feature)
    test_df[feature] = test_df[feature].map(class_map)

    # categories class num
    categories.append(len(class_map))
categories = tuple(categories)

Sex => {'M': 0, 'F': 1}
FastingBS => {0: 0, 1: 1}
ExerciseAngina => {'N': 0, 'Y': 1}
ChestPainType => {'ATA': 0, 'NAP': 1, 'ASY': 2, 'TA': 3}
RestingECG => {'Normal': 0, 'ST': 1, 'LVH': 2}
ST_Slope => {'Up': 0, 'Flat': 1, 'Down': 2}


In [16]:
# def prep_df(df, train=True):
#     # 인위적 처리
#     df = df[df['RestingBP'] != 0]
#     if train:
#         df = df[df['Cholesterol'] != 0]
#     df['RestingBP'] = np.round(df['RestingBP']*2, decimals=-1)/2
#     return df

# train_df = prep_df(train_df, train=True)
# test_df = prep_df(test_df, train=False)

In [58]:
train_df, valid_df, train_labels, valid_labels = train_test_split(train_df[num_features + cat_features], 
                                                                  train_df[target[0]], # target[0] : HeartDisease 
                                                                  test_size=0.2, 
                                                                  random_state=42,
                                                                  )

In [59]:
bicons = [lambda x: x['ChestPainType'] == 2,
          lambda x: x['Oldpeak'] == 0,
          lambda x: x['ST_Slope'] == 0,]

class CustomDataset(Dataset):
    def __init__(self, df, labels=None, bicons=bicons, cat_features=cat_features, num_features=num_features):
        self.cat_df = df[cat_features]
        self.num_df = df[num_features]
        self.labels = labels
        
        bicon_df = []
        for bicon in bicons:
            bicon_df.append(df.apply(bicon, axis=1))
        self.bicon_df = pd.concat(bicon_df, axis=1)


    def __getitem__(self, index):
                
        if self.labels is not None:
            x_categ = torch.Tensor(self.cat_df.iloc[index])
            x_numer = torch.Tensor(self.num_df.iloc[index])
            x_bicon = torch.Tensor(self.bicon_df.iloc[index])
            label = self.labels.iloc[index]
            return x_categ, x_numer, x_bicon, label
        else:
            x_categ = torch.Tensor(self.cat_df.iloc[index])
            x_numer = torch.Tensor(self.num_df.iloc[index])
            x_bicon = torch.Tensor(self.bicon_df.iloc[index])
            return x_categ, x_numer, x_bicon
        
    def __len__(self):
        return len(self.cat_df)

In [60]:
bicons = [lambda x: x['ChestPainType'] == 2,
          lambda x: x['Oldpeak'] == 0,
          lambda x: x['ST_Slope'] == 0,]

train_dataset = CustomDataset(train_df, train_labels)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)

valid_dataset = CustomDataset(valid_df, valid_labels)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=0)

# MODEL

In [61]:
# numerical embedder

class NumericalEmbedder(nn.Module):
    def __init__(self, dim, num_numerical_types):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(num_numerical_types, dim))
        self.biases = nn.Parameter(torch.randn(num_numerical_types, dim))

    def forward(self, x):
        x = x.unsqueeze(-1)
        return x * self.weights + self.biases


# Feedforward

class GEGLU(nn.Module):
    def forward(self, x):
        x, gates = x.chunk(2, dim = -1)
        return x * F.gelu(gates)

class FeedForward(nn.Module):
    def __init__(self, in_dim, hidden_mult = 4, dropout = 0.):
        super().__init__()
        self.Layer1 = nn.Sequential(nn.LayerNorm(in_dim),
                                    nn.Linear(in_dim, in_dim*hidden_mult*2),
                                    GEGLU(),
                                    nn.Dropout(dropout))
        self.Layer2 = nn.Linear(in_dim*hidden_mult, in_dim)
        self.norm = nn.LayerNorm(in_dim)
    
    def forward(self, x):
        output = self.Layer1(x)
        output = self.Layer2(output)
        output = self.norm(output)
        output = output + x # residual
        return output


# Attention for Binary Conditions

class Attention(nn.Module):
    def __init__(self, embed_dim, num_heads=8, dropout=0.):
        super().__init__()
        self.norm = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads, dropout=dropout, batch_first=True)

    def forward(self, x, k, v):
        output = self.attn(x,k,v)[0]
        output = self.norm(output)
        output = output + x # residual
        return output


# Self Attention

class SelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads=8, dropout=0.):
        super().__init__()
        self.norm = nn.LayerNorm(embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads, dropout=dropout, batch_first=True)

    def forward(self, x):
        output = self.attn(x,x,x)[0]
        output = self.norm(output)
        output = output + x # residual
        return output


# Transformer

class Transformer(nn.Module):
    def __init__(self, embed_dim, depth, num_heads, attn_dropout, ff_dropout):
        super().__init__()
        self.layers = nn.ModuleList([])

        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                SelfAttention(embed_dim, num_heads=num_heads, dropout=attn_dropout),
                FeedForward(embed_dim, dropout=ff_dropout),
            ]))

    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x)
            x = ff(x)

        return x

In [62]:
class BiconFTTransformer(nn.Module):
    def __init__(self, *,
        categories,
        num_continuous,
        num_bicons, # Number of Binary Conditions (Input)
        embed_dim = 16,
        depth = 2,
        heads = 8,
        dim_out = 1,
        num_special_tokens = 2,
        attn_dropout = 0.,
        ff_dropout = 0.):
        
        super().__init__()

        # Treat Categories

        self.num_categories = len(categories)
        self.num_unique_categories = sum(categories)

        # Create category embeddings table

        self.num_special_tokens = num_special_tokens # Since add categories_offset to x_categories, first 'num_special_tokens' special tokens mean NA
        total_tokens = self.num_unique_categories + num_special_tokens
        # embedding table
        self.categorical_embeds = nn.Embedding(total_tokens, embed_dim) # LookUp Table : total_tokens x embed_dim

        # offset of categories for the categories embedding table like positional encoding (Alternative methodology from paper)
        categories_offset = F.pad(torch.tensor(list(categories)), (1, 0), value = num_special_tokens)
        categories_offset = categories_offset.cumsum(dim = -1)[:-1] # by cumsuming so every category is distinguished
        self.register_buffer('categories_offset', categories_offset) # categories offset must be unlearnable


        # Treat Continuous

        self.numerical_embedder = NumericalEmbedder(embed_dim, num_continuous)
        

        # Treat Binary Condition

        self.bicon_embeds = nn.Embedding(2*num_bicons, embed_dim)
        bicon_offset = torch.arange(0,2*num_bicons,2) # every Binary Condition is distinguished
        self.register_buffer('bicon_offset', bicon_offset) # bicon offset must be unlearnable


        # cls token

        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))


        # FeedForward & Attention for Bicon

        self.feedfoward = FeedForward(in_dim=embed_dim,
                                      hidden_mult = 4,
                                      dropout = 0.)

        self.attention = Attention(embed_dim=embed_dim,
                                   num_heads=8,
                                   dropout=0.)


        # Transformer

        self.transformer = Transformer(embed_dim=embed_dim,
                                       depth=depth,
                                       num_heads=heads,
                                       attn_dropout=attn_dropout,
                                       ff_dropout=ff_dropout,
                                       )


        # To logits

        self.to_logits = nn.Sequential(nn.LayerNorm(embed_dim),
                                       nn.ReLU(),
                                       nn.Linear(embed_dim, dim_out)
                                       )




    def forward(self, x_categ, x_numer, x_bicon):
        b = x_categ.shape[0] # batch size

        assert x_categ.shape[-1] == self.num_categories, f'you must pass in {self.num_categories} values for your categories input'
        x_categ += self.categories_offset

        x_categ = self.categorical_embeds(x_categ) # Categories Embedding is 'LookUp Table' method => batch x categ_col_nums x embed_dim

        # add numerically embedded tokens

        x_numer = self.numerical_embedder(x_numer)

        # concat categorical and numerical

        x = torch.cat((x_categ, x_numer), dim = 1)

        # Append cls tokens by batch == torch.cat([self.cls_token for _ in range(b)], dim=0)

        cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b = b)
        x = torch.cat((cls_tokens, x), dim = 1)

        # Tabular transformer

        x = self.transformer(x)

        # bicon

        x_bicon += self.bicon_offset
        x_bicon = self.bicon_embeds(x_bicon)
        x = self.attention(x, x_bicon, x_bicon)
        x = self.feedfoward(x)

        # get cls token

        x = x[:, 0]


        return self.to_logits(x)

# CONFIG Model

In [63]:
model = BiconFTTransformer(categories = categories,
                           num_continuous = len(num_features),
                           num_bicons = len(bicons), # Number of Binary Conditions (Input)
                           embed_dim = 16,
                           depth = 2,
                           heads = 8,
                           dim_out = 1,
                           num_special_tokens = 2,
                           attn_dropout = 0.,
                           ff_dropout = 0.,
                          )          

# TRAIN

In [74]:
TRAINING_CFG = {
    'EPOCHS' : 30,
    'LEARNING_RATE' : 5e-3,

    'SCHEDULER' : {
        'mode':'max',
        'factor':0.5,
        'patience':1,
        'threshold_mode':'abs',
        'min_lr':1e-8,
        'verbose':True,
    },
    
    'VALIDATION_CFG' : {
        'THRESHOLD' : 0.5,
    },
}

In [75]:
def train(model, optimizer, train_loader, val_loader, device, scheduler, cfg):
    model.to(device)
    criterion = nn.BCEWithLogitsLoss().to(device)
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, cfg['EPOCHS']+1):
        model.train()
        train_loss = []
        for x_categ, x_numer, x_bicon, label in tqdm(iter(train_loader)):

            x_categ = x_categ.int().to(device)
            x_numer = x_numer.float().to(device)
            x_bicon = x_bicon.int().to(device)
            label = label.float().to(device)
            
            optimizer.zero_grad()
            
            model_pred = model(x_categ, x_numer, x_bicon)
            
            loss = criterion(model_pred, label.reshape(-1,1))
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss, val_score = validation(model, criterion, val_loader, device, cfg['VALIDATION_CFG'])
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(val_score)
        
        if best_score < val_score:
            best_score = val_score
            best_model = model
    
    return best_model

In [76]:
def validation(model, criterion, val_loader, device, cfg):
    model.eval()
    pred_labels = []
    true_labels = []
    val_loss = []
    threshold = cfg['THRESHOLD']
    with torch.no_grad():
        for x_categ, x_numer, x_bicon, label in tqdm(iter(val_loader)):
            true_labels += label.tolist()
            
            x_categ = x_categ.int().to(device)
            x_numer = x_numer.float().to(device)
            x_bicon = x_bicon.int().to(device)
            label = label.float().to(device)
            
            model_pred = model(x_categ, x_numer, x_bicon)
            
            loss = criterion(model_pred, label.reshape(-1,1))
            
            val_loss.append(loss.item())
            
            model_pred = model_pred.squeeze(1).to('cpu')  
            pred_labels += model_pred.tolist()
    
    pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
    val_score = accuracy_score(y_true=true_labels, y_pred=pred_labels)
    return np.mean(val_loss), val_score

In [77]:
model = nn.DataParallel(model)
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = TRAINING_CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **TRAINING_CFG['SCHEDULER'])

In [78]:
infer_model = train(model, optimizer, train_loader, valid_loader, device, scheduler, TRAINING_CFG)

  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.37017] Val Loss : [0.39265] Val Score : [0.82317]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.36962] Val Loss : [0.38418] Val Score : [0.83537]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.38661] Val Loss : [0.40575] Val Score : [0.83537]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.36806] Val Loss : [0.40059] Val Score : [0.78659]
Epoch 00004: reducing learning rate of group 0 to 2.5000e-03.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.35652] Val Loss : [0.39159] Val Score : [0.83537]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.35708] Val Loss : [0.39686] Val Score : [0.83537]
Epoch 00006: reducing learning rate of group 0 to 1.2500e-03.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.35489] Val Loss : [0.39530] Val Score : [0.83537]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.35251] Val Loss : [0.39781] Val Score : [0.83537]
Epoch 00008: reducing learning rate of group 0 to 6.2500e-04.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.35204] Val Loss : [0.39665] Val Score : [0.85976]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.35055] Val Loss : [0.39634] Val Score : [0.85976]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.35054] Val Loss : [0.39734] Val Score : [0.81098]
Epoch 00011: reducing learning rate of group 0 to 3.1250e-04.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.35012] Val Loss : [0.39708] Val Score : [0.84756]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.34961] Val Loss : [0.39565] Val Score : [0.85976]
Epoch 00013: reducing learning rate of group 0 to 1.5625e-04.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.34879] Val Loss : [0.39530] Val Score : [0.85366]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.34862] Val Loss : [0.39544] Val Score : [0.85976]
Epoch 00015: reducing learning rate of group 0 to 7.8125e-05.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.34897] Val Loss : [0.39537] Val Score : [0.85976]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.34914] Val Loss : [0.39602] Val Score : [0.84146]
Epoch 00017: reducing learning rate of group 0 to 3.9063e-05.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.34930] Val Loss : [0.39575] Val Score : [0.85366]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.34842] Val Loss : [0.39609] Val Score : [0.84146]
Epoch 00019: reducing learning rate of group 0 to 1.9531e-05.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.34797] Val Loss : [0.39600] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.34834] Val Loss : [0.39606] Val Score : [0.84146]
Epoch 00021: reducing learning rate of group 0 to 9.7656e-06.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.34840] Val Loss : [0.39603] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.34834] Val Loss : [0.39603] Val Score : [0.84146]
Epoch 00023: reducing learning rate of group 0 to 4.8828e-06.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.34857] Val Loss : [0.39603] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.34791] Val Loss : [0.39606] Val Score : [0.84146]
Epoch 00025: reducing learning rate of group 0 to 2.4414e-06.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.34795] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.34835] Val Loss : [0.39605] Val Score : [0.84146]
Epoch 00027: reducing learning rate of group 0 to 1.2207e-06.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.34782] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.34802] Val Loss : [0.39604] Val Score : [0.84146]
Epoch 00029: reducing learning rate of group 0 to 6.1035e-07.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [30], Train Loss : [0.34827] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [31], Train Loss : [0.34861] Val Loss : [0.39605] Val Score : [0.84146]
Epoch 00031: reducing learning rate of group 0 to 3.0518e-07.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [32], Train Loss : [0.34777] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [33], Train Loss : [0.34823] Val Loss : [0.39605] Val Score : [0.84146]
Epoch 00033: reducing learning rate of group 0 to 1.5259e-07.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [34], Train Loss : [0.34789] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [35], Train Loss : [0.34801] Val Loss : [0.39605] Val Score : [0.84146]
Epoch 00035: reducing learning rate of group 0 to 7.6294e-08.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [36], Train Loss : [0.34796] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [37], Train Loss : [0.34908] Val Loss : [0.39605] Val Score : [0.84146]
Epoch 00037: reducing learning rate of group 0 to 3.8147e-08.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [38], Train Loss : [0.34872] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [39], Train Loss : [0.34775] Val Loss : [0.39605] Val Score : [0.84146]
Epoch 00039: reducing learning rate of group 0 to 1.9073e-08.


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [40], Train Loss : [0.34871] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [41], Train Loss : [0.34862] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [42], Train Loss : [0.34813] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [43], Train Loss : [0.34819] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [44], Train Loss : [0.34888] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [45], Train Loss : [0.34804] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [46], Train Loss : [0.34887] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [47], Train Loss : [0.34833] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [48], Train Loss : [0.34823] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [49], Train Loss : [0.34835] Val Loss : [0.39605] Val Score : [0.84146]


  0%|          | 0/41 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Epoch [50], Train Loss : [0.34837] Val Loss : [0.39605] Val Score : [0.84146]
