### NOTE: 此模板為 ML 2021 HW1 之模板
reference: https://github.com/ga642381/ML2021-Spring/blob/main/HW01/HW01.ipynb 

### Download data source

# Package

In [92]:
# Numerical Operations
import math
import numpy as np
import random

# Reading/Writing Data
import pandas as pd
import os
import csv

# for saving record
from datetime import datetime, timedelta

from mlxtend.preprocessing import standardize

# Pytorch
import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split, ConcatDataset
import torch.nn.functional as F

from sklearn.metrics import confusion_matrix, precision_score, PrecisionRecallDisplay
import json
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

In [93]:
# !gdown '1-GOOjbfKwV8ZF9GGFM0plkD6QB-VIGh8' --output x_training.npy
# !gdown '1-J9BpHAc8UcH1qospqDdtmbix9AXqbgm' --output y_training.npy
# !gdown '1-7yBuAfJSQNy1mmOPxm5cUTCsaOhYAQO' --output x_testing.npy
# !gdown '1-3fX5qDlhkni5APiAcNRnr0wxzWKBNPB' --output y_testing.npy
!gdown '1N7DXMf6s0JZhJIe0cahjShK9Q6v9DJmD' --output x_training_new.npy
!gdown '1--0LkjbYBawOk5eWf7JSaY5ZyOpNnVRB' --output y_training_new.npy
!gdown '1eptcZzxSSU8x62zLiA-2H3blIssJbz0U' --output x_testing_new.npy
!gdown '1-0Oysf7ifne8vrVvbJzWHI3hybs-cElt' --output y_testing_new.npy

Downloading...
From: https://drive.google.com/uc?id=1N7DXMf6s0JZhJIe0cahjShK9Q6v9DJmD
To: /content/x_training_new.npy
100% 69.4M/69.4M [00:00<00:00, 184MB/s]
Downloading...
From: https://drive.google.com/uc?id=1--0LkjbYBawOk5eWf7JSaY5ZyOpNnVRB
To: /content/y_training_new.npy
100% 20.8M/20.8M [00:00<00:00, 226MB/s]
Downloading...
From: https://drive.google.com/uc?id=1eptcZzxSSU8x62zLiA-2H3blIssJbz0U
To: /content/x_testing_new.npy
100% 145M/145M [00:00<00:00, 185MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-0Oysf7ifne8vrVvbJzWHI3hybs-cElt
To: /content/y_testing_new.npy
100% 43.5M/43.5M [00:00<00:00, 221MB/s]


In [94]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 13423,      
    'valid_ratio': 0.4,  
    'n_epochs': 100,        
    'batch_size': 64, 
    'learning_rate': 1e-2,          
    'early_stop': 40,    
    'save_path': './model.ckpt',
    'momentum': 0.9,
    'weight_decay': 0,
    'input_size': 10, # feature count
}

In [95]:
def same_seed(seed): 
    '''Fixes random number generator seeds for reproducibility.'''
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

def train_valid_split(data_set, valid_ratio, seed):
    '''Split provided training data into training set and validation set'''
    valid_set_size = int(valid_ratio * len(data_set)) 
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(seed))
    return np.array(train_set), np.array(valid_set)

# Read Dataset

In [96]:
x_training = np.load('./x_training_new.npy', allow_pickle=True)
y_training = np.load('./y_training_new.npy', allow_pickle=True)

x_training.shape, y_training.shape

((867676, 10), (867676, 3))

In [97]:
x_testing = np.load('./x_testing_new.npy', allow_pickle=True)
y_testing = np.load('./y_testing_new.npy', allow_pickle=True)

x_testing.shape, y_testing.shape

((1814557, 10), (1814557, 3))

In [98]:
x_training = x_training[:, 1:]
x_testing = x_testing[:, 1:]

x_training.shape, x_testing.shape

((867676, 9), (1814557, 9))

In [99]:
x_training = standardize(x_training, columns=[0, 1, 2, 3, 4, 5, 6, 7, 8])
x_testing = standardize(x_testing, columns=[0, 1, 2, 3, 4, 5, 6, 7, 8])

In [100]:
# x_training
# np.amin(x_training, axis=0
# x_training = x_training - x_training.min(axis=0)
# x_testing = x_testing - x_testing.min(axis=0)

In [101]:
y_training_argmax = np.argmax(y_training, axis=1)

unique, counts = np.unique(y_training_argmax, return_counts=True)
dict(zip(unique, counts))

{0: 401806, 1: 435300, 2: 30570}

In [102]:
y_training_argmax = np.argmax(y_training, axis=1)

a_indexes = np.where(y_training_argmax == 0)[0]
p_indexes = np.where(y_training_argmax == 1)[0]
l_indexes = np.where(y_training_argmax == 2)[0]

min_label_cnt = min(len(a_indexes), len(p_indexes), len(l_indexes))

sample_a_indexes = np.random.choice(a_indexes, size=min_label_cnt, replace=False)
sample_p_indexes = np.random.choice(p_indexes, size=min_label_cnt, replace=False)
sample_l_indexes = np.random.choice(l_indexes, size=min_label_cnt, replace=False)

new_indexes = np.concatenate((sample_a_indexes, sample_p_indexes, sample_l_indexes), axis=0)

new_x_training = x_training[new_indexes, :]
new_y_training = y_training[new_indexes, :]

len(new_x_training), len(new_y_training)

(91710, 91710)

In [103]:
tmp = np.argmax(new_y_training, axis=1)

unique, counts = np.unique(tmp, return_counts=True)
dict(zip(unique, counts))

{0: 30570, 1: 30570, 2: 30570}

In [104]:
# 05.31 edited: 因為沒有AN、AR、AS，只有AA、AP、AL，所以 y 的每個維度代表的標的順序是 A、P、L。

# N A P L R S
# [0, 0.1, 0.5, 0.3, 0.2, 0.4]

# Build model

In [105]:
class SimpleNN_Model(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN_Model, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.Dropout(0.5), 
            nn.ReLU(), 
            # nn.Linear(64, 128),
            # nn.Dropout(0.2), 
            # nn.ReLU(),
            nn.Linear(64, 16),
            nn.Dropout(0.3), 
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.Dropout(0.2), 
            nn.ReLU(),
            nn.Linear(8, 3),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.layers(x)
        x = x.squeeze(1) # (B, 1) -> (B)
        x = self.sigmoid(x)
        return x

# Fit dataset

In [106]:
class CustomDataset(Dataset):
    def __init__(self, x, y=None):
        if y is None:
            self.y = y
        else:
            self.y = torch.FloatTensor(y)
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]
        else:
            return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)

In [107]:
def trainer(train_loader, valid_loader, model, config, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], betas=(0.9, 0.999), eps=1e-08, weight_decay=config['weight_decay'], amsgrad=True)
    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0
    writer = SummaryWriter()

    for epoch in range(n_epochs):
        model.train()
        loss_record = []

        # tqdm is a package to visualize your training progress.
        train_pbar = tqdm(train_loader, position=0, leave=True)

        for x, y in train_pbar:
            optimizer.zero_grad()               # Set gradient to zero.
            x, y = x.to(device), y.to(device)   # Move your data to device. 
            pred = model(x)  
            # print(pred)           
            loss = criterion(pred, y)
            loss.backward()                     # Compute gradient (backpropagation).
            optimizer.step()                    # Update parameters.
            step += 1
            loss_record.append(loss.detach().item())
            
            # Display current epoch number and loss on tqdm progress bar.
            train_pbar.set_description(f'Epoch [{epoch + 1} / {n_epochs}]')
            train_pbar.set_postfix({'loss': loss.detach().item()})

        mean_train_loss = sum(loss_record) / len(loss_record)
        writer.add_scalar('Loss / train', mean_train_loss, step)

        model.eval() # Set your model to evaluation mode.
        loss_record = []
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            with torch.no_grad():
                pred = model(x)
                loss = criterion(pred, y)

            loss_record.append(loss.item())
            
        mean_valid_loss = sum(loss_record) / len(loss_record)
        print(f'Epoch [{epoch + 1} / {n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
        writer.add_scalar('Loss / valid', mean_valid_loss, step)

        if mean_valid_loss < best_loss:
            best_loss = mean_valid_loss
            torch.save(model.state_dict(), config['save_path']) # Save your best model
            print('Saving model with loss {:.3f}...'.format(best_loss))
            early_stop_count = 0
        else: 
            early_stop_count += 1

        print('current best loss is:', best_loss)
        if early_stop_count >= config['early_stop']:
            print('\nModel is not improving, so we halt the training session.')
            print('best loss is:', best_loss)
            return best_loss
    
    return best_loss

In [108]:
def predict(test_loader, model, device):
    model.eval() # Set your model to evaluation mode.
    preds = []
    for x in tqdm(test_loader):
        x = x[0].to(device)                        
        with torch.no_grad():                   
            pred = model(x)                     
            preds.append(pred.detach().cpu())   
    preds = torch.cat(preds, dim=0).numpy()  
    return preds

In [109]:
same_seed(config['seed'])

x_train, x_valid = train_valid_split(new_x_training, config['valid_ratio'], config['seed'])
y_train, y_valid = train_valid_split(new_y_training, config['valid_ratio'], config['seed'])

# Print out the data size.
print(f"""train_data size: {x_train.shape}
valid_data size: {x_valid.shape}""")

# Print out the number of features.
# print(f'number of features: {x_train.shape[2]}')

train_dataset, valid_dataset, test_dataset = CustomDataset(x_train, y_train), \
                                             CustomDataset(x_valid, y_valid), \
                                             CustomDataset(x_testing, y_testing)

# Pytorch data loader loads pytorch dataset into batches.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True, drop_last=False)
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True, drop_last=False)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True, drop_last=False)

train_data size: (55026, 9)
valid_data size: (36684, 9)


### Simple NN Training

In [110]:
same_seed(config['seed'])
model = SimpleNN_Model(input_dim=9).to(device)
trainer(train_loader, valid_loader, model, config, device)

Epoch [1 / 100]: 100%|██████████| 860/860 [00:08<00:00, 105.94it/s, loss=1.08]


Epoch [1 / 100]: Train loss: 1.0973, Valid loss: 1.0945
Saving model with loss 1.095...
current best loss is: 1.0945465550190066


Epoch [2 / 100]: 100%|██████████| 860/860 [00:07<00:00, 107.74it/s, loss=1.1]


Epoch [2 / 100]: Train loss: 1.0955, Valid loss: 1.0923
Saving model with loss 1.092...
current best loss is: 1.0923445338986892


Epoch [3 / 100]: 100%|██████████| 860/860 [00:08<00:00, 95.90it/s, loss=1.12]


Epoch [3 / 100]: Train loss: 1.0952, Valid loss: 1.0904
Saving model with loss 1.090...
current best loss is: 1.0903965035382048


Epoch [4 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.56it/s, loss=1.09]


Epoch [4 / 100]: Train loss: 1.0935, Valid loss: 1.0909
current best loss is: 1.0903965035382048


Epoch [5 / 100]: 100%|██████████| 860/860 [00:08<00:00, 99.04it/s, loss=1.09] 


Epoch [5 / 100]: Train loss: 1.0935, Valid loss: 1.0929
current best loss is: 1.0903965035382048


Epoch [6 / 100]: 100%|██████████| 860/860 [00:07<00:00, 108.44it/s, loss=1.1]


Epoch [6 / 100]: Train loss: 1.0940, Valid loss: 1.0908
current best loss is: 1.0903965035382048


Epoch [7 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.19it/s, loss=1.08]


Epoch [7 / 100]: Train loss: 1.0940, Valid loss: 1.0888
Saving model with loss 1.089...
current best loss is: 1.0887813665725627


Epoch [8 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.91it/s, loss=1.11]


Epoch [8 / 100]: Train loss: 1.0928, Valid loss: 1.0884
Saving model with loss 1.088...
current best loss is: 1.0884045239940339


Epoch [9 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.06it/s, loss=1.12]


Epoch [9 / 100]: Train loss: 1.0930, Valid loss: 1.0906
current best loss is: 1.0884045239940339


Epoch [10 / 100]: 100%|██████████| 860/860 [00:08<00:00, 105.33it/s, loss=1.1]


Epoch [10 / 100]: Train loss: 1.0931, Valid loss: 1.0887
current best loss is: 1.0884045239940339


Epoch [11 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.29it/s, loss=1.09]


Epoch [11 / 100]: Train loss: 1.0927, Valid loss: 1.0875
Saving model with loss 1.088...
current best loss is: 1.0875344874551487


Epoch [12 / 100]: 100%|██████████| 860/860 [00:08<00:00, 103.06it/s, loss=1.09]


Epoch [12 / 100]: Train loss: 1.0926, Valid loss: 1.0881
current best loss is: 1.0875344874551487


Epoch [13 / 100]: 100%|██████████| 860/860 [00:08<00:00, 107.14it/s, loss=1.08]


Epoch [13 / 100]: Train loss: 1.0922, Valid loss: 1.0889
current best loss is: 1.0875344874551487


Epoch [14 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.22it/s, loss=1.08]


Epoch [14 / 100]: Train loss: 1.0920, Valid loss: 1.0886
current best loss is: 1.0875344874551487


Epoch [15 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.05it/s, loss=1.08]


Epoch [15 / 100]: Train loss: 1.0914, Valid loss: 1.0872
Saving model with loss 1.087...
current best loss is: 1.0872392668956663


Epoch [16 / 100]: 100%|██████████| 860/860 [00:08<00:00, 98.92it/s, loss=1.1] 


Epoch [16 / 100]: Train loss: 1.0922, Valid loss: 1.0872
Saving model with loss 1.087...
current best loss is: 1.0871519087083663


Epoch [17 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.91it/s, loss=1.08]


Epoch [17 / 100]: Train loss: 1.0916, Valid loss: 1.0869
Saving model with loss 1.087...
current best loss is: 1.08688328365831


Epoch [18 / 100]: 100%|██████████| 860/860 [00:08<00:00, 99.80it/s, loss=1.08] 


Epoch [18 / 100]: Train loss: 1.0918, Valid loss: 1.0871
current best loss is: 1.08688328365831


Epoch [19 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.83it/s, loss=1.11]


Epoch [19 / 100]: Train loss: 1.0923, Valid loss: 1.0865
Saving model with loss 1.087...
current best loss is: 1.0865493701310107


Epoch [20 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.81it/s, loss=1.1]


Epoch [20 / 100]: Train loss: 1.0910, Valid loss: 1.0873
current best loss is: 1.0865493701310107


Epoch [21 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.63it/s, loss=1.11]


Epoch [21 / 100]: Train loss: 1.0914, Valid loss: 1.0892
current best loss is: 1.0865493701310107


Epoch [22 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.78it/s, loss=1.11]


Epoch [22 / 100]: Train loss: 1.0923, Valid loss: 1.0886
current best loss is: 1.0865493701310107


Epoch [23 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.45it/s, loss=1.1]


Epoch [23 / 100]: Train loss: 1.0930, Valid loss: 1.0899
current best loss is: 1.0865493701310107


Epoch [24 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.26it/s, loss=1.09]


Epoch [24 / 100]: Train loss: 1.0918, Valid loss: 1.0874
current best loss is: 1.0865493701310107


Epoch [25 / 100]: 100%|██████████| 860/860 [00:08<00:00, 98.20it/s, loss=1.08]


Epoch [25 / 100]: Train loss: 1.0920, Valid loss: 1.0874
current best loss is: 1.0865493701310107


Epoch [26 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.59it/s, loss=1.11]


Epoch [26 / 100]: Train loss: 1.0914, Valid loss: 1.0871
current best loss is: 1.0865493701310107


Epoch [27 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.06it/s, loss=1.08]


Epoch [27 / 100]: Train loss: 1.0910, Valid loss: 1.0866
current best loss is: 1.0865493701310107


Epoch [28 / 100]: 100%|██████████| 860/860 [00:08<00:00, 99.58it/s, loss=1.11]


Epoch [28 / 100]: Train loss: 1.0928, Valid loss: 1.0895
current best loss is: 1.0865493701310107


Epoch [29 / 100]: 100%|██████████| 860/860 [00:08<00:00, 99.68it/s, loss=1.09] 


Epoch [29 / 100]: Train loss: 1.0921, Valid loss: 1.0899
current best loss is: 1.0865493701310107


Epoch [30 / 100]: 100%|██████████| 860/860 [00:08<00:00, 97.59it/s, loss=1.09] 


Epoch [30 / 100]: Train loss: 1.0921, Valid loss: 1.0881
current best loss is: 1.0865493701310107


Epoch [31 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.71it/s, loss=1.11]


Epoch [31 / 100]: Train loss: 1.0924, Valid loss: 1.0881
current best loss is: 1.0865493701310107


Epoch [32 / 100]: 100%|██████████| 860/860 [00:08<00:00, 105.14it/s, loss=1.08]


Epoch [32 / 100]: Train loss: 1.0921, Valid loss: 1.0865
Saving model with loss 1.087...
current best loss is: 1.086528240803642


Epoch [33 / 100]: 100%|██████████| 860/860 [00:08<00:00, 98.70it/s, loss=1.07] 


Epoch [33 / 100]: Train loss: 1.0926, Valid loss: 1.0894
current best loss is: 1.086528240803642


Epoch [34 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.83it/s, loss=1.09]


Epoch [34 / 100]: Train loss: 1.0933, Valid loss: 1.0880
current best loss is: 1.086528240803642


Epoch [35 / 100]: 100%|██████████| 860/860 [00:08<00:00, 105.84it/s, loss=1.08]


Epoch [35 / 100]: Train loss: 1.0916, Valid loss: 1.0869
current best loss is: 1.086528240803642


Epoch [36 / 100]: 100%|██████████| 860/860 [00:08<00:00, 99.52it/s, loss=1.08]


Epoch [36 / 100]: Train loss: 1.0920, Valid loss: 1.0874
current best loss is: 1.086528240803642


Epoch [37 / 100]: 100%|██████████| 860/860 [00:08<00:00, 101.73it/s, loss=1.11]


Epoch [37 / 100]: Train loss: 1.0920, Valid loss: 1.0880
current best loss is: 1.086528240803642


Epoch [38 / 100]: 100%|██████████| 860/860 [00:07<00:00, 108.49it/s, loss=1.07]


Epoch [38 / 100]: Train loss: 1.0924, Valid loss: 1.0874
current best loss is: 1.086528240803642


Epoch [39 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.58it/s, loss=1.09]


Epoch [39 / 100]: Train loss: 1.0934, Valid loss: 1.0899
current best loss is: 1.086528240803642


Epoch [40 / 100]: 100%|██████████| 860/860 [00:08<00:00, 96.28it/s, loss=1.06]


Epoch [40 / 100]: Train loss: 1.0938, Valid loss: 1.0876
current best loss is: 1.086528240803642


Epoch [41 / 100]: 100%|██████████| 860/860 [00:08<00:00, 96.41it/s, loss=1.11]


Epoch [41 / 100]: Train loss: 1.0933, Valid loss: 1.0890
current best loss is: 1.086528240803642


Epoch [42 / 100]: 100%|██████████| 860/860 [00:09<00:00, 93.79it/s, loss=1.11]


Epoch [42 / 100]: Train loss: 1.0924, Valid loss: 1.0898
current best loss is: 1.086528240803642


Epoch [43 / 100]: 100%|██████████| 860/860 [00:08<00:00, 98.64it/s, loss=1.11] 


Epoch [43 / 100]: Train loss: 1.0924, Valid loss: 1.0877
current best loss is: 1.086528240803642


Epoch [44 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.95it/s, loss=1.09]


Epoch [44 / 100]: Train loss: 1.0927, Valid loss: 1.0898
current best loss is: 1.086528240803642


Epoch [45 / 100]: 100%|██████████| 860/860 [00:09<00:00, 95.16it/s, loss=1.09]


Epoch [45 / 100]: Train loss: 1.0925, Valid loss: 1.0870
current best loss is: 1.086528240803642


Epoch [46 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.38it/s, loss=1.09]


Epoch [46 / 100]: Train loss: 1.0927, Valid loss: 1.0882
current best loss is: 1.086528240803642


Epoch [47 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.65it/s, loss=1.11]


Epoch [47 / 100]: Train loss: 1.0918, Valid loss: 1.0866
current best loss is: 1.086528240803642


Epoch [48 / 100]: 100%|██████████| 860/860 [00:08<00:00, 103.09it/s, loss=1.07]


Epoch [48 / 100]: Train loss: 1.0926, Valid loss: 1.0888
current best loss is: 1.086528240803642


Epoch [49 / 100]: 100%|██████████| 860/860 [00:07<00:00, 109.81it/s, loss=1.11]


Epoch [49 / 100]: Train loss: 1.0927, Valid loss: 1.0875
current best loss is: 1.086528240803642


Epoch [50 / 100]: 100%|██████████| 860/860 [00:07<00:00, 111.05it/s, loss=1.09]


Epoch [50 / 100]: Train loss: 1.0936, Valid loss: 1.0871
current best loss is: 1.086528240803642


Epoch [51 / 100]: 100%|██████████| 860/860 [00:07<00:00, 109.81it/s, loss=1.1]


Epoch [51 / 100]: Train loss: 1.0927, Valid loss: 1.0880
current best loss is: 1.086528240803642


Epoch [52 / 100]: 100%|██████████| 860/860 [00:07<00:00, 110.35it/s, loss=1.1]


Epoch [52 / 100]: Train loss: 1.0927, Valid loss: 1.0896
current best loss is: 1.086528240803642


Epoch [53 / 100]: 100%|██████████| 860/860 [00:08<00:00, 101.95it/s, loss=1.1]


Epoch [53 / 100]: Train loss: 1.0934, Valid loss: 1.0875
current best loss is: 1.086528240803642


Epoch [54 / 100]: 100%|██████████| 860/860 [00:08<00:00, 105.51it/s, loss=1.1]


Epoch [54 / 100]: Train loss: 1.0940, Valid loss: 1.0905
current best loss is: 1.086528240803642


Epoch [55 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.87it/s, loss=1.09]


Epoch [55 / 100]: Train loss: 1.0947, Valid loss: 1.0925
current best loss is: 1.086528240803642


Epoch [56 / 100]: 100%|██████████| 860/860 [00:08<00:00, 101.69it/s, loss=1.1]


Epoch [56 / 100]: Train loss: 1.0941, Valid loss: 1.0897
current best loss is: 1.086528240803642


Epoch [57 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.09it/s, loss=1.09]


Epoch [57 / 100]: Train loss: 1.0939, Valid loss: 1.0883
current best loss is: 1.086528240803642


Epoch [58 / 100]:  78%|███████▊  | 671/860 [00:06<00:01, 110.04it/s, loss=1.09]

Epoch [58 / 100]: Train loss: 1.0928, Valid loss: 1.0873
current best loss is: 1.086528240803642


Epoch [59 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.90it/s, loss=1.08]


Epoch [59 / 100]: Train loss: 1.0932, Valid loss: 1.0879
current best loss is: 1.086528240803642


Epoch [60 / 100]: 100%|██████████| 860/860 [00:08<00:00, 98.71it/s, loss=1.07]


Epoch [60 / 100]: Train loss: 1.0922, Valid loss: 1.0859
Saving model with loss 1.086...
current best loss is: 1.0858683176987678


Epoch [61 / 100]: 100%|██████████| 860/860 [00:08<00:00, 107.06it/s, loss=1.09]


Epoch [61 / 100]: Train loss: 1.0921, Valid loss: 1.0867
current best loss is: 1.0858683176987678


Epoch [62 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.81it/s, loss=1.12]


Epoch [62 / 100]: Train loss: 1.0916, Valid loss: 1.0853
Saving model with loss 1.085...
current best loss is: 1.0853233869067467


Epoch [63 / 100]:  92%|█████████▏| 795/860 [00:07<00:00, 103.89it/s, loss=1.1] 

Epoch [63 / 100]: Train loss: 1.0917, Valid loss: 1.0863
current best loss is: 1.0853233869067467


Epoch [64 / 100]: 100%|██████████| 860/860 [00:08<00:00, 107.45it/s, loss=1.1]


Epoch [64 / 100]: Train loss: 1.0922, Valid loss: 1.0867
current best loss is: 1.0853233869067467


Epoch [65 / 100]: 100%|██████████| 860/860 [00:07<00:00, 110.30it/s, loss=1.08]


Epoch [65 / 100]: Train loss: 1.0923, Valid loss: 1.0869
current best loss is: 1.0853233869067467


Epoch [66 / 100]:  97%|█████████▋| 832/860 [00:07<00:00, 118.33it/s, loss=1.1] 

Epoch [66 / 100]: Train loss: 1.0916, Valid loss: 1.0860
current best loss is: 1.0853233869067467


Epoch [68 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.52it/s, loss=1.11]


Epoch [68 / 100]: Train loss: 1.0927, Valid loss: 1.0898
current best loss is: 1.0853233869067467


Epoch [69 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.78it/s, loss=1.1]


Epoch [69 / 100]: Train loss: 1.0939, Valid loss: 1.0896
current best loss is: 1.0853233869067467


Epoch [70 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.02it/s, loss=1.09]


Epoch [70 / 100]: Train loss: 1.0948, Valid loss: 1.0921
current best loss is: 1.0853233869067467


Epoch [71 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.82it/s, loss=1.12]


Epoch [71 / 100]: Train loss: 1.0947, Valid loss: 1.0883
current best loss is: 1.0853233869067467


Epoch [72 / 100]: 100%|██████████| 860/860 [00:08<00:00, 101.46it/s, loss=1.05]


Epoch [72 / 100]: Train loss: 1.0938, Valid loss: 1.0874
current best loss is: 1.0853233869067467


Epoch [73 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.27it/s, loss=1.09]


Epoch [73 / 100]: Train loss: 1.0932, Valid loss: 1.0874
current best loss is: 1.0853233869067467


Epoch [75 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.10it/s, loss=1.1]


Epoch [75 / 100]: Train loss: 1.0927, Valid loss: 1.0865
current best loss is: 1.0853233869067467


Epoch [76 / 100]: 100%|██████████| 860/860 [00:08<00:00, 101.58it/s, loss=1.13]


Epoch [76 / 100]: Train loss: 1.0919, Valid loss: 1.0875
current best loss is: 1.0853233869067467


Epoch [77 / 100]: 100%|██████████| 860/860 [00:08<00:00, 105.02it/s, loss=1.08]


Epoch [77 / 100]: Train loss: 1.0922, Valid loss: 1.0862
current best loss is: 1.0853233869067467


Epoch [79 / 100]: 100%|██████████| 860/860 [00:08<00:00, 103.12it/s, loss=1.1]


Epoch [79 / 100]: Train loss: 1.0922, Valid loss: 1.0857
current best loss is: 1.0853233869067467


Epoch [80 / 100]:  72%|███████▏  | 622/860 [00:06<00:02, 98.95it/s, loss=1.1]

Epoch [80 / 100]: Train loss: 1.0919, Valid loss: 1.0859
current best loss is: 1.0853233869067467


Epoch [81 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.40it/s, loss=1.08]


Epoch [81 / 100]: Train loss: 1.0911, Valid loss: 1.0865
current best loss is: 1.0853233869067467


Epoch [82 / 100]:  96%|█████████▌| 823/860 [00:08<00:00, 113.61it/s, loss=1.08]

Epoch [82 / 100]: Train loss: 1.0912, Valid loss: 1.0861
current best loss is: 1.0853233869067467


Epoch [84 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.79it/s, loss=1.05]


Epoch [84 / 100]: Train loss: 1.0909, Valid loss: 1.0847
Saving model with loss 1.085...
current best loss is: 1.0846739311666853


Epoch [85 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.65it/s, loss=1.09]


Epoch [85 / 100]: Train loss: 1.0911, Valid loss: 1.0852
current best loss is: 1.0846739311666853


Epoch [86 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.26it/s, loss=1.08]


Epoch [86 / 100]: Train loss: 1.0913, Valid loss: 1.0861
current best loss is: 1.0846739311666853


Epoch [88 / 100]: 100%|██████████| 860/860 [00:08<00:00, 103.08it/s, loss=1.09]


Epoch [88 / 100]: Train loss: 1.0914, Valid loss: 1.0862
current best loss is: 1.0846739311666853


Epoch [89 / 100]: 100%|██████████| 860/860 [00:08<00:00, 106.74it/s, loss=1.09]


Epoch [89 / 100]: Train loss: 1.0909, Valid loss: 1.0865
current best loss is: 1.0846739311666853


Epoch [90 / 100]: 100%|██████████| 860/860 [00:08<00:00, 104.45it/s, loss=1.1]


Epoch [90 / 100]: Train loss: 1.0910, Valid loss: 1.0853
current best loss is: 1.0846739311666853


Epoch [91 / 100]: 100%|██████████| 860/860 [00:08<00:00, 103.92it/s, loss=1.11]


Epoch [91 / 100]: Train loss: 1.0914, Valid loss: 1.0855
current best loss is: 1.0846739311666853


Epoch [92 / 100]: 100%|██████████| 860/860 [00:08<00:00, 103.66it/s, loss=1.09]


Epoch [92 / 100]: Train loss: 1.0914, Valid loss: 1.0862
current best loss is: 1.0846739311666853


Epoch [93 / 100]: 100%|██████████| 860/860 [00:08<00:00, 105.12it/s, loss=1.06]


Epoch [93 / 100]: Train loss: 1.0918, Valid loss: 1.0854
current best loss is: 1.0846739311666853


Epoch [95 / 100]: 100%|██████████| 860/860 [00:07<00:00, 110.17it/s, loss=1.11]


Epoch [95 / 100]: Train loss: 1.0912, Valid loss: 1.0870
current best loss is: 1.0846739311666853


Epoch [96 / 100]: 100%|██████████| 860/860 [00:08<00:00, 100.27it/s, loss=1.09]


Epoch [96 / 100]: Train loss: 1.0907, Valid loss: 1.0861
current best loss is: 1.0846739311666853


Epoch [97 / 100]: 100%|██████████| 860/860 [00:08<00:00, 102.87it/s, loss=1.04]


Epoch [97 / 100]: Train loss: 1.0920, Valid loss: 1.0863
current best loss is: 1.0846739311666853


Epoch [98 / 100]:  92%|█████████▏| 790/860 [00:07<00:00, 99.44it/s, loss=1.11]

Epoch [98 / 100]: Train loss: 1.0916, Valid loss: 1.0877
current best loss is: 1.0846739311666853


Epoch [100 / 100]:  94%|█████████▍| 809/860 [00:07<00:00, 103.54it/s, loss=1.11]

Epoch [100 / 100]: Train loss: 1.0916, Valid loss: 1.0878
current best loss is: 1.0846739311666853


1.0846739311666853

### Test Data

In [111]:
same_seed(config['seed'])
model = SimpleNN_Model(input_dim=9).to(device)
model.load_state_dict(torch.load(config['save_path']))
preds = predict(test_loader, model, device)
preds

100%|██████████| 28353/28353 [00:28<00:00, 1009.59it/s]


array([[0.958532  , 0.85899603, 0.6079183 ],
       [0.958532  , 0.85899603, 0.6079183 ],
       [0.958532  , 0.85899603, 0.6079183 ],
       ...,
       [0.7516522 , 0.68219954, 1.        ],
       [0.7724584 , 0.6944176 , 1.        ],
       [0.99996185, 0.9959053 , 1.        ]], dtype=float32)

In [112]:
y_true = np.argmax(y_testing, axis=1)
y_pred = np.argmax(preds, axis=1)

In [113]:
unique, counts = np.unique(y_true, return_counts=True)
dict(zip(unique, counts))

{0: 1217821, 1: 506479, 2: 90257}

In [114]:
unique, counts = np.unique(y_pred, return_counts=True)
dict(zip(unique, counts))

{0: 809469, 1: 530867, 2: 474221}

In [115]:
from sklearn.metrics import confusion_matrix, precision_score, f1_score, accuracy_score

In [116]:
cm = confusion_matrix(y_true, y_pred)
cm

array([[522551, 369189, 326081],
       [247805, 134408, 124266],
       [ 39113,  27270,  23874]])

In [117]:
print(f"Macro Precision score = {precision_score(y_true, y_pred, average='macro')}")
print(f"Micro Precision score = {precision_score(y_true, y_pred, average='micro')}")
print(f"Weighted Precision score = {precision_score(y_true, y_pred, average='weighted')}")
print(f"Accuracy = {accuracy_score(y_true, y_pred)}")

Macro Precision score = 0.31635910874226925
Micro Precision score = 0.375206179800359
Weighted Precision score = 0.5064260518155753
Accuracy = 0.375206179800359


In [118]:
!gdown '1-3KyI7uRkhbsuYsgCfgdjpQW_NS9qXGs' --output testing_ids.npy
testing_ids = np.load('./testing_ids.npy', allow_pickle=True)
testing_ids

Downloading...
From: https://drive.google.com/uc?id=1-3KyI7uRkhbsuYsgCfgdjpQW_NS9qXGs
To: /content/testing_ids.npy
  0% 0.00/14.5M [00:00<?, ?B/s]100% 14.5M/14.5M [00:00<00:00, 153MB/s]


array([   0.,    0.,    0., ..., 8230., 8230., 8230.])

In [119]:
df_id_behavior = pd.DataFrame(data=testing_ids, columns=['id'])
df_id_behavior.id = df_id_behavior.id.astype(int)
df_id_behavior['behavior_pred'] = y_pred
df_id_behavior['behavior_true'] = y_true
behavior_pred = df_id_behavior.groupby(['id'])['behavior_pred'].apply(list)
behavior_true = df_id_behavior.groupby(['id'])['behavior_true'].apply(list)

df_person_tag = pd.DataFrame(data=behavior_pred)
df_person_tag['behavior_true'] = behavior_true
df_person_tag.behavior_pred = df_person_tag.behavior_pred.apply(lambda x: max(x,key=x.count))
df_person_tag.behavior_true = df_person_tag.behavior_true.apply(lambda x: max(x,key=x.count))
# df_person_tag.behavior_pred = df_person_tag.behavior_pred.apply(lambda x: max(x))
# df_person_tag.behavior_true = df_person_tag.behavior_true.apply(lambda x: max(x))
df_person_tag

Unnamed: 0_level_0,behavior_pred,behavior_true
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,1
1,0,0
2,0,1
3,0,1
4,0,0
...,...,...
8226,2,2
8227,1,0
8228,0,1
8229,2,2


In [120]:
cm = confusion_matrix(df_person_tag.behavior_true, df_person_tag.behavior_pred)
cm

array([[1448,  873,  801],
       [2053,  983,  871],
       [ 553,  293,  356]])

In [121]:
print(f"Macro Precision score = {precision_score(df_person_tag.behavior_true, df_person_tag.behavior_pred, average='macro')}")
print(f"Micro Precision score = {precision_score(df_person_tag.behavior_true, df_person_tag.behavior_pred, average='micro')}")
print(f"Weighted Precision score = {precision_score(df_person_tag.behavior_true, df_person_tag.behavior_pred, average='weighted')}")
print(f"Accuracy = {accuracy_score(df_person_tag.behavior_true, df_person_tag.behavior_pred)}")

Macro Precision score = 0.3300475195967236
Micro Precision score = 0.33859798323411494
Weighted Precision score = 0.37823593281369017
Accuracy = 0.33859798323411494
