In [1]:
import sys
sys.path.append('/Volumes/KHJ/Github/hyuckjinkim/lib-python/torch')
from torch_seed import seed_everything

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader, TensorDataset

print('> torch version  :',torch.__version__)
print('> cuda available :',torch.cuda.is_available())

> torch version  : 1.13.1
> cuda available : False


In [3]:
# https://jimmy-ai.tistory.com/342
# https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py
import numpy as np
import torch
import torch.nn as nn
import time

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path=None, trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
        
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            if self.path is not None:
                torch.save(model.state_dict(), self.path)
                save_message = 'Saving model ...'
            else:
                save_message = ''
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). {save_message}')
        self.val_loss_min = val_loss

# # https://github.com/pytorch/pytorch/issues/21987
# def nanmean(v, *args, inplace=False, **kwargs):
#     if not inplace:
#         v = v.clone()
#     is_nan = torch.isnan(v)
#     v[is_nan] = 0
#     return v.sum(*args, **kwargs) / (~is_nan).float().sum(*args, **kwargs)

# def seq2list_cuda(seq,device):
#     nan_value = -99999
#     ret_seq = []
#     k=0
#     N=len(seq)
#     for x in seq:
#         start_seq = torch.tensor([nan_value]*k).to(device).float()
#         end_seq   = torch.tensor([nan_value]*(N-k-1)).to(device).float()
#         x = x.to(device)
        
#         if len(start_seq)==0:
#             _seq  = torch.cat([x,end_seq],axis=0)
#         elif len(end_seq)==0:
#             _seq  = torch.cat([start_seq,x],axis=0)
#         else:
#             _seq  = torch.cat([start_seq,x,end_seq],axis=0)
            
#         _seq[_seq==nan_value] = float('nan')
#         ret_seq.append(_seq)
#         k+=1
#     ret_seq = torch.stack(ret_seq,dim=0)
#     #print('(1)',ret_seq)
#     ret_seq = nanmean(ret_seq,dim=0)
#     #print('(2)',ret_seq)
#     return ret_seq
        
def train(
    model, optimizer, train_loader, valid_loader, epochs, criterion,
    early_stopping=None, device='cpu', scheduler=None, metric_period=1, 
    verbose=True, print_shape=False, save_model_path = './mc/best_model.pt',
    transform_y='identity',
):
    assert transform_y in ['identity','log','sqrt'], \
        "transform_y must be one of ['identity','log','sqrt']"
    is_early_stopping = False if early_stopping is None else True
    
    model.to(device)

    # great is better
    best_loss  = np.inf
    best_epoch = 1
    best_model = None
    is_best    = np.nan
    
    start_time = time.time()
    epoch_s = time.time()
    for epoch in range(1, epochs+1):
        
        model.train()
        train_loss = []
        for X, Y in iter(train_loader):

            X = X.float().to(device)
            Y = Y.float().to(device)

            optimizer.zero_grad()
            output = model(X).float()
            
            #Y = seq2list_cuda(Y,device)
            #output = seq2list_cuda(output,device)
            
            if transform_y=='log':
                output = torch.exp(output)
                Y      = torch.exp(Y)
            elif transform_y=='sqrt':
                output = output**2
                Y      = Y**2
                
            if print_shape:
                    if epoch==1:
                        print(output.shape,Y.shape) # torch.Size([16, 1]) torch.Size([16, 1])
                        print(output[:2],Y[:2])
            
            loss = criterion(output, Y)
            #loss = torch.sqrt(loss) # MSE -> RMSE
            
            loss.backward() # Getting gradients
            optimizer.step() # Updating parameters

            train_loss.append(loss.item())

        valid_loss = validation(model, valid_loader, criterion, device, transform_y)

        epoch_e = time.time()
            
        if scheduler is not None:
            scheduler.step(valid_loss)

        # update the best epoch & best loss
        if (best_loss > valid_loss) | (epoch==1):
            best_epoch = epoch
            best_loss = valid_loss
            best_model = model
            is_best = 1
            torch.save(best_model.state_dict(), save_model_path)
        else:
            is_best = 0
            
        # 결과물 printing
        if (verbose) & (epoch % metric_period == 0):
            mark = '*' if is_best else ' '
            epoch_str = str(epoch).zfill(len(str(epochs)))
            progress = '{}[{}/{}] tr_loss: {:.5f}, val_loss: {:.5f}, best_epoch: {}, elapsed: {:.2f}s, total: {:.2f}s, remaining: {:.2f}s'\
                .format(
                    mark,
                    epoch_str,
                    epochs,
                    np.mean(train_loss),
                    valid_loss,
                    best_epoch,
                    epoch_e-epoch_s,
                    epoch_e-start_time,
                    (epoch_e-epoch_s)*(epochs-epoch)/metric_period,
                )
            epoch_s = time.time()
            print(progress)

        # early stopping 여부를 체크. 현재 과적합 상황 추적
        if is_early_stopping:
            early_stopping(valid_loss, model)
            if early_stopping.early_stop:
                break

    return best_model

def validation(model, valid_loader, criterion, device, transform_y):
    valid_loss = []
    with torch.no_grad():
        for X, Y in iter(valid_loader):
            X = X.float().to(device)
            Y = Y.float().to(device)
            
            output = model(X).float()
            
            #Y = seq2list_cuda(Y,device)
            #output = seq2list_cuda(output,device)
            
            if transform_y=='log':
                output = torch.exp(output)
                Y      = torch.exp(Y)
            elif transform_y=='sqrt':
                output = output**2
                Y      = Y**2
            
            loss = criterion(output, Y)
            valid_loss.append(loss.item())

    return np.mean(valid_loss)

def predict(best_model,loader,device,transform_y):
    best_model.to(device)
    
    true_list = []
    pred_list = []
    with torch.no_grad():
        for data,label in iter(loader):
            data = data.float().to(device)

            output = best_model(data).cpu().numpy().tolist()
            label  = label.cpu().numpy().tolist()

            if transform_y=='log':
                output = np.exp(output).tolist()
                label  = np.exp(label).tolist()
            elif transform_y=='sqrt':
                output = np.square(output).tolist()
                label  = np.square(label).tolist()

            true_list += label
            pred_list += output

    return true_list, pred_list

In [4]:
import pandas as pd
import numpy as np

In [22]:
class CFG:
    SEED = 42
    TARGET = 'ECLO'
    BATCH_SIZE = 32
    NUM_WORKERS = 0

In [23]:
train_df = pd.read_parquet('./out/train_data_identity.parquet')
test_df  = pd.read_parquet('./out/test_data_identity.parquet')

In [24]:
X = train_df.drop(CFG.TARGET,axis=1)
y = train_df[CFG.TARGET]
X_test = test_df.copy()

unique_info = X.nunique()
unique_cols = unique_info[unique_info==1].index.tolist()

if len(unique_cols)>0:
    X     .drop(unique_cols,axis=1,inplace=True)
    X_test.drop(unique_cols,axis=1,inplace=True)
    print(f'delete unique columns: {len(unique_cols)}\ndetail: {unique_cols}')

In [25]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X,y,test_size=0.2,random_state=CFG.SEED,shuffle=True)

In [26]:
train_dataset = TensorDataset(
    torch.tensor(X_train.values, dtype=torch.float32),
    torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1),
)
val_dataset = TensorDataset(
    torch.tensor(X_val.values, dtype=torch.float32),
    torch.tensor(y_val.values, dtype=torch.float32).unsqueeze(1),
)

train_loader  = DataLoader(train_dataset, batch_size=CFG.BATCH_SIZE, shuffle=False, num_workers=CFG.NUM_WORKERS)
val_loader    = DataLoader(val_dataset  , batch_size=CFG.BATCH_SIZE, shuffle=False, num_workers=CFG.NUM_WORKERS)

In [27]:
class DNN(nn.Module):
    def __init__(self,input_size,output_size,hidden_sizes,dropout_rate):
        super(DNN,self).__init__()
        self.hidden_layers = nn.ModuleList([nn.Linear(input_size, hidden_sizes[0])])
        self.hidden_layers.extend([nn.Linear(hidden_sizes[i], hidden_sizes[i+1]) for i in range(len(hidden_sizes)-1)])
        self.output = nn.Linear(hidden_sizes[-1], output_size)
        self.activation = nn.GELU()
        self.bn = nn.ModuleList([nn.BatchNorm1d(hidden_sizes[i]) for i in range(len(hidden_sizes))])
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        for linear in self.hidden_layers:
            x = linear(x)
            #x = self.bn[i](x)
            x = self.activation(x)
            #x = self.dropout(x)
        x = self.output(x)
        x = self.relu(x)
        return x

In [35]:
class MLP(nn.Module):
    def __init__(self,input_size,output_size,hidden_size,dropout_rate):
        super(MLP,self).__init__()
        self.bn = nn.BatchNorm1d(hidden_size)
        self.activation = nn.GELU()
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.ReLU()
        self.layer = nn.Sequential(
            nn.Linear(input_size , hidden_size), self.activation, #self.dropout,
            nn.Linear(hidden_size, hidden_size), self.activation, #self.dropout,
            nn.Linear(hidden_size, hidden_size), self.activation, #self.dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self._reinitialize()
        
    def _reinitialize(self):
        """
        Tensorflow/Keras-like initialization
        """
        for name, p in self.named_parameters():
            if 'lstm' in name:
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(p.data)
                elif 'bias_ih' in name:
                    p.data.fill_(0)
                    # Set forget-gate bias to 1
                    n = p.size(0)
                    p.data[(n // 4):(n // 2)].fill_(1)
                elif 'bias_hh' in name:
                    p.data.fill_(0)
            elif 'fc' in name:
                if 'weight' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'bias' in name:
                    p.data.fill_(0)
        
    def forward(self, x):
        x = self.layer(x)
        x = self.fc(x)
        x = self.relu(x)
        return x

In [36]:
class RMSLELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self, pred, actual):
        pred, actual = F.relu(pred), F.relu(actual)
        return torch.sqrt(self.mse(torch.log(pred + 1), torch.log(actual + 1)))

In [37]:
device = 'cpu'
epochs = 256
lr = 1e-3
weight_decay = 5e-4

In [38]:
input_size = X_train.shape[1]
output_size = 1
hidden_sizes = [128,256,128,64,32]
dropout_rate = 0.2

# model = DNN(input_size,output_size,hidden_sizes,dropout_rate)
model = MLP(input_size,output_size,hidden_sizes[0],dropout_rate)

In [39]:
# x1 = [x for x,y in train_loader][0]
# y1 = [y for x,y in train_loader][0]
# yhat = model(x1).float()
# criterion(yhat,y1)

In [40]:
seed_everything(CFG.SEED)
torch.cuda.empty_cache()

In [41]:
model.eval()
criterion = RMSLELoss().to(device)
# criterion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
# optimizer = torch.optim.SGD(params = model.parameters(), lr = 1e-2, momentum=0.9)
scheduler = None
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, threshold_mode='abs',min_lr=1e-7, verbose=True)
early_stopping = EarlyStopping(patience=epochs//10,verbose=False,path=None)
# early_stopping = None

best_model = train(
    model, optimizer, train_loader, val_loader, epochs, criterion,
    early_stopping, device, scheduler,
    metric_period=1,
    verbose=True,
    print_shape=False,
    save_model_path = './mc/best_model.pt',
    transform_y='identity',
)

*[001/256] tr_loss: 1.69377, val_loss: 1.68972, best_epoch: 1, elapsed: 1.00s, total: 1.00s, remaining: 255.80s
 [002/256] tr_loss: 1.69380, val_loss: 1.68972, best_epoch: 1, elapsed: 0.91s, total: 1.91s, remaining: 230.41s
 [003/256] tr_loss: 1.69380, val_loss: 1.68972, best_epoch: 1, elapsed: 0.91s, total: 2.83s, remaining: 231.48s
 [004/256] tr_loss: 1.69380, val_loss: 1.68972, best_epoch: 1, elapsed: 0.90s, total: 3.73s, remaining: 225.82s
 [005/256] tr_loss: 1.69380, val_loss: 1.68972, best_epoch: 1, elapsed: 0.89s, total: 4.62s, remaining: 223.99s
*[006/256] tr_loss: 1.39040, val_loss: 1.24232, best_epoch: 6, elapsed: 0.96s, total: 5.58s, remaining: 240.82s
*[007/256] tr_loss: 1.00300, val_loss: 0.47728, best_epoch: 7, elapsed: 1.09s, total: 6.67s, remaining: 270.30s
*[008/256] tr_loss: 0.45298, val_loss: 0.44632, best_epoch: 8, elapsed: 1.06s, total: 7.74s, remaining: 264.06s
*[009/256] tr_loss: 0.45155, val_loss: 0.44586, best_epoch: 9, elapsed: 1.07s, total: 8.81s, remaining: 

In [None]:
best_model = MLP(input_size,output_size,hidden_sizes[0],dropout_rate)
best_model.load_state_dict(torch.load('./mc/best_model.pt'))

In [None]:
true,pred = predict(best_model,val_loader,device,'identity')
RMSLELoss()(torch.tensor(true),torch.tensor(pred))

In [None]:
torch.tensor(pred)