In [1]:
import random
import os

import numpy as np
import pandas as pd
from torch import optim
import torch

import torch.nn as nn
from torch.nn.modules.container import Sequential

from tqdm import tqdm

import gc

In [2]:
device = torch.device(
    'cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [3]:
class CFG:
    dataPath = "./open/"
    trainPath = dataPath+'train.csv'
    testPath = dataPath+'test.csv'
    submission = dataPath+'sample_submission.csv'
    outPath = './output/'
    weightsavePath = './weights/'
    device = device

    batch_size = 1024
    lr = 0.0001
    epochs = 5
#     warmup_num
    decay = 0.00001
    num_depth = 300
    num_hidden_node = 512


config_data = {
    'epochs': CFG.epochs,
    'num_depth': CFG.num_depth,
    'num_hidden_node': CFG.num_hidden_node,
    'lr': CFG.lr,
    'decay': CFG.decay,
    'batch_size': CFG.batch_size
}

pbounds = {'epoch': (50, 500),
           'num_depth': (50, 400),
           'num_hidden_node': (124, 512),
           'lr': (0.00001, 0.007),
           'decay': (0.00001, 0.001),
           'batch_size': (512, 1024)
           }

In [4]:
def seedEverything(random_seed):
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
#     torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(random_seed)
    os.environ['PYTHONHASHSEED'] = str(random_seed)
    np.random.seed(random_seed)

In [5]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
# from transformers import get_cosine_schedule_with_warmup
# from transformers import get_cosine_schedule_with_warmup


data_df = pd.read_csv(CFG.trainPath)
test_df = pd.read_csv(CFG.testPath)
train_df, val_df = train_test_split(data_df, test_size=0.1)

# valset_ratio = 0.15
# train_df = train_df.sample(frac=1)


def numpy2tensor(variable):
    x = variable.values
    x = np.array(x, dtype=np.float32)
    x = torch.from_numpy(x)
    return x
# TODO normalize

In [6]:
class myModel(nn.Module):
    def __init__(self):
        super(myModel, self).__init__()
        self.fc1 = nn.Linear(56, CFG.num_hidden_node)
        self.norm1 = nn.BatchNorm1d(CFG.num_hidden_node)
        self.layer1 = self.make_layers(
            CFG.num_hidden_node, num_repeat=CFG.num_depth)

        self.fc2 = nn.Linear(CFG.num_hidden_node, 1024)
        self.norm2 = nn.BatchNorm1d(1024)
        self.LeakyReLU = nn.LeakyReLU(inplace=True)
        self.fc3 = nn.Linear(1024, 14)

    def forward(self, x):
        x = self.fc1(x)
        x = self.norm1(x)
#         x = nn.BatchNorm1d(128)(x)
        x = self.LeakyReLU(x)
        x = self.layer1(x)

        x = self.fc2(x)
        x = self.norm2(x)
        x = self.LeakyReLU(x)

        x = self.fc3(x)
        x = nn.Dropout(0.3)(x)
        return x

    def make_layers(self, value, num_repeat):
        layers = []
        for _ in range(num_repeat):
            layers.append(nn.Linear(value, value))
            layers.append(nn.BatchNorm1d(value))
            layers.append(nn.LeakyReLU(inplace=True))

        return nn.Sequential(*layers)

# TODO find opt param with basyan, skip connection, warmup, shuffle, smoothing

In [15]:
# from sklearn.metrics import roc_auc_score
from sklearn.metrics import r2_score
import math


def train(model, epochs, device, criterion, optimizer, loader_train, loader_valid, scheduler=None):
    model.to(device)
    best_score = 100000
    train_x = numpy2tensor(loader_train.filter(regex='X'))
    train_y = numpy2tensor(loader_train.filter(regex='Y'))
    iter_num = int(np.ceil(len(train_x)/CFG.batch_size))
    total_loss = 0.0
    for epoch in range(epochs):
        epoch_loss = 0.0
        for i in range(iter_num):
            start = i * CFG.batch_size
            end = start + CFG.batch_size
            input = train_x[start:end].to(device, dtype=torch.float)
            label = train_y[start:end].to(device, dtype=torch.float)
            outputs = model(input)
            loss = criterion(outputs, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            epoch_loss += loss.item()

        print(
            f' epoch : {epoch+1}/{epochs}, train loss : {(epoch_loss/iter_num):.4f}')
#         print(f' epoch : {epoch+1}/{epochs}, train loss : {epoch_loss}')
        loss = val(model, loader_valid, criterion, device)
        total_loss += loss
        if best_score > loss:
            best_score = loss
            torch.save(model.state_dict(), CFG.weightsavePath+'best_model.pth')
    return total_loss


def val(model, loader_valid, criterion, device):
    model.eval()
    val_loss = 0.0
    val_x = numpy2tensor(loader_valid.filter(regex='X'))
    val_y = numpy2tensor(loader_valid.filter(regex='Y'))
    iter_num = int(np.ceil(len(val_x)/CFG.batch_size))
    true_list = []
    preds_list = []
    with torch.no_grad():
        for i in range(iter_num):
            start = i * CFG.batch_size
            end = start + CFG.batch_size
            input = val_x[start:end].to(device, dtype=torch.float)
            label = val_y[start:end].to(device, dtype=torch.float)

            preds = model(input).squeeze()
            loss = criterion(preds, label)
            val_loss += loss.item()
#             print(preds.cpu().numpy())
            preds_list.extend(preds.cpu().numpy())
            true_list.extend(label.cpu().numpy())
        print(f'  val loss : {(val_loss/iter_num):.4f}')
        return r2_score()
    # todo : 함수 매게변수랑 베이지안 변수랑 동일하게,consin, 앙상블 regression모델, 피쳐인코
        return val_loss

In [16]:
seedEverything(0)

model = myModel()
# print(model)
optimizer = optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.0001)
# scheduler = get_cosine_schedule_with_warmup(optimizer,num_warmup_steps=len(train_df)*CFG.warmup_num,num_training_steps=len(train_df)*CFG.epochs)
criterion = nn.L1Loss().cuda()


# TODO enanble (ML and DL)

In [17]:
from bayes_opt import BayesianOptimization
bO = BayesianOptimization(f=train(model, CFG.epochs, CFG.device, criterion,
                          optimizer, train_df, val_df), pbounds=pbounds, verbose=2, random_state=1)
# bo.minimize(init_points=5, n_iter=20,)

#from sklearn.preprocessing import StandardScaler

# 앙상블(deep , and ml, smoothing, skip connection, normalization)

 epoch : 1/5, train loss : 17.5838
  val loss : 17.5587
 epoch : 2/5, train loss : 17.5560
  val loss : 17.5585
 epoch : 3/5, train loss : 17.5560
  val loss : 17.5585
 epoch : 4/5, train loss : 17.5561
  val loss : 17.5587
 epoch : 5/5, train loss : 17.5559
  val loss : 17.5587


In [18]:
bO.maximize(init_points=5, n_iter=20)

|   iter    |  target   | batch_... |   decay   |   epoch   |    lr     | num_depth | num_hi... |
-------------------------------------------------------------------------------------------------


TypeError: 'numpy.float64' object is not callable

In [None]:
print(bo.max)
# print(bo.min)

In [None]:
test_batch = int(np.ceil(len(test_df)/CFG.batch_size))
test_loader = numpy2tensor(test_df.filter(regex='X'))
model.eval()
preds = []
with torch.no_grad():
    for i in range(CFG.batch_size):
        start = i * CFG.batch_size
        end = start + CFG.batch_size

        input = test_loader[start:end].to(device, dtype=torch.float)
        input = input.to(device)
        outputs = model(input).squeeze()
        preds += outputs.detach().cpu().numpy().tolist()
# print(preds)

In [None]:
preds_np = np.array(preds)
preds_np.shape
print(preds_np)

In [None]:
submission_df = pd.read_csv(CFG.submission)
submission_df.iloc[:, 1:] = preds
submission_df.to_csv('./submit_rev2.csv', index=False)