# Training Code
### Contents
> 1. Preprocessing w. some EDA
> 2. Feature Selection w. Boosting Algorithm
> 3. MLP model
> 4. SGD Classifier
> 5. Final Model Selection

## Preprocessing w. EDA

### Reading Data

In [2]:
# Package Load
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Reading Training data from chunks
tp = pd.read_csv('data/round1_train.csv', iterator=True, chunksize=1000000)
df = pd.concat(tp, ignore_index=True)

### 불필요한 변수 제거

In [7]:
df.columns

Index(['시각', 'ADID', 'ADID 타입', 'DSP ID', '매체 ID', '애드유닛 ID', '플랫폼', 'OS 종류',
       '사이즈 ID', '노출 ID', 'SSP 입찰ID', 'DSP 입찰ID', 'AX 낙찰ID', 'WUID (웹 유저 ID)',
       '환율', '광고 응답 소재 카테고리', '광고 응답 광고주 도메인', '국가코드 ID', 'OS 버전 ID', 'P1',
       'P2', 'P3', 'P4', 'P5', 'winning'],
      dtype='object')

In [8]:
for col in df.columns[:-6]:
    print(f'Unique values of column {col} : {len(df[col].unique())}')

Unique values of column 시각 : 602696
Unique values of column ADID : 537679
Unique values of column ADID 타입 : 4
Unique values of column DSP ID : 7
Unique values of column 매체 ID : 456
Unique values of column 애드유닛 ID : 986
Unique values of column 플랫폼 : 3
Unique values of column OS 종류 : 2
Unique values of column 사이즈 ID : 3
Unique values of column 노출 ID : 8494146
Unique values of column SSP 입찰ID : 8525512
Unique values of column DSP 입찰ID : 7684881
Unique values of column AX 낙찰ID : 8525512
Unique values of column WUID (웹 유저 ID) : 198624
Unique values of column 환율 : 6
Unique values of column 광고 응답 소재 카테고리 : 143
Unique values of column 광고 응답 광고주 도메인 : 2474
Unique values of column 국가코드 ID : 33
Unique values of column OS 버전 ID : 13


#### 불필요한 열 선정 결과

> Drop cols : ADID, 노출 ID, SSP 입찰ID, DSP 입찰ID, AX 낙찰ID, WUID (웹 유저 ID), 도메인, OS 버전 ID

In [6]:
# 불필요한 열
col_notuse = ['ADID', '노출 ID', 'SSP 입찰ID', 'DSP 입찰ID', 'AX 낙찰ID', 'WUID (웹 유저 ID)', '광고 응답 광고주 도메인','OS 버전 ID']
col_use = ['시각', 'ADID 타입', 'DSP ID', '매체 ID', '애드유닛 ID', '플랫폼', 'OS 종류', '사이즈 ID',
       '환율', '광고 응답 소재 카테고리', '국가코드 ID', 'P1', 'P2', 'P3', 'P4', 'P5',
       'winning']

### ID columns to Categorical Coding

In [9]:
# Read csv with selected columns
df = pd.read_csv('data/round1_train.csv', usecols=col_use) # 34.3s
df_test = pd.read_csv('data/round1_test.csv', usecols=df.columns.drop(['P5','winning'])) # 41s

In [10]:
# DSP ID, 매체 ID, 애드유닛 ID to categorical(from ID to integer)
col_cat = ['DSP ID', '매체 ID', '애드유닛 ID']

In [11]:
from pandas.api.types import CategoricalDtype
from copy import deepcopy

for col in col_cat:
    series_whole = pd.concat([df[col],df_test[col]]).astype('category').cat.codes + 1
    df[col] = series_whole[:len(df)]
    df_test[col] = series_whole[len(df):]

In [12]:
df = df.sort_values(by=['시각','winning'])
df_test = df_test.sort_values(by=['시각'])

In [13]:
df = df.reset_index(drop=True)

### To Datetime
일시 및 초단위 시각을 직접 사용하는 것은 현재 Classification 문제에서 벗어난 Time-series analysis의 관점이므로 일부 변수로만 추출해서 진행

In [14]:
df_train = df[df.columns.drop(['P5'])] # P5 > 학습에 사용불가능한 가격변수

In [15]:
# To datetime
df_train.시각 = pd.to_datetime(df_train.시각, format='%Y%m%d%H%M%S')
df_test.시각 = pd.to_datetime(df_test.시각, format='%Y%m%d%H%M%S')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train.시각 = pd.to_datetime(df_train.시각, format='%Y%m%d%H%M%S')


In [16]:
# Variable Extraction
# 주말여부
df_train['weekend'] = df_train.시각.dt.dayofweek > 4
df_train['weekend'] = df_train['weekend'].astype('int')
df_test['weekend'] = df_test.시각.dt.dayofweek > 4
df_test['weekend'] = df_test['weekend'].astype('int')

In [17]:
## 시간대
df_train['hour'] = df_train.시각.dt.hour
df_test['hour'] = df_test.시각.dt.hour

In [18]:
## 요일
df_train['dayofweek'] = df_train.시각.dt.dayofweek
df_test['dayofweek'] = df_test.시각.dt.dayofweek

In [19]:
# Datetime 불필요 > 제거
df_train = df_train[df_train.columns.drop('시각')]
df_test = df_test[df_test.columns.drop('시각')]

### Price Variable Treatment

#### Correlation

In [20]:
col_price = ['P1','P2','P3','P4']

In [21]:
df_train.loc[df_train.winning==1.0, col_price].corr() # 가격변수간 상관계수 탐색(낙찰)

Unnamed: 0,P1,P2,P3,P4
P1,1.0,0.576665,0.636191,0.999999
P2,0.576665,1.0,0.946439,0.576608
P3,0.636191,0.946439,1.0,0.636141
P4,0.999999,0.576608,0.636141,1.0


In [22]:
df_train.loc[df_train.winning==0.0, col_price].corr() # 가격변수간 상관계수 탐색(유찰)

Unnamed: 0,P1,P2,P3,P4
P1,1.0,0.709176,0.710066,0.999561
P2,0.709176,1.0,0.996453,0.719102
P3,0.710066,0.996453,1.0,0.719804
P4,0.999561,0.719102,0.719804,1.0


#### Visualization of Variable Distribution

In [None]:
# Data Split for Plotting
df_win = df_train[df_train.winning==1.0]
df_lose = df_train[df_train.winning==0.0]

In [None]:
# Normalization
from copy import deepcopy

df_norm = deepcopy(df_train)

for col in col_price:
    df_norm[col] = (df_norm[col]-df_norm[col].mean())/df_norm[col].std()

##### Normalized(not-transformed) Plot

In [None]:
# Mean-Normalized
fig, axes = plt.subplots(2,2,figsize=(12,12))
for idx, col in enumerate(col_price):
    sns.kdeplot(df_norm_win[col], label='Win', ax=axes[idx//2, idx%2])
    sns.kdeplot(df_norm_lose[col], label='Lose', ax=axes[idx//2, idx%2])
    axes[idx//2,idx%2].legend()
    axes[idx//2,idx%2].set_title(f'KDE Plot for {col}')
plt.suptitle('Mean-Normalized Price Variables', fontsize=15)
plt.show() # 32.3s

##### Log-Transformation
기존 분포에서는 낙찰/유찰 데이터 간의 가격변수 분포가 구분되지 않음

In [None]:
fig, axes = plt.subplots(2,2,figsize=(12,12))
for idx, col in enumerate(col_price):
    sns.kdeplot(np.log1p(df_win[col]), label='Win', ax=axes[idx//2, idx%2])
    sns.kdeplot(np.log1p(df_lose[col]), label='Lose', ax=axes[idx//2, idx%2])
    axes[idx//2,idx%2].legend()
    axes[idx//2,idx%2].set_title(f'KDE Plot for {col}')
plt.suptitle('Log-Transformed Price Variables', fontsize=15)
plt.show()
plt.savefig('logtransfomred.png', transparent=False)

##### 최종 : P1-P4 데이터에 대해 Logarithm-Tranformation

In [23]:
for col in col_price:
    df_train[col] = np.log1p(df_train[col])
    df_test[col] = np.log1p(df_test[col])

### Data Type / Variable Name Setting

In [24]:
df_train = df_train[df_train.columns.drop(['winning']).tolist() + ['winning']] # label 마지막으로

In [25]:
colnames = ['ADID_type', 'DSP_ID', 'Media_ID', 'Adunit_ID', 'Platform', 'OS_type', 'Size_ID',
       'Ex_Rate', 'Category', 'Country_ID', 'P1', 'P2', 'P3', 'P4', 'weekend',
       'hour', 'dayofweek']

In [26]:
df_train.columns = colnames + ['Class']
df_test.columns = colnames

In [27]:
col_int = ['ADID_type', 'DSP_ID', 'Media_ID', 'Adunit_ID', 'Platform', 'OS_type', 'Size_ID',
           'Country_ID', 'weekend', 'hour', 'dayofweek'] # integer type columns

In [28]:
# Check NA
df_train.isna().sum()

ADID_type           0
DSP_ID              0
Media_ID            0
Adunit_ID           0
Platform            0
OS_type             0
Size_ID             0
Ex_Rate             0
Category      5732131
Country_ID       5536
P1                  0
P2                  0
P3                  0
P4                  0
weekend             0
hour                0
dayofweek           0
Class               0
dtype: int64

In [29]:
# Missing value at Country_ID > treat with Mode
df_train['Country_ID'] = df_train.Country_ID.fillna(df_train.Country_ID.mode()[0])
df_test['Country_ID'] = df_test.Country_ID.fillna(df_test.Country_ID.mode()[0])

In [30]:
# Set integer columns
df_train[col_int] = df_train[col_int].astype('int64')
df_test[col_int] = df_test[col_int].astype('int64')

### Category Treatment
- 모든 서브카테고리까지 포함하기에는 너무 많은 variable 생성됨</br>
- 특정 Main Category(0~26, 0:NA)에 속하는 여부만 파악
> 동시에 여러 카테고리에 속할 경우, 가장 많은 Subcategory를 가진 main category 선택 \\
> 같은 개수의 subcategory일 경우, 가장 앞에 위치한 main category 선택

In [31]:
# Category fillna with str NA
df_train.Category = df_train.Category.fillna('NA')
df_test.Category = df_test.Category.fillna('NA')

In [32]:
# Category treatment
from tqdm import tqdm
import math

cat_train = []
cat_test = []

for i in tqdm(range(len(df_train))):
    cat_train.append(df_train.Category[i].split('%2C')) # 70s

print(len(cat_train))

for i in tqdm(range(len(df_test))):
    cat_test.append(df_test.Category[i].split('%2C'))

print(len(cat_test))

100%|██████████| 8525512/8525512 [00:56<00:00, 151245.21it/s]


8525512


100%|██████████| 624142/624142 [00:05<00:00, 122563.16it/s]

624142





In [33]:
from collections import Counter

cat_train_treated = [] # for train
for ls in tqdm(cat_train):
    ls_treated = []
    for item in ls:
        if item == 'NA':
            ls_treated.append(0)
        else:
            if item.find('-') == -1:
                ls_treated.append(int(item[3:]))
            else:
                ls_treated.append(int(item[3:item.find('-')]))
    
    if len(ls_treated) == 1:
        cat_train_treated.append(ls_treated[0])
    else:
        cat_train_treated.append(Counter(ls_treated).most_common()[0][0])

print(len(cat_train_treated))

cat_test_treated = [] # for test
for ls in tqdm(cat_test):
    ls_treated = []
    for item in ls:
        if item == 'NA':
            ls_treated.append(0)
        else:
            if item.find('-') == -1:
                ls_treated.append(int(item[3:]))
            else:
                ls_treated.append(int(item[3:item.find('-')]))
    
    if len(ls_treated) == 1:
        cat_test_treated.append(ls_treated[0])
    else:
        cat_test_treated.append(Counter(ls_treated).most_common()[0][0])

print(len(cat_test_treated))

100%|██████████| 8525512/8525512 [00:08<00:00, 1053198.46it/s]


8525512


100%|██████████| 624142/624142 [00:00<00:00, 909325.66it/s]

624142





In [34]:
# Merge
df_train['Category'] = cat_train_treated
df_test['Category'] = cat_test_treated

### 기타 변수 처리

In [35]:
from copy import deepcopy
df_train_ID_treated = deepcopy(df_train)
df_test_ID_treated = deepcopy(df_test)

#### 1. Media_ID
 - Distinct value : 456
 - 최빈값(ID=152)가 전체 데이터의 53.2% at Train
 - 152(=1), 213(=2), 그 외 나머지 값(=3)으로 분류

In [36]:
ls_media_id = []

for val in tqdm(df_train_ID_treated.Media_ID):
    if val == 152:
        ls_media_id.append(1)
    elif val == 213:
        ls_media_id.append(2)
    else:
        ls_media_id.append(3)

df_train_ID_treated.Media_ID = ls_media_id
df_train_ID_treated.head(1)

100%|██████████| 8525512/8525512 [00:02<00:00, 2878781.77it/s]


Unnamed: 0,ADID_type,DSP_ID,Media_ID,Adunit_ID,Platform,OS_type,Size_ID,Ex_Rate,Category,Country_ID,P1,P2,P3,P4,weekend,hour,dayofweek,Class
0,1,7,3,296,1,1,3,1218.0,22,1012,4.193142,3.918581,3.625407,3.842917,1,0,6,0


In [37]:
ls_media_id = []

for val in tqdm(df_test_ID_treated.Media_ID):
    if val == 152:
        ls_media_id.append(1)
    elif val == 213:
        ls_media_id.append(2)
    else:
        ls_media_id.append(3)

df_test_ID_treated.Media_ID = ls_media_id
df_test_ID_treated.head(1)

100%|██████████| 624142/624142 [00:00<00:00, 2722941.38it/s]


Unnamed: 0,ADID_type,DSP_ID,Media_ID,Adunit_ID,Platform,OS_type,Size_ID,Ex_Rate,Category,Country_ID,P1,P2,P3,P4,weekend,hour,dayofweek
0,1,6,1,919,1,1,1,1228.0,0,1012,4.740288,3.926596,3.633367,4.23527,1,0,6


##### 2. Adunit_ID
 - Distinct value : 986
 - 최빈값(ID=919)가 전체 데이터의 53.2% at Train
 - 919(=1), 263(11.5% = 2), 그 외 나머지 값(=3)으로 분류

In [38]:
ls_adunit_id = []

for val in tqdm(df_train_ID_treated.Adunit_ID):
    if val == 919:
        ls_adunit_id.append(1)
    elif val == 263:
        ls_adunit_id.append(2)
    else:
        ls_adunit_id.append(3)

df_train_ID_treated.Adunit_ID = ls_adunit_id
df_train_ID_treated.head(1)

100%|██████████| 8525512/8525512 [00:02<00:00, 2973635.56it/s]


Unnamed: 0,ADID_type,DSP_ID,Media_ID,Adunit_ID,Platform,OS_type,Size_ID,Ex_Rate,Category,Country_ID,P1,P2,P3,P4,weekend,hour,dayofweek,Class
0,1,7,3,3,1,1,3,1218.0,22,1012,4.193142,3.918581,3.625407,3.842917,1,0,6,0


In [39]:
ls_adunit_id = []

for val in tqdm(df_test_ID_treated.Adunit_ID):
    if val == 919:
        ls_adunit_id.append(1)
    elif val == 263:
        ls_adunit_id.append(2)
    else:
        ls_adunit_id.append(3)

df_test_ID_treated.Adunit_ID = ls_adunit_id
df_test_ID_treated.head(1)

100%|██████████| 624142/624142 [00:00<00:00, 2617197.46it/s]


Unnamed: 0,ADID_type,DSP_ID,Media_ID,Adunit_ID,Platform,OS_type,Size_ID,Ex_Rate,Category,Country_ID,P1,P2,P3,P4,weekend,hour,dayofweek
0,1,6,1,1,1,1,1,1228.0,0,1012,4.740288,3.926596,3.633367,4.23527,1,0,6


### Preprocessed Result

In [40]:
df_train.to_pickle('data/train.pkl')
df_test.to_pickle('data/test.pkl')
df_train_ID_treated.to_pickle('data/train_ID_treated')
df_test_ID_treated.to_pickle('data/test_ID_treated')

## Feature Selection w. Boosting
CatBoost를 이용하여 Fitting 후(ID treatment 없는 데이터로), feature importance 이용 변수선택

### Dataset Setting

In [4]:
# Load Packages and data
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier

df_train = pd.read_pickle('data/train.pkl')

In [None]:
# Splitting Dataset

X = df_train.drop('Class', axis=1)
y = df_train['Class']

num_features = ['Ex_Rate','P1','P2','P3','P4']
cat_features = X.columns.drop(num_features).tolist()
cat_idx = [idx for idx, val in enumerate(X.columns) if val in cat_features]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.4, random_state = 123, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size = 0.5, random_state =123, stratify=y_val)

### GridSearch Hyperparameter Setting

In [None]:
# Model Setting

clf_grid = CatBoostClassifier(
    iterations=1000, 
    random_seed=123, 
    task_type='GPU',
    loss_function = 'Logloss',
    verbose=100,
    one_hot_max_size=3
)

In [None]:
# grid
grid = {
      'learning_rate' : [0.01,0.1],
      'depth' : [4, 6, 8],
      'l2_leaf_reg' : [1,3,5]
}

In [None]:
# Tuning
grid_search_result = clf_grid.grid_search(grid, X=X_train, y=y_train)

#### gridsearch result
{'depth': 8, 'l2_leaf_reg': 5, 'learning_rate': 0.1}

In [None]:
# Result
grid_search_result['params']

### Fitting and Feature Importance

In [None]:
# Fit!
clf_grid.fit(X_train, y_train,
    cat_features=cat_idx,
    verbose=100,
    eval_set = (X_val, y_val))

In [None]:
# Prediction Score
y_pred = clf_grid.predict(X_test, prediction_type='Class')

from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred, target_names = ['Lose', 'Win']))

In [None]:
# Feature Importance
feature_importance = pd.Series(clf_grid.feature_importances_, index=X_train.columns)
feature_importance

### Selection Result
Feature Importance가 상대적으로 낮은(0.1보다 낮은) 변수 제거
> Platform      0.053507</br>
> OS_type       0.000000</br>
> Ex_Rate       0.025759</br>
> Country_ID    0.099139</br>
> weekend       0.059900

In [None]:
col_drop = feature_importance[feature_importance<0.1] 
col_drop 

## MLP Modeling

### Setting

In [None]:
# Package Load
import random
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
import torch.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn import set_config

set_config(display='diagram')

In [None]:
# Feature Selection from Boosting Result

X = X[X.columns.drop(col_drop.index)]

# Variable Features
num_features = ['P1','P2','P3','P4']
cat_features = X.columns.drop(num_features).tolist()

### Preprocessing Pipeline
과도한 더미변수 생성을 방지하기 위해 두 ID 변수를 전처리한(3-type coding) 데이터셋(`train_ID_treated.pkl`) 사용

In [None]:
# Transformer
numeric_transformer = StandardScaler()
cat_transformer = OneHotEncoder(drop='first', sparse=False)

# Preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, num_features),
        ("cat", cat_transformer, cat_features)
    ]
)

In [None]:
# Train/Val/Test Split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.5, stratify=y)

In [None]:
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size = 0.4, stratify=y_val)

In [None]:
# Fit_transform
X_train = preprocessor.fit_transform(X_train)
X_val = preprocessor.fit_transform(X_val)
X_test = preprocessor.fit_transform(X_test) # 15s

### Modeling

#### Setting

In [None]:
# Seed setting

def def_seed(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

def_seed(seed=123)

In [None]:
# Device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = torch.device('cpu')
print(device)

#### Dataset

In [None]:
class MyDataset(Dataset):
    def __init__(self, X, y):
        self.df = X
        self.labels = y.values
    
    def __getitem__(self, index):
        self.x = self.df[index]
        self.y = self.labels[index]
        return torch.Tensor(self.x), self.y

    def __len__(self):
        return len(self.df)

#### MLP Model

In [None]:
# Input dim
input_shape = X_train.shape[1]
print(input_shape)

In [None]:
# Classifier Model

class Classifier(nn.Module):
    def __init__(self, input_shape):
        super(Classifier, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(input_shape,64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Linear(64, 30),
            nn.BatchNorm1d(30),
            nn.Dropout1d(0.1),
            nn.Linear(30, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.layer(x)
        return x

# Initialize Net
model = Classifier(input_shape = input_shape)
print(model)

#### EarlyStopper

In [None]:
class EarlyStopping:
    """주어진 patience 이후로 validation loss가 개선되지 않으면 학습을 조기 중지"""
    def __init__(self, patience=10, verbose=False, delta=0, path='checkpoint.pt'):
        """
        Args:
            patience (int): validation loss가 개선된 후 기다리는 기간
                            Default: 7
            verbose (bool): True일 경우 각 validation loss의 개선 사항 메세지 출력
                            Default: False
            delta (float): 개선되었다고 인정되는 monitered quantity의 최소 변화
                            Default: 0
            path (str): checkpoint저장 경로
                            Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''validation loss가 감소하면 모델을 저장한다.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

#### Trainer

In [None]:
# Model Trainer
def train_model(model, batch_size, patience, epochs, device):
    # Logging
    train_losses = []
    valid_losses = []
    avg_train_losses = []
    avg_valid_losses = []

    # earlystopping
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    model.to(device)
    # Trainer
    for epoch in range(1, epochs + 1):
        model.train()
        for batch, (data, target) in enumerate(train_loader, 1):
            data = data.to(device)
            target = target.float().to(device)
            # clear gradients of all optimized variables
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target.view(-1,1))
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        
        # validation
        model.eval()
        for data, target in val_loader:
            data = data.to(device)
            target = target.float().to(device)
            output = model(data)
            loss = criterion(output, target.view(-1,1))
            valid_losses.append(loss.item())
        
        # loss per epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)

        epoch_len = len(str(epochs))

        print_msg = (f'[{epoch:>{epoch_len}}/{epochs:>{epoch_len}}] ' +
                     f'train_loss: {train_loss:.5f} ' +
                     f'valid_loss: {valid_loss:.5f} ')
        
        print(print_msg)

        # clear epoch list
        train_losses = []
        valid_losses = []

        early_stopping(valid_loss, model)

        if early_stopping.early_stop:
            print("Early Stopping")
            break
        
    model.load_state_dict(torch.load('checkpoint.pt'))

    return model, avg_train_losses, avg_valid_losses

### Train

In [None]:
# Hyperparameters
epochs = 100
lr = 0.01
batch_size = 8192
patience = 5 # earlystopping criteria

# Criterion and Optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr)

In [None]:
# To DataLoader
train = MyDataset(X_train, y_train)
val = MyDataset(X_val, y_val)
test = MyDataset(X_test, y_test)

train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test, shuffle=False)

In [None]:
# Train!
model, train_loss, valid_loss = train_model(model, batch_size, patience, epochs, device)

### Test

In [None]:
import itertools
from tqdm import tqdm

y_pred_list = []

# for prediction
model.eval()

with torch.no_grad():
    for data, target in tqdm(test_loader):
        data = data.to(device)
        y_test_pred = model(data)
        y_pred_tag = torch.round(y_test_pred).to('cpu')
        y_pred_list.append(y_pred_tag.detach().numpy())

y_pred_list = [i.squeeze().tolist() for i in y_pred_list]

In [None]:
# Prediction Score
y_true_test = y_test.values.ravel()

from sklearn.metrics import classification_report
print(classification_report(y_true_test, y_pred_list, target_names = ['Lose', 'Win']))

### Save Model