# 데이터 로드 

In [None]:
!cp /content/drive/MyDrive/kaggle.json ./


!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json
!mkdir data 
%cd data 

!kaggle competitions download -c tabular-playground-series-apr-2022
!unzip tabular-playground-series-apr-2022.zip 

/content/data
Downloading tabular-playground-series-apr-2022.zip to /content/data
 94% 161M/171M [00:06<00:00, 28.3MB/s]
100% 171M/171M [00:06<00:00, 26.6MB/s]
Archive:  tabular-playground-series-apr-2022.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               
  inflating: train_labels.csv        


In [None]:
from glob import glob 
from tqdm import tqdm 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 
import os 

In [None]:
%cd data
file_dirs = glob('*.csv')

df = []
for dir in file_dirs:
  exec(f"{dir.split('.')[0]}=pd.read_csv('{dir}')")
  df.append(dir.split('.')[0])

# 전처리 

In [None]:
class preprocess():
  def __init__(self,input_df,test_df):
    self.input_df = self.init_pre(input_df)
    self.train_df, self.valid_df = self.train_valid_split(self.input_df)
    self.minmax_value = self.make_minmax(self.train_df)
    self.test_df = self.init_pre(test_df)

  #기본 전처리 모듈
  def init_pre(self,input_df:pd.DataFrame):
    input_df = input_df.drop(columns = ['sequence','subject','step'])
    input_df = np.array(input_df).reshape(-1,60,13)
    return input_df 
  
  def train_valid_split(self,input_df):
    id = int(len(input_df)*0.1)
    train_df = input_df[:id*9]
    valid_df = input_df[id*9:]
    return train_df, valid_df 
  
  def make_minmax(self,input_df):
    # min_value = np.min(input_df,axis=0)
    # max_value = np.max(input_df,axis=0)
    min_value = np.percentile(input_df,25)
    max_value = np.percentile(input_df,75)
    scale_value = {}
    scale_value['min'] = min_value
    scale_value['max'] = max_value
    return scale_value
  
  def scaler(self,input_df):
    train_min_value = self.minmax_value['min']
    train_max_value = self.minmax_value['max']
    return_value = (input_df-train_min_value)/(train_max_value-train_min_value)
    return_value = return_value.astype(np.float16)
    np.random.shuffle(return_value)
    return return_value 

  # return  
  def __call__(self):
    self.train_df = self.scaler(self.train_df)
    self.valid_df = self.scaler(self.valid_df)
    self.test_df = self.scaler(self.test_df)
    return self.train_df,self.valid_df,self.test_df

def make_labels(labels):
  id = int(len(labels)*0.1)

  labels = np.array(labels.drop(columns=['sequence']))
  train_y = labels[:id*9]
  valid_y = labels[id*9:]
  return train_y, valid_y 

train_df,valid_df,test_df = preprocess(train,test)()
train_y, valid_y = make_labels(train_labels)

# 모델 

In [None]:
!pip install tensorflow-addons
from tensorflow import keras
from tensorflow.keras import layers 
from tensorflow.keras.layers import Input,Dense,LSTM,Bidirectional,Conv2D
from tensorflow.keras import optimizers
import tensorflow_addons as tfons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-addons
  Downloading tensorflow_addons-0.17.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 34.4 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.17.1


## 하이퍼 파라미터 튜닝 

In [None]:
opt = {} 
opt['input_shape'] = train_df.shape[1:]
opt['batch_size'] = 32 
opt['epochs'] = 5
opt['init_lr'] = 0.0005
opt['optimizer'] = optimizers.Adam(learning_rate = opt['init_lr'])

In [121]:
def encoding_block(node,input_layer):
  x = input_layer 
  x = layers.Dropout(0.2)(x)
  x = Bidirectional(LSTM(node,return_sequences=True))(x)
  x = layers.Normalization()(x)
  return x 

def make_model(opt):
  inputs = Input(opt['input_shape'])
  x = inputs
  for i in [128,32,2]:
    x = encoding_block(i,x)
  x = layers.Flatten()(x)
  x = layers.Dropout(0.2)(x)
  x = Dense(10,activation='relu')(x)
  x = Dense(2,activation = 'softmax')(x)
  model = keras.Model(inputs,x)
  
  model.compile(optimizer = opt['optimizer'],loss = 'sparse_categorical_crossentropy',metrics=['accuracy'])
  history = model.fit(train_df,train_y,batch_size=opt['batch_size'],validation_data=(valid_df,valid_y),epochs=opt['epochs'],verbose=1)
  return history,model 

In [78]:
batch_size = {} 
batch_size['64'] = 64 
batch_size['32'] = 32 

init_lr = {}
init_lr['0.0005'] = 0.0005
init_lr['0.001'] = 0.001 

opt_dict = {}
opt_dict['Radam'] = tfons.optimizers.RectifiedAdam()
opt_dict['Adamw'] = tfons.optimizers.AdamW(weight_decay = 1e-4)
opt_dict['Adam'] = optimizers.Adam() 
opt_dict['RMSprop'] = optimizers.RMSprop()

schedules = {} 
schedules['exp'] = optimizers.schedules.ExponentialDecay(initial_learning_rate=0.0001,decay_steps=1000,decay_rate=0.96)


In [79]:
result_save = [] 
for lr_name,lr in init_lr.items():
  for sch_name, schedule in schedules.items():
    for opt_name,optimizer in opt_dict.items():
      for t_batch,n_batch in batch_size.items():
        schedule.initial_learning_rate = lr 
        optimizer.learning_rate=schedule
        opt['optimizer'] = optimizer
        opt['batch_size'] = n_batch 
        condition = [lr_name,sch_name,t_batch,opt_name]
        print(condition)
        history = make_model(opt)
        condition = [lr_name,sch_name,t_batch,opt_name,history.history['accuracy'][-1]]
        result_save.append(condition)

['0.0005', 'exp', '64', 'Radam']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.0005', 'exp', '32', 'Radam']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.0005', 'exp', '64', 'Adamw']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.0005', 'exp', '32', 'Adamw']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.0005', 'exp', '64', 'Adam']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.0005', 'exp', '32', 'Adam']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.0005', 'exp', '64', 'RMSprop']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.0005', 'exp', '32', 'RMSprop']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.001', 'exp', '64', 'Radam']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.001', 'exp', '32', 'Radam']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.001', 'exp', '64', 'Adamw']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.001', 'exp', '32', 'Adamw']
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['0.00

In [80]:
result_save = pd.DataFrame(result_save)
result_save.columns = ['lr','schedule','batch','opt','acc']

In [85]:
result_save.sort_values(by=['acc'],ascending=False)

Unnamed: 0,lr,schedule,batch,opt,acc
6,0.0005,exp,64,RMSprop,0.565186
4,0.0005,exp,64,Adam,0.554143
14,0.001,exp,64,RMSprop,0.548536
7,0.0005,exp,32,RMSprop,0.545497
0,0.0005,exp,64,Radam,0.539719
2,0.0005,exp,64,Adamw,0.53882
15,0.001,exp,32,RMSprop,0.536638
5,0.0005,exp,32,Adam,0.528762
1,0.0005,exp,32,Radam,0.52705
3,0.0005,exp,32,Adamw,0.521315


## 튜닝 결과로 학습 

In [122]:
opt['batch_size'] = 64 
opt['epochs'] = 10
opt['init_lr'] = 0.0005 

opt_dict = {}
opt_dict['Radam'] = tfons.optimizers.RectifiedAdam()
opt_dict['Adamw'] = tfons.optimizers.AdamW(weight_decay = 1e-4)
opt_dict['Adam'] = optimizers.Adam() 
opt_dict['RMSprop'] = optimizers.RMSprop()

model_save = [] 
result_save = [] 
for optimizer in list(opt_dict.values()):
  opt['optimizer'] = optimizer 
  history,model = make_model(opt)
  
  result_save.append(history.history['accuracy'][-1])
  model_save.append(model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [106]:
list(opt_dict.values())

[<tensorflow_addons.optimizers.rectified_adam.RectifiedAdam at 0x7f3b78130710>,
 <tensorflow_addons.optimizers.weight_decay_optimizers.AdamW at 0x7f3b78130890>,
 <keras.optimizer_v2.adam.Adam at 0x7f3b78130590>,
 <keras.optimizer_v2.rmsprop.RMSprop at 0x7f3b3ee52490>]