In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the data; the first column of each CSV becomes the DataFrame index.
train = pd.read_csv('./data/train.csv', index_col=0)
test = pd.read_csv('./data/test.csv', index_col=0)
sample_submission = pd.read_csv('./data/sample_submission.csv', index_col=0)

# Map every sample_submission column name to its positional index, so the
# training labels can be encoded in the same order as the submission columns.
column_number = {
    column: position
    for position, column in enumerate(sample_submission.columns)
}
    
def to_number(x, dic):
    """Return the value stored under key ``x`` in the mapping ``dic``.

    Raises:
        KeyError: if ``x`` is not a key of ``dic``.
    """
    mapped = dic[x]
    return mapped

# Encode each row's 'type' label as the index of the matching
# sample_submission column.
train['type_num'] = train['type'].apply(
    lambda label: to_number(label, column_number)
)

# Prepare the data set for the model: features are everything except the two
# label columns and fiberID; the target is the encoded label.
x = train.drop(columns=['type', 'type_num', 'fiberID'])
y = train['type_num']

# fiberID is removed from the test features as well.
test_x = test.drop(columns=['fiberID'])

# Keep the feature names around under both aliases.
x_name = x.columns
col_name = x_name

In [None]:
# Convert the pandas features/target and the test features to NumPy arrays.
x, y, test_x = (np.array(frame) for frame in (x, y, test_x))

In [None]:
from sklearn.model_selection import train_test_split, KFold, RandomizedSearchCV

#### 6:2:2
# Both values are passed as train_size: the first call keeps 80% of the rows,
# the second keeps 75% of that remainder for training (60% overall), leaving
# 20% for validation and 20% for the hold-out test split.
tra_size = 0.8
val_size = 0.75

# #### 8:1:1
# tra_size = 0.9
# val_size = 1./9.
# NOTE(review): val_size is used as train_size below, so 1./9. would keep only
# 10% for training and 80% for validation — 8./9. looks like the intended value.

# First split: hold out the test portion, stratified on the labels.
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    train_size=tra_size,
    stratify=y,
    shuffle=True,
    random_state=0,
)

# Second split: carve the validation portion out of the remaining rows.
# NOTE(review): unlike the first split this one is not stratified — confirm
# that is intended.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train,
    train_size=val_size,
    shuffle=True,
    random_state=0,
)


In [None]:
from sklearn.preprocessing import StandardScaler, RobustScaler

scaler1 = StandardScaler()
#scaler1=RobustScaler()

# Fit the scaler on the training split only, then apply the same transform to
# every other split so they all live in the same feature space.
scaler1.fit(x_train)
x_train = scaler1.transform(x_train)
# x_val is used as Keras validation data, so it must be scaled like x_train.
x_val = scaler1.transform(x_val)
x_test = scaler1.transform(x_test)
test_x = scaler1.transform(test_x)

In [None]:
from keras.utils import to_categorical
# One-hot encode every label split that the categorical_crossentropy model
# sees, including the validation labels.
# num_classes is pinned to the number of sample_submission columns so that all
# splits get the same width even if the highest class index is absent from one.
n_classes = len(column_number)
y_train = to_categorical(y_train, num_classes=n_classes)
y_val = to_categorical(y_val, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)

In [None]:
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input
from keras.callbacks import EarlyStopping
from keras import regularizers

In [None]:
def build_network(keep_prob=0.5, length=3, nodes=64, init_mode='uniform', reg=0.01,
                  input_dim=20, n_classes=19):
    """Build and compile a fully connected softmax classifier.

    Architecture: input -> Dense(64, elu) -> Dropout(0.2) -> ``length`` hidden
    Dense(elu) layers -> Dense(``n_classes``, softmax).

    Args:
        keep_prob: Passed directly to ``Dropout`` as its rate, i.e. the
            fraction of units that is dropped (despite the parameter's name).
            The dropout layer is inserted after hidden layer ``length - 1``
            (loop index ``length - 2``); for ``length < 2`` it is never added.
        length: Number of hidden layers added after the fixed first layer.
        nodes: Base width; hidden layers alternate between ``nodes * 2``
            (even loop index) and ``nodes`` (odd loop index).
        init_mode: Kernel initializer for the ``length`` hidden layers. The
            fixed first layer ``hidden0`` keeps the Keras default.
        reg: L2 regularization factor for the ``length`` hidden layers.
        input_dim: Number of input features (default 20, the previously
            hard-coded value).
        n_classes: Number of output classes (default 19, the previously
            hard-coded value).

    Returns:
        The compiled Keras ``Model`` (adam optimizer, categorical
        cross-entropy loss, accuracy metric). The model summary is printed as
        a side effect.
    """
    inputs = Input(shape=(input_dim,), name='input')
    x = Dense(64, activation='elu', name='hidden0')(inputs)
    x = Dropout(0.2)(x)

    for i in range(length):
        # Even positions are twice as wide as odd positions.
        units = nodes * 2 if i % 2 == 0 else nodes
        x = Dense(units, activation='elu', name='hidden' + str(i + 1),
                  kernel_initializer=init_mode,
                  kernel_regularizer=regularizers.l2(reg))(x)

        # Single tunable dropout, placed just before the last hidden layer.
        if i == length - 2:
            x = Dropout(keep_prob)(x)

    prediction = Dense(n_classes, activation='softmax', name='output')(x)

    model = Model(inputs=inputs, outputs=prediction)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    model.summary()

    return model

In [None]:
def create_hyperparameters():
    """Return the hyperparameter search space as a dict of candidate lists.

    Keys are the parameter names handed to the search; each value is the list
    of candidates for that parameter. Trailing comments keep the alternative
    values noted in the original.
    """
    search_space = {
        "batch_size": [1024],
        "keep_prob": [0.25],                           # alternative: 0.5
        "epochs": [1],                                 # alternative: 2
        "length": [3],                                 # alternative: 4
        "nodes": [64],                                 # alternative: 128
        "init_mode": ['glorot_normal', 'he_normal'],
        "reg": [0.005],                                # alternative: 0.002
    }
    return search_space

# Early stopping that monitors val_loss with a patience of 20 epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=20)
callbacks = [early_stop]

# Extra keyword arguments prepared for a Keras fit call.
# NOTE(review): keras_fit_params is not referenced anywhere else in the visible
# notebook, so the callback and validation data are currently unused — confirm
# whether they were meant to be forwarded to the search's fit call.
keras_fit_params = dict(callbacks=callbacks, validation_data=(x_val, y_val))

from keras.wrappers.scikit_learn import KerasClassifier
# Wrap build_network so it can be used as a scikit-learn estimator.
model = KerasClassifier(build_fn=build_network, verbose=2)

In [None]:
hyperparameters = create_hyperparameters()

from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, ParameterGrid

# Upper bound on the number of sampled candidates.
n_candidates = 24
# When every entry of the search space is a plain list (no distribution with an
# ``rvs`` method), the space is finite. Requesting more iterations than there
# are combinations makes scikit-learn warn or raise, depending on its version,
# so the request is clamped to the real number of combinations.
if all(not hasattr(values, 'rvs') for values in hyperparameters.values()):
    n_candidates = min(n_candidates, len(ParameterGrid(hyperparameters)))

# random_state fixes the sampling order, matching the seeded data splits.
# Only (x_train, y_train) are passed to fit; keras_fit_params is not forwarded.
search = RandomizedSearchCV(estimator=model, param_distributions=hyperparameters,
                            n_iter=n_candidates, n_jobs=-1, cv=5, verbose=2,
                            scoring='neg_log_loss', random_state=0)
search.fit(x_train, y_train)

In [None]:
# Show the best hyperparameter combination found by the search.
print(search.best_params_)

In [None]:
import os.path
# Find the first index whose file does not exist yet, so results from earlier
# runs are never overwritten.
i = 0
while os.path.isfile('./data/best_params' + str(i) + '.txt'):
    i += 1

# String forms of the best parameters' names and values.
l1 = str(search.best_params_.keys())
l2 = str(search.best_params_.values())

# The context manager closes the file even if one of the writes raises.
with open('./data/best_params' + str(i) + '.txt', 'w') as f:
    f.write(l1)
    f.write(l2)

# Most stable model so far

In [None]:
# model=Sequential()

# model.add(Dense(64,activation='elu',input_dim=20)) # input dimension
# model.add(Dense(128,activation='elu'))
# model.add(Dropout(0.5))
# model.add(Dense(128,activation='elu'))
# model.add(Dense(128,activation='elu'))
# model.add(Dense(128,activation='elu'))
# model.add(Dense(19,activation='softmax'))

# model.summary()