In [None]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
sns.set(style='white', context='notebook', palette='deep')
%matplotlib inline


from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.linear_model import *
import itertools

from keras.utils.np_utils import to_categorical 
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import *
from keras.preprocessing.image import ImageDataGenerator
import os

import xgboost as xgb 
from xgboost import plot_importance , XGBClassifier, DMatrix
import joblib


In [None]:
# define train set
# Mount Google Drive (Colab only). The relative './drive/...' paths below
# presumably rely on the working directory being /content -- TODO confirm.
from google.colab import drive
drive.mount('/content/drive')
# Raw competition tables.
train = pd.read_csv('./drive/My Drive/DACON/data_file/train.csv')
test = pd.read_csv('./drive/My Drive/DACON/data_file/test.csv')
# Predictions of an earlier submission; later cells read its 'digit' column
# as labels for the test images.
test_pred = pd.read_csv('./drive/My Drive/DACON//submission/submission_91_ensembles_6+2_bn_08.csv')
# Working copies so the frames loaded above stay untouched.
train_copy = train.copy()
test_copy = test.copy()
test_pred_copy = test_pred.copy()

In [None]:
# Settings shared by all four augmentation generators. Every generator scales
# pixels by 1/255 and fills uncovered areas with the constant 0; each one then
# switches on exactly one family of transformations through its overrides.
_base_aug_kwargs = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    zca_epsilon=1e-06,
    rotation_range=0,
    width_shift_range=0.0,
    height_shift_range=0.0,
    brightness_range=None,
    shear_range=0,
    zoom_range=0,
    channel_shift_range=0.0,
    fill_mode='constant',
    cval=0.0,
    horizontal_flip=False,
    vertical_flip=False,
    rescale=1./255,
    preprocessing_function=None,
    data_format=None,
    validation_split=0,
    dtype=None,
)

# Random rotation with rotation_range=45.
rot_gen = ImageDataGenerator(**{**_base_aug_kwargs, 'rotation_range': 45})

# Random horizontal / vertical translation (shift range 0.2 on each axis).
trans_gen = ImageDataGenerator(**{
    **_base_aug_kwargs,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
})

# Random shear and zoom (range 0.2 each).
shear_zoom_gen = ImageDataGenerator(**{
    **_base_aug_kwargs,
    'shear_range': 0.2,
    'zoom_range': 0.2,
})

# Random horizontal and vertical flips.
flip_gen = ImageDataGenerator(**{
    **_base_aug_kwargs,
    'horizontal_flip': True,
    'vertical_flip': True,
})

In [None]:
def augmentation( input_imgs, aug_size ):
    """Return the normalised training frame extended with augmented images.

    Parameters
    ----------
    input_imgs : pandas.DataFrame
        Frame whose columns 1 and 2 are copied into the 'digit' and 'letter'
        columns of the augmented rows and whose columns 3 onward hold the 784
        pixel values of a 28x28 image.
    aug_size : int
        Number of augmented variants produced per drawn row and per generator.
        With 0 no augmented rows are added.

    Returns
    -------
    pandas.DataFrame
        The input rows (columns 1 onward, pixels divided by 255) followed by
        ``(len(input_imgs) // 2) * aug_size * 4`` augmented rows. The index is
        not reset, so labels repeat; use positional access.
    """
    df = input_imgs
    num_of_training_set = df.shape[0]
    # Order matters: it fixes which random draw belongs to which generator.
    generators = (rot_gen, trans_gen, shear_zoom_gen, flip_gen)
    new_data_set = []

    for _ in range(num_of_training_set // 2):
        # One independently drawn source row per generator.
        rows = [np.random.randint(num_of_training_set) for _gen in generators]
        # Row lookups do not depend on the repeat index, so do them once.
        sources = [
            (
                [df.iloc[row, 1], df.iloc[row, 2]],
                np.array(df.iloc[row, 3:]).reshape(1, 28, 28, 1),
            )
            for row in rows
        ]

        for _repeat in range(aug_size):
            for gen, (labels, image) in zip(generators, sources):
                # Builtin next() works on any iterator, unlike the .next() method.
                augmented = next(gen.flow(image)).reshape(784,)
                new_data_set.append(labels + list(augmented))

    columns = ['digit', 'letter'] + [str(x) for x in range(784)]
    aug = pd.DataFrame(new_data_set, columns=columns)

    # The generators rescale by 1/255, so the untouched rows get the same scaling.
    train_norm = pd.concat([ input_imgs.iloc[:,1:3], np.divide(input_imgs.iloc[:,3:],255) ],axis=1)
    train_aug = pd.concat([train_norm,aug])

    return train_aug


In [None]:
def train_test_gen(input_imgs, aug_size):
    """Augment ``input_imgs`` and split the result into train / validation arrays.

    Returns the four-element result of ``train_test_split``:
    ``x_train, x_val, y_train, y_val``. Images are shaped (-1, 28, 28, 1) and
    labels are one-hot 'digit' vectors with 10 classes. The split holds out
    10% of the rows and uses the fixed seed 15.
    """
    augmented = augmentation(input_imgs, aug_size)

    # Columns 0-1 of the augmented frame are the labels; the rest are pixels.
    images = augmented.iloc[:, 2:].values.copy().reshape(-1, 28, 28, 1)
    one_hot_digits = to_categorical(augmented['digit'], num_classes=10)

    return train_test_split(images, one_hot_digits, test_size=0.1, random_state=15)

In [None]:
def load_best(file_name):
    """Load the most recently created model file of a checkpoint folder.

    Parameters
    ----------
    file_name : str
        Name of the sub-folder under './drive/My Drive/DACON/saved_model/'.

    Returns
    -------
    The model returned by ``load_model`` for the entry with the largest
    ``os.path.getctime``. Its file name is printed in red.

    Raises
    ------
    FileNotFoundError
        If the folder contains no entries (previously an opaque IndexError).
    """
    filepath = './drive/My Drive/DACON/saved_model/' + file_name + '/'
    candidates = os.listdir(filepath)
    if not candidates:
        raise FileNotFoundError('no saved model found in ' + filepath)

    # Newest entry wins; on ties the first one listed is kept, as before.
    best_name = max(candidates, key=lambda f_name: os.path.getctime(filepath + f_name))
    model = load_model( filepath + best_name )
    print('\033[31m' + best_name + '\033[0m')
    print()
    return model

In [None]:
def set_filepath(file_name):
    """Return the checkpoint folder for ``file_name``, creating it if needed.

    The folder is './drive/My Drive/DACON/saved_model/<file_name>/'. Missing
    parent folders are created as well, and an already existing folder is
    not an error.
    """
    MODEL_SAVE_FOLDER_PATH = './drive/My Drive/DACON/saved_model/' + file_name + '/'
    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)

    return MODEL_SAVE_FOLDER_PATH

In [None]:
def get_model(N):
    """Build the (uncompiled) CNN used by every ensemble member.

    ``N`` is the filter count of the first convolution stage. The second
    stage uses ``2*N`` filters and the dense layer named 'my_dense' has
    ``4*N`` units. Input shape is (28, 28, 1); output is a 10-way softmax.
    """
    def conv(filters, kernel_size, **extra):
        # Every convolution shares padding and activation.
        return Conv2D(filters = filters, kernel_size = kernel_size, padding = 'Same',
                      activation ='relu', **extra)

    layers = [
        # stage 1: two 5x5 convolutions
        conv(N, (5,5), input_shape = (28,28,1)),
        BatchNormalization(),
        conv(N, (5,5)),
        BatchNormalization(),
        MaxPool2D(pool_size=(2,2)),
        Dropout(0.25),
        # stage 2: two 3x3 convolutions
        conv(2*N, (3,3)),
        BatchNormalization(),
        conv(2*N, (3,3)),
        BatchNormalization(),
        MaxPool2D(pool_size=(2,2), strides=(2,2)),
        Dropout(0.25),
        # classifier head; 'my_dense' is looked up by name in train_xgb
        Flatten(),
        Dense(4*N, activation = "relu", name='my_dense'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation = "softmax"),
    ]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    return model

In [None]:
def compare(file1,file2):
    """Return the fraction of rows whose 'digit' agrees between two submissions.

    ``file1`` and ``file2`` are file names without the '.csv' suffix, looked
    up in './drive/My Drive/DACON/submission/'.
    """
    submission_dir = './drive/My Drive/DACON/submission/'
    digits_a = pd.read_csv(submission_dir + file1 +'.csv')['digit']
    digits_b = pd.read_csv(submission_dir + file2 +'.csv')['digit']

    agree = np.array(digits_a == digits_b)
    return int(np.count_nonzero(agree)) / len(agree)

In [None]:
def pred_acc(file_name,file_list):
    """Print the agreement of ``file_name`` with every submission in ``file_list``.

    Each entry is compared through ``compare`` and reported on its own line,
    with the leading 'submission_' prefix removed from the displayed name.
    Nothing is returned.
    """
    for other in file_list:
        acc = compare(file_name, other)
        # The prefix used to be misspelled ('submision_'), so it was never stripped.
        print( 'Compared with ' + other.replace('submission_','') + ' : {}'.format(acc) )
    

In [None]:
# Training hyper-parameters read as globals by train_cnn and train_xgb.
epochs = 100
batch_size = 100

In [None]:
def train_cnn(cnn_num, N, aug_size):
    """Train ``cnn_num`` CNN ensemble members and return them with their scores.

    Parameters
    ----------
    cnn_num : int
        Number of models to train.
    N : int
        Base filter count passed to ``get_model``.
    aug_size : int
        Augmentation factor passed to ``train_test_gen``. A fresh augmented
        train/validation split is generated for every model.

    Returns
    -------
    (cnn_model_list, acc_list)
        The models reloaded through ``load_best`` and, for each, the best
        validation accuracy reached during its training run.
    """
    cnn_model_list = []
    acc_list = []

    for i in range(cnn_num):

        model = get_model(N)
        file_name = 'model_ensembles'
        # '{val_accuracy:.4f}' is filled in by ModelCheckpoint at save time.
        model_path = set_filepath(file_name) + 'cnn_{}'.format(i) + '_{val_accuracy:.4f}.hdf5'

        # callbacks
        early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, mode='max')
        mcp_save = ModelCheckpoint(filepath = model_path, save_best_only=True, monitor='val_accuracy', mode='max', verbose=1)
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_delta=1e-4, mode='min')
        # compile
        optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
        model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

        # fit model
        x_train, x_val, y_train, y_val = train_test_gen(train_copy, aug_size)

        hist = model.fit(x_train, y_train, batch_size=batch_size, epochs = epochs,
                    validation_data = (x_val,y_val),
                    steps_per_epoch=x_train.shape[0]// batch_size,
                    callbacks=[early_stopping,mcp_save,reduce_lr_loss])
        # Only improving epochs are checkpointed, so the newest file is the best one.
        model = load_best(file_name)

        cnn_model_list.append(model)
        # Indexing [-11] was only right when early stopping fired after exactly
        # `patience` stale epochs, and failed on shorter runs.
        acc_list.append(max(hist.history['val_accuracy']))

    return cnn_model_list, acc_list


In [None]:
def train_xgb(xgb_num, N, aug_size):
    """Train ``xgb_num`` CNN feature extractors with an XGBoost classifier each.

    Parameters
    ----------
    xgb_num : number of CNN / XGBoost pairs to train.
    N : base filter count passed to ``get_model``.
    aug_size : augmentation factor passed to ``train_test_gen`` and ``augmentation``.

    Returns
    -------
    (xgb_model_list, inter_model_list) -- the fitted XGBClassifier objects and
    the truncated CNNs that output the activations of the 'my_dense' layer.
    Entry ``i`` of one list belongs to entry ``i`` of the other.
    """
    inter_model_list = []
    layer_name='my_dense'

    # Stage 1: train the CNNs and cut each one at the dense feature layer.
    for i in range(xgb_num):

        model = get_model(N)
        file_name = 'model_ensembles'
        # '{val_accuracy:.4f}' is filled in by ModelCheckpoint at save time.
        model_path = set_filepath(file_name) + 'xgb_{}'.format(i) + '_{val_accuracy:.4f}.hdf5'

        # callbacks
        early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, mode='max')
        mcp_save = ModelCheckpoint(filepath = model_path, save_best_only=True, monitor='val_accuracy', mode='max', verbose=1)
        reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_delta=1e-4, mode='min')

        optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
        model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

        # fit model
        x_train, x_val, y_train, y_val = train_test_gen(train_copy, aug_size)

        # NOTE(review): `hist` is not used afterwards in this function.
        hist = model.fit(x_train, y_train, batch_size=batch_size, epochs = epochs, 
                    validation_data = (x_val,y_val),
                    steps_per_epoch=x_train.shape[0]// batch_size, 
                    callbacks=[early_stopping,mcp_save,reduce_lr_loss])
        # Reload the most recently created checkpoint of the shared folder.
        model = load_best(file_name)
        
        # Same network, but ending at the 'my_dense' layer output.
        inter_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
        inter_model_list.append(inter_model)

    # Stage 2: one freshly augmented data set, pushed through every extractor.
    train_aug = augmentation(train_copy, aug_size)
    inter_train = train_aug.iloc[:,2:].values.reshape(-1,28,28,1)
    cnn_output_list = []
    cnn_val_list = []
    for i in range(xgb_num):
        cnn_output = inter_model_list[i].predict( inter_train ) 
        cnn_output = pd.DataFrame( data=cnn_output )
        cnn_output_list.append(cnn_output)
        # The labels are the same 'digit' column for every extractor.
        cnn_val = train_aug['digit']
        cnn_val_list.append(cnn_val)

    # Stage 3: fit one XGBoost classifier per extractor on its features.
    xgb_model_list = []
    for i in range(xgb_num):
        x_train, x_val, y_train, y_val = train_test_split(cnn_output_list[i], cnn_val_list[i],test_size=0.1,random_state=25)

        xgb_model = XGBClassifier(max_depth=5, num_class=10, objective='multi:softprob', booster='gbtree', n_estimators=300, learning_rate=0.2 )
        xgb_model.fit( x_train, y_train, eval_set=[(x_val, y_val)], eval_metric='mlogloss', early_stopping_rounds=5)

        xgb_model_list.append(xgb_model)

    return xgb_model_list, inter_model_list

In [None]:
def ensemble(input_imgs,cnn_model_list, inter_model_list, xgb_model_list, w_cnn=1.0, w_xgb=1.0):
    """Predict digits with a weighted sum of CNN and CNN+XGBoost probabilities.

    Parameters
    ----------
    input_imgs : array-like with a ``shape`` attribute
        Images, reshapeable to (-1, 28, 28, 1).
    cnn_model_list : list
        Models whose ``predict`` returns per-class scores with 10 columns.
    inter_model_list, xgb_model_list : list
        Paired by position: ``inter_model_list[i].predict`` yields the features
        fed to ``xgb_model_list[i].predict_proba``.
    w_cnn, w_xgb : float, default 1.0
        Weight applied to every CNN member / every XGBoost member.

    Returns
    -------
    list
        One predicted class index per input row.
    """
    L = input_imgs.shape[0]
    label_list = np.zeros((L,10))
    # The network input is identical for every member, so build it once.
    images = np.array(input_imgs).reshape(-1,28,28,1).astype(np.float32)

    for cnn_model in cnn_model_list:
        label_list += cnn_model.predict( images )*w_cnn

    for i in range(len(xgb_model_list)):
        cnn_output = DataFrame(inter_model_list[i].predict( images ))
        label_list += xgb_model_list[i].predict_proba( cnn_output )*w_xgb

    # Ties resolve to the lowest class index.
    return list(np.argmax(label_list, axis=1))

# Train models

In [None]:
# Ensemble size: number of plain CNN members and of CNN + XGBoost members.
cnn_num = 10
xgb_num = 5

In [None]:
# Train the CNN members: N=64 base filters, aug_size=2.
cnn_model_list, acc_list = train_cnn(cnn_num, 64, 2)

In [None]:
# Train the CNN + XGBoost members: N=128 base filters, aug_size=3.
xgb_model_list, inter_model_list = train_xgb(xgb_num,128,3)

# Save models

In [None]:
# Persist every ensemble member under a dedicated folder. File names carry the
# list name and the position so each model can be told apart later.
MODEL_SAVE_FOLDER_PATH = './drive/My Drive/DACON/saved_model/' + 'model_ensembles_10+5_new' + '/'
# makedirs also creates missing parent folders and tolerates an existing one.
os.makedirs(MODEL_SAVE_FOLDER_PATH, exist_ok=True)
for i, cnn_model in enumerate(cnn_model_list):
    cnn_model.save(MODEL_SAVE_FOLDER_PATH  + 'cnn_model_list_{}.hdf5'.format(i))
for i, inter_model in enumerate(inter_model_list):
    inter_model.save(MODEL_SAVE_FOLDER_PATH  + 'inter_model_list_{}.hdf5'.format(i))
for i, xgb_model in enumerate(xgb_model_list):
    joblib.dump(xgb_model, MODEL_SAVE_FOLDER_PATH  + 'xgb_model_list_{}.dat'.format(i))

In [None]:
# Scale the test pixels like the training data and shape them for the CNNs.
x_test = np.divide(test_copy.iloc[:,2:].values,255)
x_test = x_test.reshape(-1,28,28,1)
# ensemble() needs the two member weights; the call used to omit them and
# raised TypeError.
# NOTE(review): w_xgb=0.7 is inferred from the '_07' suffix of the output file
# name (other cells pair '_30' with 3.0 and '_w5' with 5.0) -- confirm.
pred = ensemble(x_test, cnn_model_list, inter_model_list, xgb_model_list, 1.0, 0.7)
data = {'id':test_copy['id'], 'digit':pred}
submission = DataFrame(data)
file_name = 'submission_ensembles_6+2_bn_07'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)
print(pred[:10])
print()
# Earlier submissions used as reference points for the agreement report.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_88_ensemble_2_2_4_try3',
             'submission_89_ensemble_2_2',
             'submission_89',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

# Load models for linear regression

In [None]:
# Reload a saved ensemble. Files are ordered by os.path.getctime (oldest
# first) and assigned purely by position: the first cnn_num entries are loaded
# as CNNs, the next xgb_num as feature extractors, the following xgb_num as
# XGBoost models.
# NOTE(review): this depends on the folder holding exactly those files in that
# creation order; also, the save cell writes to 'model_ensembles_10+5_new'
# while this cell reads 'model_ensembles_10+5' -- confirm the intended folder.
filepath = './drive/My Drive/DACON/saved_model/model_ensembles_10+5/'
time_list = []
for f_name in os.listdir(f"{filepath}"):
    written_time = os.path.getctime(f"{filepath}{f_name}")
    time_list.append((f_name, written_time))
sorted_file_list = sorted(time_list, key=lambda x: x[1], reverse=False)
models = sorted_file_list
cnn_model_list_load = []
inter_model_list_load = []
xgb_model_list_load = []
for i in range(cnn_num):
    model = load_model( filepath + models[i][0] )
    cnn_model_list_load.append(model)
for i in range(xgb_num):
    model = load_model( filepath + models[i+cnn_num][0] )
    inter_model_list_load.append(model)
for i in range(xgb_num):
    # XGBoost members were dumped with joblib rather than saved as Keras models.
    model = joblib.load( filepath + models[i+cnn_num+xgb_num][0] )
    xgb_model_list_load.append(model)

# Linear Regression

In [None]:
def to_data(input, cnn_model_list, inter_model_list, xgb_model_list):
    """Stack the class scores of every ensemble member side by side.

    Each CNN contributes ``predict(input)``. Each XGBoost model contributes
    ``predict_proba`` on the features produced by the extractor at the same
    position in ``inter_model_list``. The per-member blocks are concatenated
    along axis 1, CNN blocks first.
    """
    # CNN members contribute their output directly.
    blocks = [cnn_model.predict(input) for cnn_model in cnn_model_list]

    # XGBoost members classify the features of their paired extractor.
    for idx in range(len(xgb_model_list)):
        features = DataFrame(inter_model_list[idx].predict( input ))
        blocks.append( xgb_model_list[idx].predict_proba( features ) )

    stacked = blocks[0]
    for block in blocks[1:]:
        stacked = np.concatenate((stacked, block), axis=1)

    return stacked

In [None]:
def linear_reg(aug_size, cnn_model_list, inter_model_list, xgb_model_list):
    """Fit a linear stacking model on the ensemble members' class scores.

    A freshly augmented copy of ``train_copy`` is pushed through ``to_data``.
    The stacked scores are regressed onto the one-hot 'digit' labels with
    ``LinearRegression``, holding out 10% of the rows (unseeded split) for the
    printed error.

    Returns the fitted ``LinearRegression`` model.
    """
    train_aug = augmentation(train_copy, aug_size)
    images = train_aug.iloc[:,2:].values.copy().reshape(-1,28,28,1)
    data = to_data(images, cnn_model_list, inter_model_list, xgb_model_list)
    data_val = to_categorical(train_aug['digit'], 10)
    x_train, x_val, y_train, y_val = train_test_split(data, data_val, test_size = 0.1)
    linear_model = LinearRegression()
    linear_model.fit(x_train, y_train)
    # '.format' (not ', format') so the placeholder is actually filled in.
    print('Coefficients: {}'.format(linear_model.coef_[0][:10]))
    # The printed value is the mean squared error on the held-out rows.
    print("RSS: {}".format( np.mean((linear_model.predict(x_val) - y_val) ** 2) ))

    return linear_model

In [None]:
# Augmented check set (aug_size=3): images shaped for the CNNs plus digit labels.
train_aug_check = augmentation(train_copy,3)
x_check = train_aug_check.iloc[:,2:].values.reshape(-1,28,28,1)
y_check = train_aug_check['digit']

In [None]:
# Member scores for the check set. The empty subscripts `[]` that used to
# follow two of the arguments were a syntax error.
data_check = to_data(x_check, cnn_model_list_load, inter_model_list_load, xgb_model_list_load)

# Stacking model fitted on the un-augmented training rows (aug_size=0).
linear_model = linear_reg(0, cnn_model_list_load, inter_model_list_load, xgb_model_list_load)
pred = linear_model.predict( data_check )
# Predicted digit = column with the largest regression output.
pred_check = list(np.argmax(pred, axis=1))

matches = np.array(pred_check)==np.array(y_check)
acc = int(np.count_nonzero(matches))/len(matches)
acc

In [None]:
# Test images scaled to [0, 1] and shaped for the CNNs.
x_test = np.divide(test_copy.iloc[:,2:].values,255).reshape(-1,28,28,1)
data_test = to_data(x_test, cnn_model_list_load, inter_model_list_load, xgb_model_list_load)

# Predicted digit = column with the largest regression output.
pred = linear_model.predict( data_test )
pred_test = list(np.argmax(pred, axis=1))
pred_test[:10]

In [None]:
# test cell
# Stack only a subset of the ensemble: the first `u` CNNs and `v`
# extractor / XGBoost pairs starting at position 2.
u = 1
v = 1

cnn_subset = cnn_model_list_load[:u]
inter_subset = inter_model_list_load[2:2+v]
xgb_subset = xgb_model_list_load[2:2+v]

data_check = to_data(x_check, cnn_subset, inter_subset, xgb_subset)
linear_model = linear_reg(3, cnn_subset, inter_subset, xgb_subset)
pred = linear_model.predict( data_check )
pred_check = list(np.argmax(pred, axis=1))

# Share of check rows whose predicted digit matches the label.
s = [np.array(pred_check)==np.array(y_check)]
t = np.where(s[0]==True)
acc = len(t[0])/len(s[0])
print('Predicted score : {}'.format(acc))

x_test = np.divide(test_copy.iloc[:,2:].values,255).reshape(-1,28,28,1)
data_test = to_data(x_test, cnn_subset, inter_subset, xgb_subset)

pred = linear_model.predict( data_test )
pred_test = list(np.argmax(pred, axis=1))
pred_test[:10]

In [None]:
# Write the stacked-model test predictions as a submission file.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_1+1_bn_linearreg_2'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)
# Earlier submissions to compare against; pred_acc prints the agreement rate
# of the new file with each of them.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

In [None]:
# Agreement of a previously written submission with the reference list.
pred_acc('submission_ensembles_6+2_bn_linearreg_2',file_list)

# ElasticNet Regression

In [None]:
def to_data(input, cnn_model_list, inter_model_list, xgb_model_list):
    """Stack the class scores of every ensemble member side by side.

    Each CNN contributes ``predict(input)``. Each XGBoost model contributes
    ``predict_proba`` on the features produced by the extractor at the same
    position in ``inter_model_list``. The per-member blocks are concatenated
    along axis 1, CNN blocks first.
    """
    # CNN members contribute their output directly.
    blocks = [cnn_model.predict(input) for cnn_model in cnn_model_list]

    # XGBoost members classify the features of their paired extractor.
    for idx in range(len(xgb_model_list)):
        features = DataFrame(inter_model_list[idx].predict( input ))
        blocks.append( xgb_model_list[idx].predict_proba( features ) )

    stacked = blocks[0]
    for block in blocks[1:]:
        stacked = np.concatenate((stacked, block), axis=1)

    return stacked

In [None]:
def linear_reg(aug_size, cnn_model_list, inter_model_list, xgb_model_list):
    """Fit an elastic-net stacking model on the ensemble members' class scores.

    A freshly augmented copy of ``train_copy`` is pushed through ``to_data``.
    The stacked scores are regressed onto the one-hot 'digit' labels with
    ``MultiTaskElasticNet(alpha=0.01)``, holding out 10% of the rows (unseeded
    split) for the printed error.

    Returns the fitted ``MultiTaskElasticNet`` model.
    """
    train_aug = augmentation(train_copy, aug_size)
    images = train_aug.iloc[:,2:].values.copy().reshape(-1,28,28,1)
    data = to_data(images, cnn_model_list, inter_model_list, xgb_model_list)
    data_val = to_categorical(train_aug['digit'], 10)
    x_train, x_val, y_train, y_val = train_test_split(data, data_val, test_size = 0.1)
    linear_model = MultiTaskElasticNet(alpha=0.01)
    linear_model.fit(x_train, y_train)
    # '.format' (not ', format') so the placeholder is actually filled in.
    print('Coefficients: {}'.format(linear_model.coef_[0][:10]))
    # The printed value is the mean squared error on the held-out rows.
    print("RSS: {}".format( np.mean((linear_model.predict(x_val) - y_val) ** 2) ))

    return linear_model

In [None]:
# Augmented check set (aug_size=3): images shaped for the CNNs plus digit labels.
train_aug_check = augmentation(train_copy,3)
x_check = train_aug_check.iloc[:,2:].values.reshape(-1,28,28,1)
y_check = train_aug_check['digit']

In [None]:
# Stacked member scores for the check set, using the full reloaded ensemble.
data_check = to_data(x_check, cnn_model_list_load, inter_model_list_load, xgb_model_list_load)

In [None]:
# Fit the stacking model (aug_size=3) and score it on the check set.
linear_model = linear_reg(3, cnn_model_list_load, inter_model_list_load, xgb_model_list_load)
pred = linear_model.predict( data_check )
# Predicted digit = column with the largest regression output.
pred_check = list(np.argmax(pred, axis=1))

s = [np.array(pred_check)==np.array(y_check)]
t = np.where(s[0]==True)
acc = len(t[0])/len(s[0])
acc

In [None]:
# Test images scaled to [0, 1] and shaped for the CNNs.
x_test = np.divide(test_copy.iloc[:,2:].values,255).reshape(-1,28,28,1)
data_test = to_data(x_test, cnn_model_list_load, inter_model_list_load, xgb_model_list_load)

# Predicted digit = column with the largest regression output.
pred = linear_model.predict( data_test )
pred_test = list(np.argmax(pred, axis=1))
pred_test[:10]

In [None]:
# Write the elastic-net stacked predictions as a submission file.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_10+5_bn_elasticreg_3_001'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)
# Earlier submissions to compare against; pred_acc prints the agreement rate
# of the new file with each of them.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

In [None]:
# Agreement rate between two elastic-net submission files.
compare('submission_ensembles_10+5_bn_elasticreg_3_001','submission_ensembles_10+5_bn_elasticreg')

# Ensembles : 10+5

In [None]:
# Test images scaled to [0, 1] and shaped (-1, 28, 28, 1).
x_test = np.divide(test_copy.iloc[:,2:].values,255).reshape(-1,28,28,1)

In [None]:
# Weighted-sum ensemble on the test set: weight 1.0 for every CNN member and
# 3.0 for every XGBoost member.
pred = ensemble(x_test, cnn_model_list_load, inter_model_list_load, xgb_model_list_load, 1.0, 3.0)
data = {'id':test_copy['id'], 'digit':pred}
submission = DataFrame(data)
file_name = 'submission_ensembles_10+5_bn_30'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)
print(pred[:10])
print()
# Earlier submissions to compare against; pred_acc prints the agreement rate
# of the new file with each of them.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

In [None]:
# Agreement rate between the submissions named '..._30' and '..._25'.
compare('submission_ensembles_10+5_bn_30','submission_ensembles_10+5_bn_25')

In [None]:
# Augmented check set for the weight sweep. x_check stays a DataFrame here;
# ensemble() converts and reshapes its input itself.
train_aug = augmentation(train_copy, 3)
x_check = train_aug.iloc[:,2:]
y_check = train_aug['digit']

In [None]:
# Sweep the XGBoost member weight from 4.0 upward in steps of 0.2 and report
# the accuracy on the augmented check set for each value.
for i in range(10):
    w_xgb = 4+0.2*i
    pred_check =  ensemble(x_check, cnn_model_list_load, inter_model_list_load, xgb_model_list_load, 1.0, w_xgb)
    s = [np.array(pred_check)==np.array(y_check)]
    t = np.where(s[0]==True)
    acc = len(t[0])/len(s[0])
    # Print the weight that was really used (the label used to say 2+0.2*i).
    print('{} Predicted score : {}'.format(w_xgb,acc))

In [None]:
# Agreement of the '91_ensembles_6+2_bn_08' submission with the reference list
# (which contains that file itself).
pred_acc('submission_91_ensembles_6+2_bn_08',file_list)

# Load models for ensemble : 5 models



In [None]:
# Load individually stored models for the five-member ensemble.
filepath = './drive/My Drive/DACON/saved_model/model_storage/'

cnn = load_model( filepath + 'cnn_0_0.8891.hdf5' )
cnn_bn = load_model( filepath + 'cnn_bn_0.hdf5' )
depthwise = load_model( filepath + 'depthwise_0_0.8731.hdf5' )
pretrained = load_model( filepath + 'pretrained_by_letter_cnn_model_0_0.9038.hdf5' )
# Feature extractor whose output is fed to the XGBoost model below.
inter_model = load_model( filepath + 'inter_model.hdf5' )
# NOTE(review): this rebinds `xgb`, which the import cell bound to the xgboost
# module; after this cell `xgb` refers to the loaded model.
xgb = joblib.load( filepath + 'xgb.dat' )

In [None]:
# Keras members of the five-model ensemble, in a fixed order.
model_list = [cnn, cnn_bn, depthwise, pretrained]

In [None]:
# simple ensemble
x_test = np.divide(test_copy.iloc[:,2:].values,255).reshape(-1,28,28,1)

# Running sum of class scores, one row per test image.
L = x_test.shape[0]
label_list = np.zeros((L,10))
for member in model_list:
    label_list += member.predict( x_test )

# The XGBoost member classifies extractor features and is weighted by 5.0.
cnn_output = DataFrame(inter_model.predict( x_test ))
label_xgb = xgb.predict_proba(cnn_output)
label_list += label_xgb*5.0

pred = list(np.argmax(label_list, axis=1))

pred[:10]

In [None]:
# Write the simple weighted ensemble predictions as a submission file.
data = {'id':test_copy['id'], 'digit':pred}
submission = DataFrame(data)
file_name = 'submission_ensembles_w5'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)
# Earlier submissions to compare against ('submission_89' is disabled here).
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             # 'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

In [None]:
def data_for_reg(input):
    """Stack the class scores of all global ensemble members for ``input``.

    Uses the globals ``model_list`` (Keras members), ``inter_model`` (feature
    extractor) and ``xgb`` (classifier on those features). Returns the
    per-member score blocks concatenated along axis 1, Keras members first
    and the XGBoost block last.
    """
    pred_data = [member.predict( input ) for member in model_list]
    cnn_output = DataFrame(inter_model.predict( input ))
    pred_data.append( xgb.predict_proba(cnn_output) )

    # Concatenate every block, however many members model_list holds; the
    # former hard-coded range(1,5) only worked for exactly four Keras members.
    return np.concatenate(pred_data, axis=1)

In [None]:
"""# trian linear regression
train_aug = augmentation(train_copy,3)
x = train_aug.iloc[:,2:].values.reshape(-1,28,28,1)
data = data_for_reg( x )
data_val = train_aug['digit']
data_val = to_categorical(data_val, 10)

x_train, x_val, y_train, y_val = train_test_split(data, data_val, test_size = 0.1)
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)
print('Coefficients: {}', format(linear_model.coef_[0][:10]))
print("RSS: {}".format( np.mean((linear_model.predict(x_val) - y_val) ** 2) ))"""

In [None]:
# Fit the stacking regression on the test images themselves, using the 'digit'
# column of an earlier submission (test_pred_copy) as labels.
t_size = test_copy.shape[0]
x = np.divide(test_copy.iloc[:t_size,2:].values,255)
x = x.reshape(-1,28,28,1)
data = data_for_reg( x )
data_val = test_pred_copy['digit'][:t_size]
data_val = to_categorical(data_val, 10)

x_train, x_val, y_train, y_val = train_test_split(data, data_val, test_size = 0.1)
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)
# '.format' (not ', format') so the placeholder is actually filled in.
print('Coefficients: {}'.format(linear_model.coef_[0][:10]))
# The printed value is the mean squared error on the held-out rows.
print("RSS: {}".format( np.mean((linear_model.predict(x_val) - y_val) ** 2) ))

In [None]:
# Test images scaled to [0, 1] and shaped for the CNNs.
x_test = np.divide(test_copy.iloc[:,2:].values,255).reshape(-1,28,28,1)
data_test = data_for_reg(x_test)

# Predicted digit = column with the largest regression output.
pred = linear_model.predict( data_test )
pred_test = list(np.argmax(pred, axis=1))
pred_test[:10]

In [None]:
# Write the predictions of the regression fitted on the test set as a submission.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_linear_using_test_all'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)

# Earlier submissions to compare against ('submission_89' is disabled here).
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             # 'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_90_ensembles_linear_using_test_1000',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

In [None]:
def data_for_reg_1(input):
    """Return one expected-digit feature per ensemble member for ``input``.

    For every member the 10 class scores of a row are collapsed into the
    single value ``sum(score[k] * k for k in range(10))``. Members are the
    Keras models in the global ``model_list`` followed by the global ``xgb``
    classifier applied to the features of the global ``inter_model``.

    Returns an array with one row per input image and one column per member.
    """
    def expected_digit(label_proba):
        # Score-weighted mean of the digit values 0..9, one value per row.
        digits = np.arange(10, dtype=label_proba.dtype)
        return (label_proba @ digits).reshape(-1, 1)

    pred_data = [expected_digit(member.predict( input )) for member in model_list]

    cnn_output = DataFrame(inter_model.predict( input ))
    # The XGBoost branch used to accumulate `label` across rows and to index a
    # stale row `i` instead of `j`; it now matches the per-row Keras branch.
    pred_data.append( expected_digit(xgb.predict_proba(cnn_output)) )

    # Concatenate all members instead of a hard-coded count of five.
    return np.concatenate(pred_data, axis=1)

In [None]:
# Scratch check: output shape of the first member on an augmented training set.
train_aug = augmentation(train_copy,3)
x = train_aug.iloc[:,2:].values.reshape(-1,28,28,1)
a = model_list[0].predict( x )
a.shape

In [None]:
# train linear regression
# Features are the members' expected digits and the target is the true digit;
# both are divided by 10.
train_aug = augmentation(train_copy,3)
x = train_aug.iloc[:,2:].values.reshape(-1,28,28,1)
data = data_for_reg_1( x )
data = np.divide(data,10)
data_val = train_aug['digit']
data_val = np.divide(data_val,10)

x_train, x_val, y_train, y_val = train_test_split(data, data_val, test_size = 0.1)
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)
# '.format' (not ', format') so the placeholder is actually filled in.
# NOTE(review): the target is 1-D here, so coef_[0] is only the first member's
# coefficient -- confirm whether the whole coef_ vector was meant.
print('Coefficients: {}'.format(linear_model.coef_[0]))
# The printed value is the mean squared error on the held-out rows.
print("RSS: {}".format( np.mean((linear_model.predict(x_val) - y_val) ** 2) ))

In [None]:
x_test = np.divide(test_copy.iloc[:,2:].values,255)
x_test = x_test.reshape(-1,28,28,1)
# The regression was fitted on expected-digit features divided by 10, so the
# test features need the same scaling (it was missing before).
data_test = np.divide(data_for_reg_1(x_test),10)

# Predictions are on the same /10 scale as the training target.
pred = linear_model.predict( data_test )
pred[:10]

In [None]:
def data_for_reg_clip(input, clip):
    """Concatenate every ensemble member's class scores, zeroing small values.

    Scores come from each Keras model in the global ``model_list`` followed by
    the ``inter_model`` -> ``xgb`` pipeline. Any score strictly below ``clip``
    is replaced by 0 before the blocks are joined column-wise.

    Args:
        input: image batch accepted by ``model.predict``.
        clip: threshold below which a score is set to 0.

    Returns:
        ndarray with one row per sample and the score columns of all members
        side by side (XGBoost block last).
    """
    def clipped(scores):
        # Work on a copy so the array handed back by the model is not mutated.
        scores = np.array(scores, copy=True)
        scores[scores < clip] = 0
        return scores

    pred_data = [clipped(model.predict(input)) for model in model_list]

    cnn_output = DataFrame(inter_model.predict(input))
    pred_data.append(clipped(xgb.predict_proba(cnn_output)))

    # Join every block instead of assuming exactly five ensemble members.
    return np.concatenate(pred_data, axis=1)

In [None]:
# Fit the linear blender on the first t_size test images, using the digits of
# an earlier submission (test_pred_copy) as pseudo-labels.
t_size = 1000
x = np.divide(test_copy.iloc[:t_size,2:].values,255)
x = x.reshape(-1,28,28,1)
data = data_for_reg_clip( x, 0.001 )
data_val = test_pred_copy['digit'][:t_size]
data_val = to_categorical(data_val, 10)

x_train, x_val, y_train, y_val = train_test_split(data, data_val, test_size = 0.1)
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)
# coef_ is 2-D here (one row per one-hot output); show the first 10 weights of
# output 0. The old call used a comma instead of `.format`, so '{}' was printed
# literally.
print('Coefficients: {}'.format(linear_model.coef_[0][:10]))
# Mean of the squared hold-out residuals (label kept as in earlier runs).
print("RSS: {}".format( np.mean((linear_model.predict(x_val) - y_val) ** 2) ))

In [None]:
# Score the whole test set with the linear blender fitted on clipped features.
x_test = np.divide(test_copy.iloc[:,2:].values,255)
x_test = x_test.reshape(-1,28,28,1)
# Build the features exactly as at fit time: data_for_reg_clip with the same
# 0.001 threshold (the unclipped data_for_reg was used here before).
data_test = data_for_reg_clip(x_test, 0.001)

pred = linear_model.predict( data_test )
# Predicted digit = index of the largest regression output in each row.
pred_test = list(np.argmax(pred, axis=1))
pred_test[:10]

In [None]:
# Write the current predictions (pred_test) as a submission CSV on Drive.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_linear_using_test_clip'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)

# Earlier submission files handed to pred_acc together with the new one.
# NOTE(review): pred_acc is defined outside this section; presumably it
# compares the new file against each listed submission - confirm.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             # 'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_90_ensembles_linear_using_test_1000',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

In [None]:
# Refit the linear blender on training images, using clipped class scores as
# features and one-hot digits as targets.
# NOTE(review): augmentation() is defined outside this section; the meaning of
# its second argument (0 here) is not visible.
train_aug = augmentation(train_copy,0)
x = train_aug.iloc[:,2:].values.reshape(-1,28,28,1)
data = data_for_reg_clip( x,0.001 )
data_val = train_aug['digit']
data_val = to_categorical(data_val, 10)

x_train, x_val, y_train, y_val = train_test_split(data, data_val, test_size = 0.1)
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)
# coef_ is 2-D here (one row per one-hot output); show the first 10 weights of
# output 0. The old call used a comma instead of `.format`, so '{}' was printed
# literally.
print('Coefficients: {}'.format(linear_model.coef_[0][:10]))
# Mean of the squared hold-out residuals (label kept as in earlier runs).
print("RSS: {}".format( np.mean((linear_model.predict(x_val) - y_val) ** 2) ))

In [None]:
# Score the whole test set with the linear blender fitted on clipped features.
x_test = np.divide(test_copy.iloc[:,2:].values,255)
x_test = x_test.reshape(-1,28,28,1)
# Build the features exactly as at fit time: data_for_reg_clip with the same
# 0.001 threshold (the unclipped data_for_reg was used here before).
data_test = data_for_reg_clip(x_test, 0.001)

pred = linear_model.predict( data_test )
# Predicted digit = index of the largest regression output in each row.
pred_test = list(np.argmax(pred, axis=1))
pred_test[:10]

In [None]:
# Write the current predictions (pred_test) as a submission CSV on Drive.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_linear_clip_aug0'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)

# Earlier submission files handed to pred_acc together with the new one.
# NOTE(review): pred_acc is defined outside this section; presumably it
# compares the new file against each listed submission - confirm.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             # 'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_90_ensembles_linear_using_test_1000',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

# Stacking model

In [None]:
def stack_model( input , N):
    """Train a one-hidden-layer stacking network on the ensemble's class scores.

    Args:
        input: DataFrame with a 'digit' label column; the columns from index 2
            onwards are reshaped to ``(-1, 28, 28, 1)`` images and passed to
            ``data_for_reg``.
        N: number of units in the hidden Dense layer.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Build the features first so the input width can follow whatever
    # data_for_reg returns (it was hard-coded to 50 = 5 members x 10 classes).
    data = data_for_reg( input.iloc[:,2:].values.reshape(-1,28,28,1) )
    data_val = to_categorical(input['digit'], 10)

    stack = Sequential()
    stack.add(Input(shape=(data.shape[1],)))
    stack.add(Dense(N, activation = "relu"))
    stack.add(BatchNormalization())
    # stack.add(Dropout(0.5))

    stack.add(Dense(10, activation = "softmax"))

    # fit model
    batch_size = 100
    epochs = 10

    # Fixed random_state keeps the 90/10 split reproducible between runs.
    x_train, x_val, y_train, y_val = train_test_split(data,data_val,test_size=0.1,random_state=15)

    optimizer = RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.0)
    stack.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

    stack.fit(x_train, y_train, batch_size=batch_size, epochs = epochs,
              validation_data = (x_val,y_val),
              steps_per_epoch=x_train.shape[0]// batch_size
              )
    stack.summary()

    return stack


In [None]:
# Rebuild the augmented training frame used by the stacking cells that follow.
# NOTE(review): augmentation() is defined outside this section; the meaning of
# its second argument is not visible here.
train_aug = augmentation(train_copy,3)

In [None]:
# Train the stacking network with a 256-unit hidden layer on train_aug.
stack = stack_model( train_aug, 256 )

In [None]:
# Scale the test pixels to [0, 1], turn them into ensemble features and let
# the stacking network pick the most likely digit for every image.
x_test = np.divide(test_copy.iloc[:,2:].values,255).reshape(-1,28,28,1)
data_test = data_for_reg(x_test)

pred = stack.predict( data_test )
pred_test = [np.argmax(scores) for scores in pred]
pred_test[:10]

In [None]:
# Write the current predictions (pred_test) as a submission CSV on Drive.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_stack_bn_do_2_256'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)

# Earlier submission files handed to pred_acc together with the new one.
# NOTE(review): pred_acc is defined outside this section; presumably it
# compares the new file against each listed submission - confirm.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             # 'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_90_ensembles_linear_using_test_1000',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

# Load more models

In [None]:
# Every saved ensemble member is restored from this Drive folder.
filepath = './drive/My Drive/DACON/saved_model/model_storage/'

# CNN variants (entries left commented out are excluded from the ensemble).
cnn = load_model( filepath + 'cnn_0_0.8891.hdf5' )
cnn_1 = load_model( filepath + 'cnn_1_0.8975.hdf5' )
# cnn_2 = load_model( filepath + 'cnn_2_0.9100.hdf5' )
cnn_bn = load_model( filepath + 'cnn_bn_0.hdf5' )
cnn_bn_1 = load_model( filepath + 'cnn_bn_1.hdf5' )
depthwise = load_model( filepath + 'depthwise_0_0.8731.hdf5' )
# depthwise_2 = load_model( filepath + 'depthwise_2_0.8577.hdf5' )
# depthwise_1 = load_model( filepath + 'depthwise_1_0.8529.hdf5' )
pretrained = load_model( filepath + 'pretrained_by_letter_cnn_model_0_0.9038.hdf5' )

# Feature-extractor networks and the XGBoost classifiers fed by them.
# NOTE(review): assigning to `xgb` rebinds the name that the import cell bound
# to the xgboost module; module-level calls through `xgb.` stop working after
# this cell runs.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load .dat files from a trusted source.
inter_model = load_model( filepath + 'inter_model.hdf5' )
xgb = joblib.load( filepath + 'xgb.dat' )
inter_model_1 = load_model( filepath + 'inter_model_1.hdf5' )
xgb_1 = joblib.load( filepath + 'xgb_1.dat' )

# VGG-style networks.
vgg_0 = load_model(filepath + 'vgg_0_0.9163.hdf5' )
vgg_1 = load_model(filepath + 'vgg_1_0.8906.hdf5' )
# vgg_2 = load_model(filepath + 'vgg_2_0.9053.hdf5' )

# Residual networks.
res_0 = load_model(filepath + 'ResNet_0_0.9066.hdf5' )
res_1 = load_model(filepath + 'ResNet_1_0.9261.hdf5' )
res_2 = load_model(filepath + 'ResNet_2_0.8996.hdf5' )

# Keras members of the ensemble, in the column order used for the features.
model_list = [
    cnn,
    cnn_1,
    # cnn_2,
    cnn_bn,
    cnn_bn_1,
    depthwise,
    # depthwise_1,
    # depthwise_2,
    pretrained,
    vgg_0,
    vgg_1,
    # vgg_2,
    res_0,
    res_1,
    res_2,
]

In [None]:
# Feature extractors and their XGBoost classifiers, matched by list position
# (data_for_reg_more pairs entry j of one list with entry j of the other).
inter_model_list = [inter_model, inter_model_1]

xgb_list = [xgb, xgb_1]

In [None]:
def data_for_reg_more(input):
    """Concatenate the class scores of every ensemble member column-wise.

    Members are the Keras models in the global ``model_list`` followed by each
    ``inter_model_list[j]`` -> ``xgb_list[j]`` pipeline. A counter is printed
    after each member as a progress indicator.

    Args:
        input: image batch accepted by ``model.predict``.

    Returns:
        ndarray with one row per sample and all members' score columns side by
        side, in the order described above.
    """
    pred_data = []
    for i, model in enumerate(model_list):
        pred_data.append( model.predict( input ) )
        print(i)
    for j, extractor in enumerate(inter_model_list):
        # The XGBoost classifier consumes the extractor output as a DataFrame.
        cnn_output = DataFrame(extractor.predict( input ))
        pred_data.append( xgb_list[j].predict_proba(cnn_output) )
        print(j)

    # Join every block instead of assuming exactly 13 ensemble members.
    return np.concatenate(pred_data, axis=1)

In [None]:
def stack_model_more( input_imgs , input_digit, model_num, N):
    """Train a two-hidden-layer stacking network on ``data_for_reg_more`` features.

    Args:
        input_imgs: DataFrame of pixel columns; its values are reshaped to
            ``(-1, 28, 28, 1)`` before feature extraction.
        input_digit: digit labels aligned with ``input_imgs``.
        model_num: number of ensemble members; the input layer expects
            ``10 * model_num`` features.
        N: width of the first hidden layer (the second one has ``2 * N`` units).

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Dense(N) -> BN -> Dense(2N) -> BN -> softmax(10); dropout stays disabled.
    stack = Sequential()
    for layer in (
        Input(shape=(10*model_num,)),
        Dense(N, activation = "relu"),
        BatchNormalization(),
        Dense(2*N, activation = "relu"),
        BatchNormalization(),
        Dense(10, activation = "softmax"),
    ):
        stack.add(layer)

    # Training schedule.
    batch_size, epochs = 500, 100

    # Ensemble scores as features, one-hot digits as targets, fixed 90/10 split.
    features = data_for_reg_more( input_imgs.values.reshape(-1,28,28,1) )
    targets = to_categorical(input_digit, 10)
    x_train, x_val, y_train, y_val = train_test_split(
        features, targets, test_size=0.1, random_state=15)

    stack.compile(
        optimizer = RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.0),
        loss = "categorical_crossentropy",
        metrics=["accuracy"])

    # Checkpoints are named after the validation loss of the epoch they store.
    model_path = set_filepath('model_stack') + 'stack_{val_loss:.4f}.hdf5'
    training_callbacks = [
        EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min'),
        ModelCheckpoint(filepath = model_path, save_best_only=True,
                        monitor='val_loss', mode='min', verbose=1),
    ]

    stack.fit(x_train, y_train,
              batch_size=batch_size,
              epochs = epochs,
              validation_data = (x_val,y_val),
              steps_per_epoch=x_train.shape[0]// batch_size,
              callbacks = training_callbacks)
    stack.summary()

    return stack


In [None]:
# 13 = 11 Keras models in model_list + 2 extractor/XGBoost pairs (10 scores
# each); 64 is the width of the first hidden layer.
stack = stack_model_more( train_aug.iloc[:,2:], train_aug['digit'], 13, 64 )

In [None]:
# Test pixels -> [0, 1] images -> ensemble scores -> stacked prediction.
test_pixels = test_copy.iloc[:,2:].values
x_test = np.divide(test_pixels,255).reshape(-1,28,28,1)
data_test = data_for_reg_more(x_test)

pred = stack.predict( data_test )
# One digit per image: position of the highest stacked score.
pred_test = [np.argmax(pred[row]) for row in range(len(pred))]
pred_test[:10]

In [None]:
# Write the current predictions (pred_test) as a submission CSV on Drive.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_stack_more+_using_test'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)

# Earlier submission files handed to pred_acc together with the new one.
# NOTE(review): pred_acc is defined outside this section; presumably it
# compares the new file against each listed submission - confirm.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             'submission_89_ensembles_stack_more',
             # 'submission_89',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_90_ensembles_linear_using_test_1000',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08'
             ]
pred_acc(file_name,file_list)

# Using predicted data

In [None]:
# Load three earlier submissions and keep only the test rows on which all
# three agree; those rows become pseudo-labelled training data.
pred1 = pd.read_csv('./drive/My Drive/DACON/submission/submission_90_ensembles_stack_more+_using_test.csv')
pred2 = pd.read_csv('./drive/My Drive/DACON/submission/submission_91_ensembles_6+2_bn_08.csv')
pred3 = pd.read_csv('./drive/My Drive/DACON/submission/submission_91_ensembles_3+1_w1.csv')

# Row positions where pred1 agrees with pred2 / with pred3.
overlap_id1 = np.flatnonzero((pred1['digit'] == pred2['digit']).values)
overlap_id2 = np.flatnonzero((pred1['digit'] == pred3['digit']).values)
# Sorted positions present in both sets. This replaces a quadratic
# list-membership scan that also shadowed the builtin `id` and produced a
# float array when nothing matched.
overlap_id = np.intersect1d(overlap_id1, overlap_id2)
print(len(overlap_id))

# id + agreed digit from pred1, followed by the test columns after 'id'.
aug_data = pd.concat([pred1.iloc[overlap_id,0:2], test_copy.iloc[overlap_id,1:]], axis=1)

aug = augmentation(aug_data, 2)
input_test = aug.iloc[:,2:]
input_digit = aug['digit']

In [None]:
# Ensemble scores for the pseudo-labelled (augmented) test images, plus their
# one-hot targets.
data_reg = data_for_reg_more( input_test.values.reshape(-1,28,28,1) )
data_val = input_digit
data_val = to_categorical(data_val, 10)

In [None]:
# Ensemble scores and one-hot targets for the augmented training images.
train_aug = augmentation(train_copy,2)
data_reg1 = data_for_reg_more( train_aug.iloc[:,2:].values.reshape(-1,28,28,1) )
data_val1 = train_aug['digit']
data_val1 = to_categorical(data_val1,10)
# Pseudo-labelled test rows stacked on top of the training rows.
x = np.concatenate([data_reg, data_reg1], axis=0)
y = np.concatenate([data_val, data_val1], axis=0)
# NOTE(review): the split below uses only the training-set arrays
# (data_reg1/data_val1); the combined x/y built above are not used here.
# Confirm whether train_test_split(x, y, ...) was intended.
x_train, x_val, y_train, y_val = train_test_split(data_reg1, data_val1,test_size=0.1,random_state=24)

In [None]:
# Split the pseudo-labelled test features only. When the notebook is run top
# to bottom this overwrites the x_train/x_val/y_train/y_val bound by the
# previous cell.
x_train, x_val, y_train, y_val = train_test_split(data_reg , data_val,test_size=0.1,random_state=24)

In [None]:
# Disabled alternative kept as a bare string (it is not executed): train on
# `data`/`data_val` and validate on a freshly augmented training set.
"""x_train = data
y_train = data_val

train_aug = augmentation(train_copy,3)
x_val = data_for_reg_more( train_aug.iloc[:,2:].values.reshape(-1,28,28,1) )
y_val = train_aug['digit']
y_val = to_categorical(y_val, 10)"""

In [None]:
# Number of ensemble members; the input layer expects 10 scores per member.
model_num = 13

# Width of the first hidden layer; later layers use 2N, 4N and 8N units.
N = 256
stack = Sequential()
stack.add(Input(shape=(10*model_num,)))
stack.add(Dense(N, activation = "relu"))
# stack.add(BatchNormalization())
stack.add(Dense(2*N, activation = "relu"))
# stack.add(BatchNormalization())
stack.add(Dense(4*N, activation = "relu"))
# stack.add(BatchNormalization())
stack.add(Dense(8*N, activation = "relu"))
# stack.add(BatchNormalization())
# stack.add(Dense(16*N, activation = "relu"))
# stack.add(BatchNormalization())
stack.add(Dense(10, activation = "softmax"))

# fit model
batch_size = 200
epochs = 10
# optimizer = RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.0)
optimizer = Adam()
stack.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

# NOTE(review): set_filepath is defined outside this section; presumably it
# returns the directory prefix for checkpoints - confirm.
file_name = 'model_stack'
model_path = set_filepath(file_name) + 'stack_{val_accuracy:.4f}.hdf5'

# callbacks
# These two objects are created but not passed to fit(): the `callbacks`
# argument below is commented out, so no early stopping or checkpointing runs.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')
mcp_save = ModelCheckpoint(filepath = model_path, save_best_only=True, monitor='val_loss', mode='min', verbose=1)

# Trains on whichever x_train/x_val/y_train/y_val split was bound most recently.
hist = stack.fit(x_train, y_train, batch_size=batch_size, epochs = epochs, 
            validation_data = (x_val,y_val),
            steps_per_epoch=x_train.shape[0]// batch_size
            # , callbacks = [early_stopping, mcp_save]
            )
# stack = load_best('model_stack')

In [None]:
# Normalise the test pixels, extract the ensemble scores and classify them
# with the stacking network trained above.
x_test = (
    np.divide(test_copy.iloc[:,2:].values,255)
    .reshape(-1,28,28,1)
)
data_test = data_for_reg_more(x_test)

pred = stack.predict( data_test )
# Collect the arg-max class of every prediction row.
pred_test = list(map(np.argmax, pred))
pred_test[:10]

In [None]:
# Write the current predictions (pred_test) as a submission CSV on Drive.
data = {'id':test_copy['id'], 'digit':pred_test}
submission = DataFrame(data)
file_name = 'submission_ensembles_stack_more+res_13_256'
submission.to_csv('./drive/My Drive/DACON/submission/'+file_name+'.csv', index=False)

# Earlier submission files handed to pred_acc together with the new one.
# NOTE(review): pred_acc is defined outside this section; presumably it
# compares the new file against each listed submission - confirm.
file_list = [ 'submission_84',
             'submission_85',
             'submission_86_xgb_ensemble',
             'submission_87_ensembles',
             'submission_87_ensembles_10+5_bn_linearreg',
             'submission_87_ensembles_stack_more+res_13_256',
             'submission_88_ensemble_2_2_4_try3',
             'submission_88_ensembles_10+1_bn_linearreg',
             'submission_88_ensembles_6+2_bn_linearreg_2',
             'submission_89_ensemble_2_2',
             'submission_89_ensembles_stack_more',
             'submission_89_ensembles_stack_more_using_test',
             'submission_89_ensembles_stack_more++_using_test_overlap_wobn_512',
             'submission_90_ensembles_6+2_bn_08_retry',
             'submission_90_pretrain_using_test_layer_4_3ensemble',
             'submission_90_ensembles_6+2_bn_linearreg',
             'submission_90_ensembles_linear_using_test_1000',
             'submission_90_ensembles_stack_more+_using_test',
             'submission_91_ensembles_3+1_w1',
             'submission_91_ensembles_6+2_bn_08',
             'submission_91_ensembles_stack_more++_using_test_overlap_909191_2048_aug',
             'submission_91_ensembles_stack_more++_using_test_overlap_909191_64_aug'
             ]
pred_acc(file_name,file_list)

In [None]:
# Run pred_acc for an earlier submission against the same file_list.
# NOTE(review): pred_acc is defined outside this section - confirm what it reports.
pred_acc('submission_89_ensembles_stack_more++_using_test_overlap_wobn_512',file_list)

In [None]:
# Compare a stored submission with the one written most recently (file_name).
# NOTE(review): compare() is defined outside this section - confirm what it reports.
compare('submission_ensembles_stack_more+res_using_test_overlap_wobn_512', file_name)

# Train VGG

In [None]:
def get_vgg_model( init_dim ):
    """Build an uncompiled VGG-style classifier for 28x28x1 images.

    Three convolution stages (2, 2 and 3 same-padded 3x3 ReLU convolutions with
    ``init_dim``, ``2*init_dim`` and ``4*init_dim`` filters), each followed by
    batch normalisation and 2x2 max pooling, then Dense 512 -> 1024 -> 10
    with a softmax output.

    Args:
        init_dim: number of filters in the first convolution stage.

    Returns:
        The assembled Keras ``Sequential`` model.
    """
    def conv3x3(filters, **extra):
        # Shared settings of every convolution in the network.
        return Conv2D(filters, (3, 3), activation='relu', padding="same", **extra)

    network_layers = [
        # stage 1
        conv3x3(init_dim, input_shape=(28, 28, 1)),
        conv3x3(init_dim),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        # stage 2
        conv3x3(init_dim*2),
        conv3x3(init_dim*2),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        # stage 3
        conv3x3(init_dim*4),
        conv3x3(init_dim*4),
        conv3x3(init_dim*4),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        # classifier head
        Flatten(),
        Dense(512, activation='relu'),
        Dense(1024, activation='relu'),
        Dense(10, activation='softmax'),
    ]

    model = Sequential()
    for layer in network_layers:
        model.add(layer)
    return model

In [None]:
# Training schedule for the VGG-style network.
batch_size = 100
epochs = 100

model = get_vgg_model(64)
# NOTE(review): set_filepath is defined outside this section; presumably it
# returns the directory prefix for checkpoints - confirm.
file_name = 'model_vgg'
model_path = set_filepath(file_name) + 'vgg' + '_{val_accuracy:.4f}.hdf5'

# callbacks
# Stop after 10 epochs without val_accuracy improvement, keep only the best
# checkpoint, and multiply the learning rate by 0.1 when val_loss has not
# improved for 5 epochs.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, mode='max')
mcp_save = ModelCheckpoint(filepath = model_path, save_best_only=True, monitor='val_accuracy', mode='max', verbose=1)
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_delta=1e-4, mode='min')
# compile
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

# fit model
# NOTE(review): train_test_gen is defined outside this section; it returns the
# train/validation arrays used below - meaning of the second argument not visible.
x_train, x_val, y_train, y_val = train_test_gen(train_copy, 2)

hist = model.fit(x_train, y_train, batch_size=batch_size, epochs = epochs, 
            validation_data = (x_val,y_val),
            steps_per_epoch=x_train.shape[0]// batch_size, 
            callbacks=[early_stopping,mcp_save,reduce_lr_loss])
# NOTE(review): load_best is defined outside this section; presumably it
# reloads the best checkpoint saved under file_name - confirm.
model = load_best(file_name)


In [None]:
# Display test row 5 as a 28x28 grayscale picture.
# `np.int` was only an alias of the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly.
a = test_copy.iloc[5,2:].values.astype(int).reshape(28,28)
plt.imshow(a,cmap='gray')

# ResNet

In [None]:
def get_ResNet(learning_rate):
    """Build and compile a small residual CNN for 28x28x1 digit images.

    Layout: a 128-filter stem convolution, two 128-filter residual blocks,
    max pooling, a 1x1 projection to 256 filters, two 256-filter residual
    blocks, max pooling, another 1x1 convolution, one 256-filter residual
    block, max pooling, then two Dense(512) + Dropout(0.5) layers and a
    10-way softmax.

    Args:
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` (categorical cross-entropy loss, accuracy
        metric).
    """
    def residual_block(shortcut, filters):
        # Two 3x3 convolutions whose output is added to the block input;
        # ReLU is applied after the addition.
        branch = Conv2D(filters, kernel_size = 3, padding = 'same', activation = 'relu')(shortcut)
        branch = Conv2D(filters, kernel_size = 3, padding = 'same')(branch)
        return Activation('relu')(add([shortcut, branch]))

    # Input layer
    img_input = Input(shape = (28,28,1))

    # conv_1 provides the tensor used by the first identity-mapping shortcut.
    conv_1 = Conv2D(128, kernel_size = 3, padding = 'same', activation = 'relu')(img_input)

    out_2_1 = residual_block(conv_1, 128)
    # The second block's activation was previously applied to the first
    # block's sum, which left this block disconnected from the model graph.
    # NOTE: weights saved from the old graph do not match this architecture
    # when restored with load_weights.
    out_2_2 = residual_block(out_2_1, 128)

    pool_2 = MaxPool2D((2,2), strides = 2)(out_2_2)

    # 1x1 convolution widens the channels to 256 so the shortcut shapes match.
    conv_3_0 = Conv2D(256, kernel_size = 1, strides = 1)(pool_2)
    out_3_1 = residual_block(conv_3_0, 256)
    out_3_2 = residual_block(out_3_1, 256)

    pool_3 = MaxPool2D((2,2), strides = 2)(out_3_2)

    conv_4_0 = Conv2D(256, kernel_size = 1, strides = 1)(pool_3)
    out_4_1 = residual_block(conv_4_0, 256)

    pool_4 = MaxPool2D((2,2), strides = 2)(out_4_1)

    # FC layers
    img_features = Flatten()(pool_4)
    img_features = Dense(512, activation = 'relu')(img_features)
    img_features = Dropout(rate = 0.5)(img_features)
    img_features = Dense(512, activation = 'relu')(img_features)
    img_features = Dropout(rate = 0.5)(img_features)

    # Output layer
    digit_pred = Dense(10, activation = 'softmax')(img_features)

    model = Model(inputs = img_input, outputs = digit_pred)

    model.compile(optimizer = Adam(lr = learning_rate),
                 loss = 'categorical_crossentropy',
                 metrics = ['accuracy'])

    return model

In [None]:
# Training schedule for the residual network.
batch_size = 100
epochs = 100

# get_ResNet returns an already compiled model (Adam with this learning rate).
model = get_ResNet(0.0001)
# NOTE(review): set_filepath is defined outside this section; presumably it
# returns the directory prefix for checkpoints - confirm.
file_name = 'model_ResNet'
model_path = set_filepath(file_name) + 'ResNet' + '_{val_accuracy:.4f}.hdf5'

# callbacks
# Both callbacks monitor val_loss, while the checkpoint file name embeds
# val_accuracy.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')
mcp_save = ModelCheckpoint(filepath = model_path, save_best_only=True, monitor='val_loss', mode='min', verbose=1)
# reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1, min_delta=1e-4, mode='min')
# The string below is a disabled re-compile with RMSprop; it is not executed.
"""# compile
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])
"""
# fit model
# NOTE(review): train_test_gen is defined outside this section; it returns the
# train/validation arrays used below - meaning of the second argument not visible.
x_train, x_val, y_train, y_val = train_test_gen(train_copy, 3)

hist = model.fit(x_train, y_train, batch_size=batch_size, epochs = epochs, 
            validation_data = (x_val,y_val),
            steps_per_epoch=x_train.shape[0]// batch_size, 
            callbacks=[early_stopping,mcp_save])
# NOTE(review): load_best is defined outside this section; presumably it
# reloads the best checkpoint saved under file_name - confirm.
model = load_best(file_name)
