In [12]:
import pandas as pd
import numpy as np
import tensorflow as tf

from keras.callbacks import TensorBoard
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
# from processing import load_data, normalize_inputs

# CSV locations read by the data-loading cells below (relative to the notebook).
train_path, test_path = './data/train.csv', './data/test.csv'

In [9]:
class get_data:
    """Read a CSV file and, for training data, derive scaled train/validation sets.

    Attributes
    ----------
    df : pandas.DataFrame
        The CSV contents exactly as read by ``pd.read_csv``.
    ts_f, ts_l : training features / one-hot labels (only when ``is_train``).
    vs_f, vs_l : validation features / one-hot labels (only when ``is_train``).
    """

    def __init__(self, path, is_train=True):
        """Load ``path``; when ``is_train`` is true also build the split.

        With ``is_train=False`` only ``self.df`` is populated and no scaling
        is applied to it.
        """
        self.df = pd.read_csv(path)
        if not is_train:
            return
        self.get_ts_vs()

    def get_ts_vs(self):
        """Separate the 'label' column, split 90/10, then scale both parts."""
        labels = self.df['label']
        features = self.df.drop('label', axis=1)

        # Fixed random_state keeps the 10% validation hold-out reproducible.
        train_x, valid_x, train_y, valid_y = train_test_split(
            features, labels, test_size=0.1, random_state=0
        )

        self.ts_f, self.ts_l = self.normalize_inputs(train_x, train_y)
        self.vs_f, self.vs_l = self.normalize_inputs(valid_x, valid_y)

    def normalize_inputs(self, x, y):
        """Return ``x / 255`` as a float32 tensor and ``y`` one-hot (10 classes) as int32."""
        scaled = tf.cast(x / 255, tf.float32)
        one_hot = tf.cast(to_categorical(y, 10), tf.int32)
        return scaled, one_hot

In [28]:
class create_model:
    """Dense softmax classifier for 784-feature inputs (Keras functional API).

    Architecture: Input(784) -> Dense(512, relu) -> Dense(512, relu) ->
    Dense(10, softmax). The model is compiled with the Adam optimizer,
    categorical cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    save_model : bool
        When True, ``train`` attaches a TensorBoard callback that logs to
        ``save_model_path``. Despite the name, nothing in this class writes
        model weights to disk.
    save_model_path : str
        Log directory handed to the TensorBoard callback.
    epochs : int
        Number of epochs passed to ``Model.fit``.
    """

    def __init__(
        self,
        save_model=True,
        save_model_path='./models/keras_functional/default',
        epochs=10
    ):
        self.epochs = epochs
        self.save_model = save_model
        self.save_model_path = save_model_path

        self.inputs = Input(shape=(784,))
        # build_nn() must run before Model(...) because it sets self.pred.
        self.build_nn()

        self.model = Model(
            inputs=self.inputs,
            outputs=self.pred
        )
        # compile model
        self.model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

    def build_nn(self):
        """Wire the hidden layers onto ``self.inputs`` and store the output in ``self.pred``."""
        h1 = Dense(512, activation='relu')(self.inputs)
        h2 = Dense(512, activation='relu')(h1)
        self.pred = Dense(10, activation='softmax')(h2)

    def train(
        self,
        ts_f,
        ts_l,
        vs_f,
        vs_l
    ):
        """Fit the model and keep the Keras ``History`` object in ``self.history``.

        Parameters
        ----------
        ts_f, ts_l : training features and labels.
        vs_f, vs_l : validation features and labels, passed as ``validation_data``.
        """
        self.create_cb()
        # batch_size is left unset and the epoch length is fixed in steps instead.
        # NOTE(review): presumably required because the notebook feeds tf tensors
        # rather than arrays - confirm against the installed Keras version.
        self.history = self.model.fit(
            ts_f,
            ts_l,
            validation_data=(vs_f, vs_l),
            epochs=self.epochs,
            callbacks=self.cbs,
            batch_size=None,
            steps_per_epoch=10,
            validation_steps=10
        )

    def create_cb(self):
        """Build ``self.cbs``: a TensorBoard callback when ``save_model`` is set, else empty."""
        self.cbs = []
        if self.save_model:
            tensorboard = TensorBoard(log_dir=self.save_model_path)
            self.cbs.append(tensorboard)

    def predict(self, test_df):
        """Predict a class per row of ``test_df`` and store a submission frame.

        The result is kept in ``self.result`` with two columns: ``ImageId``
        (1-based row number) and ``Label`` (index of the highest-scoring
        class). No scaling is applied here, so ``test_df`` must already be
        preprocessed the same way as the training features.
        """
        # NOTE(review): steps=1 runs a single prediction step. On Keras versions
        # that batch array inputs this may only cover the first batch - confirm
        # that len(self.result) equals the number of input rows.
        pred = pd.DataFrame(self.model.predict(test_df, steps=1))
        # Column labels are the class indices, so idxmax yields the predicted class.
        pred = pd.DataFrame(pred.idxmax(axis=1))
        pred.index.name = 'ImageId'
        pred = pred.rename(columns={0: 'Label'}).reset_index()
        # Shift the 0-based row index to 1-based image ids.
        pred['ImageId'] = pred['ImageId'] + 1
        self.result = pred

    def save_result(self, path='./predictions/keras_functional/submission.csv'):
        """Write ``self.result`` to ``path`` as CSV without the index column.

        Missing parent directories of ``path`` are created first, so a fresh
        checkout without the output folder no longer fails inside ``to_csv``.

        Raises
        ------
        AttributeError
            If ``predict`` has not been called yet (``self.result`` is unset).
        """
        import os  # local import: the notebook's import cell does not provide os

        # Read the attribute first so a missing result fails before touching disk.
        result = self.result

        # Only filesystem paths need a directory; buffers are passed through as-is.
        if isinstance(path, (str, os.PathLike)):
            parent = os.path.dirname(os.fspath(path))
            if parent:
                os.makedirs(parent, exist_ok=True)

        result.to_csv(path, index=False)

In [29]:
# Read the labelled training CSV; with the default is_train=True the loader
# also builds the scaled train/validation split.
train = get_data(path=train_path)
train.df.head(n=5)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# save_model=False: no TensorBoard callback is attached during training.
model = create_model(save_model=False)

In [31]:
# Fit on the training split, passing the held-out split as validation data.
model.train(
    ts_f=train.ts_f,
    ts_l=train.ts_l,
    vs_f=train.vs_f,
    vs_l=train.vs_l,
)

Train on 37800 samples, validate on 4200 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [32]:
# Read the unlabelled test CSV; is_train=False skips the split and scaling step.
test = get_data(path=test_path, is_train=False)
test.df.head(n=5)

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
# Scale the test pixels by 1/255, exactly as get_data.normalize_inputs does for
# the training features, so the network sees inputs on the scale it was trained on.
# get_data(..., is_train=False) leaves test.df unscaled.
model.predict(test.df / 255)

In [37]:
# Write the ImageId/Label predictions produced by model.predict to a CSV file.
model.save_result(path='./result/functional/functional_predict.csv')