In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, LSTM, concatenate, Dropout
from tensorflow.keras.utils import plot_model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

In [7]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [8]:
x_train = train.drop(['id', 'digit', 'letter'], axis=1).values
x_train = x_train.reshape(-1, 28, 28, 1)
x_train = x_train/255

y_data = train['digit']
y_train = np.zeros((len(y_data), len(y_data.unique())))
for i, digit in enumerate(y_data):
    y_train[i, digit] = 1
print(x_train.shape)
print(y_train.shape)

(2048, 28, 28, 1)
(2048, 10)


In [9]:
x_letter = train['letter'].values
x_letter = x_letter[:, np.newaxis]
en = OneHotEncoder()
x_letter = en.fit_transform(x_letter).toarray()
x_letter.shape

(2048, 26)

In [30]:
def build_model(con=3, drop=0.2, optimizer='adam', padding='valid'):
    input1 = Input(shape=(28,28,1), name='input1')
    x1 = Conv2D(64, (con,con), activation='relu', padding=padding, name='conv1')(input1)
    x1 = Dropout(drop)(x1)
    x1 = MaxPooling2D((2,2), name='pool1')(x1)
    x1 = Conv2D(64, (2,2), activation='relu', padding=padding, name='conv2')(x1)
    x1 = Dropout(drop)(x1)
    x1 = MaxPooling2D((2,2), name='pool2')(x1)
    x1 = Conv2D(128, (2,2), activation='relu', padding=padding, name='conv3')(x1)
    x1 = Dropout(drop)(x1)
    x1 = MaxPooling2D((2,2), name='pool3')(x1)
    x1 = Flatten(name='flat1')(x1)
    
    input2 = Input(shape=(26,), name='input2')
    
    merge = concatenate([x1, input2])
    x2 = Dense(500, activation='relu', name='hidden1')(merge)
    x2 = Dropout(drop)(x2)
    x2 = Dense(100, activation='relu', name='hidden2')(x2)
    x2 = Dense(50, activation='relu', name='hidden3')(x2)
    outputs = Dense(10, activation='softmax', name='output')(x2)
    
    model = Model(inputs = [input1, input2], outputs = outputs)
    model.compile(optimizer = optimizer, metrics = ['accuracy'], 
                  loss = 'categorical_crossentropy')
    
    return model

In [31]:
model_struct = build_model(3, 0.2, 'adam', 'valid')
model_struct.summary()
#plot_model(model_struct, show_shapes=True, to_file='struct.png')

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input1 (InputLayer)             [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 26, 26, 64)   640         input1[0][0]                     
__________________________________________________________________________________________________
dropout_21 (Dropout)            (None, 26, 26, 64)   0           conv1[0][0]                      
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 13, 13, 64)   0           dropout_21[0][0]                 
____________________________________________________________________________________________

In [25]:
def hyperparameters():
    con = [2, 3, 4, 5]
    dropout = np.linspace(0.1, 0.5, 5).tolist()
    batches = [32, 64, 128, 256]
    optimizers = ['adam', 'adadelta', 'rmsprop']
    padding = ['valid', 'same']
    return {'con':con, 'drop':dropout, 'batch_size':batches, 
            'optimizer':optimizers, 'padding':padding}

hyperparameters = hyperparameters()

In [26]:
model = KerasClassifier(build_fn = build_model, verbose=1)
search = RandomizedSearchCV(model, hyperparameters, cv=9)

In [44]:
multiple_input = [x_train, x_letter]

In [45]:
best_model = search.fit(multiple_input, y_train, epochs=10)

ValueError: Found input variables with inconsistent numbers of samples: [2, 2048]

In [None]:
print(search.best_params_)