In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Activation
from tensorflow.keras.layers import concatenate, Dropout, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Reproducibility: one shared seed for Python hashing, NumPy and TensorFlow.
# NOTE(review): PYTHONHASHSEED is set from inside the running kernel here;
# presumably it only takes effect for processes started afterwards — confirm.
import os

seed = 123
np.random.seed(seed)
tf.random.set_seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)

In [16]:
# Load the labelled data and the unlabelled test set.
train_val = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

# The last 48 labelled rows are held out as `valid`; everything before them
# is used as `train`.
train, valid = train_val.iloc[:-48], train_val.iloc[-48:]

print(train.shape)
print(valid.shape)

(2000, 787)
(48, 787)


In [17]:
# Random augmentation used by `augment`: horizontal/vertical shifts, zoom,
# shear and rotation.
# NOTE(review): the Keras documentation gives rotation_range in degrees, so
# 0.2 is an almost imperceptible rotation — confirm it was not meant to be
# a larger angle (shear_range=10 is also expressed in degrees).
image_generator = ImageDataGenerator(
    rotation_range=0.2,
    shear_range=10,
    zoom_range=[0.8, 1.2],
    width_shift_range=0.2,
    height_shift_range=0.2,
)

SyntaxError: invalid syntax (<ipython-input-17-fecd92899a8a>, line 4)

In [18]:
# Keep only the pixel columns (drop id / digit / letter), reshape every row
# into a 28x28 single-channel image and divide by 255.
x1 = (
    train.drop(['id', 'digit', 'letter'], axis=1)
    .values
    .reshape(-1, 28, 28, 1)
    / 255
)

# Independent copy that the augmentation step extends.
x1_total = x1.copy()

def augment(x, generator=None):
    """Return one randomly augmented copy of every image in ``x``.

    Parameters
    ----------
    x : array-like
        Batch of images, iterated along its first axis. Each image is passed
        to the generator as a batch of size one.
    generator : object, optional
        Any object exposing ``flow(batch, batch_size=1)`` that yields
        augmented batches. Defaults to the notebook-level
        ``image_generator``.

    Returns
    -------
    numpy.ndarray
        The augmented images, one per input image, in input order. An empty
        input yields an empty array with the same shape as ``x``.
    """
    if generator is None:
        generator = image_generator

    # Use the argument itself; the previous version silently read the global
    # `x1` and ignored whatever was passed in.
    x = np.asarray(x)
    if x.shape[0] == 0:
        # Keep the trailing dimensions so the result can still be concatenated.
        return np.empty(x.shape, dtype=x.dtype)

    aug_list = []
    for image in x:
        single_batch = image.reshape((1,) + image.shape)
        # flow() keeps yielding batches; only the first one is needed.
        augmented_batch = next(iter(generator.flow(single_batch, batch_size=1)))
        aug_list.append(augmented_batch[0])
    return np.array(aug_list)

# Number of augmented copies of the training images to append. Later cells
# reuse `n` to replicate the labels and letter features the same number of
# times.
n = 1

# Build the extended set from `x1` in a single concatenate so that re-running
# this cell does not keep growing `x1_total`. The former `if i > n: break`
# guard could never trigger inside `range(n)` and has been removed.
augmented_sets = [augment(x1) for _ in range(n)]
x1_total = np.concatenate([x1, *augmented_sets], axis=0)

print(x1_total.shape)

(4000, 28, 28, 1)


In [19]:
y1_data = train['digit']

# One-hot encode the digit labels. The width is taken from the largest label
# rather than from the number of distinct labels, so a label value that never
# occurs cannot push a larger label out of bounds.
num_digit_classes = int(y1_data.max()) + 1
y1 = np.zeros((len(y1_data), num_digit_classes))
y1[np.arange(len(y1_data)), y1_data.to_numpy()] = 1

# The original rows followed by `n` repeats, matching the row order of
# x1_total (originals first, then each augmented copy).
y1_total = np.tile(y1, (n + 1, 1))

print(y1_total.shape)

(4000, 10)


In [20]:
# One-hot encode the 'letter' column; the fitted encoder is kept in `en`.
en = OneHotEncoder()
x1_let = en.fit_transform(train['letter'].values[:, np.newaxis]).toarray()

# The original rows followed by `n` repeats of the same rows.
x1_letter_total = np.tile(x1_let, (n + 1, 1))

print(x1_letter_total.shape)

(4000, 26)


In [21]:
# Split images, letter features and labels in ONE call so the three arrays
# stay row-aligned. Previously the letters were split by plain slicing while
# the images and labels were shuffled, which paired each image with the
# letter of an unrelated row.
# random_state pins the shuffle to the notebook seed so the split is
# repeatable regardless of how much of the global RNG was consumed earlier.
# NOTE(review): x1_total holds every image plus its augmented copy, so an
# image and its augmentation can land on opposite sides of this split.
(x1_train, x1_val,
 x1_letter_train, x1_letter_val,
 y1_train, y1_val) = train_test_split(
    x1_total, x1_letter_total, y1_total,
    test_size=0.2, shuffle=True, stratify=y1_total, random_state=seed)

print(x1_train.shape)
print(x1_val.shape)
print(y1_train.shape)
print(y1_val.shape)

print(x1_letter_train.shape)
print(x1_letter_val.shape)

(3200, 28, 28, 1)
(800, 28, 28, 1)
(3200, 10)
(800, 10)
(3200, 26)
(800, 26)


In [22]:
def Conv_block(x, growth_rate, activation='relu'):
    """Apply two BatchNorm -> activation -> convolution stages and append the result to ``x``.

    The first stage is a 1x1 convolution with ``growth_rate * 4`` filters, the
    second a 3x3 convolution with ``growth_rate`` filters. The output of the
    second stage is concatenated with the original input and returned.
    """
    stages = (
        (growth_rate * 4, (1, 1)),
        (growth_rate, (3, 3)),
    )

    new_features = x
    for filters, kernel_size in stages:
        new_features = BatchNormalization()(new_features)
        new_features = Activation(activation)(new_features)
        new_features = Conv2D(filters, kernel_size, padding='same',
                              kernel_initializer='he_normal')(new_features)

    return concatenate([x, new_features])

def Dense_block(x, layers, growth_rate=32):
    """Chain ``layers`` calls to ``Conv_block``, feeding each output into the next."""
    features = x
    for _ in range(layers):
        features = Conv_block(features, growth_rate)
    return features

def Transition_layer(x, compression_factor=0.5, activation='relu'):
    """BatchNorm -> activation -> 1x1 convolution -> 2x2 average pooling (stride 2).

    The convolution uses ``int(last_dim * compression_factor)`` filters, where
    ``last_dim`` is the size of the last dimension of ``x``.
    """
    last_dim = tf.keras.backend.int_shape(x)[-1]
    reduced_filters = int(last_dim * compression_factor)

    out = BatchNormalization()(x)
    out = Activation(activation)(out)
    out = Conv2D(reduced_filters, (1, 1), padding='same',
                 kernel_initializer='he_normal')(out)

    return AveragePooling2D((2, 2), padding='same', strides=2)(out)

def DenseNet(model_input, classes, densenet_type='DenseNet-121'):
    """Build a single-input DenseNet-style classifier.

    Reads the notebook-level ``base_growth_rate`` and ``layers_in_block`` at
    call time. Only the first three entries of
    ``layers_in_block[densenet_type]`` are used; a transition layer sits
    between consecutive dense blocks.

    Returns a ``Model`` named ``densenet_type`` whose output is a softmax over
    ``classes`` units.
    """
    # Stem: 7x7 stride-2 convolution, BatchNorm, ReLU, 3x3 stride-2 max pooling.
    features = Conv2D(base_growth_rate * 2, (7, 7), padding='same', strides=2,
                      kernel_initializer='he_normal')(model_input)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = MaxPooling2D((3, 3), padding='same', strides=2)(features)

    # Dense blocks 0..2, each one after the first preceded by a transition.
    for block_index in range(3):
        if block_index > 0:
            features = Transition_layer(features, compression_factor=0.5)
        features = Dense_block(features,
                               layers_in_block[densenet_type][block_index],
                               base_growth_rate)

    pooled = GlobalAveragePooling2D()(features)
    model_output = Dense(classes, activation='softmax',
                         kernel_initializer='he_normal')(pooled)

    return Model(model_input, model_output, name=densenet_type)

In [23]:
def DenseNet_letter(model_input, letter, classes, densenet_type='DenseNet-121'):
    """Build a two-input classifier: DenseNet-style image branch plus a ``letter`` input.

    Reads the notebook-level ``base_growth_rate`` and ``layers_in_block`` at
    call time. Only the first three entries of
    ``layers_in_block[densenet_type]`` are used. The globally pooled image
    features are concatenated with ``letter``, passed through a 500-unit ReLU
    layer with dropout 0.3, and classified with a softmax over ``classes``
    units.

    Returns a ``Model`` named ``densenet_type`` with inputs
    ``[model_input, letter]``.
    """
    # Stem: 5x5 stride-1 convolution, BatchNorm, ReLU, 2x2 stride-2 max pooling.
    features = Conv2D(base_growth_rate * 2, (5, 5), padding='same', strides=1,
                      kernel_initializer='he_normal')(model_input)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = MaxPooling2D((2, 2), padding='same', strides=2)(features)

    # Dense blocks 0..2, each one after the first preceded by a transition.
    for block_index in range(3):
        if block_index > 0:
            features = Transition_layer(features, compression_factor=0.5)
        features = Dense_block(features,
                               layers_in_block[densenet_type][block_index],
                               base_growth_rate)

    pooled = GlobalAveragePooling2D()(features)

    # Fuse the image features with the letter input before classification.
    head = concatenate([pooled, letter])
    head = Dense(500, activation='relu')(head)
    head = Dropout(0.3)(head)

    model_output = Dense(classes, activation='softmax',
                         kernel_initializer='he_normal')(head)

    return Model(inputs=[model_input, letter], outputs=model_output,
                 name=densenet_type)

In [24]:
def DenseNet_CNN(model_input1, model_input2, classes, densenet_type='DenseNet-121'):
    """Build a two-branch classifier: a DenseNet-style branch and a small CNN branch.

    Reads the notebook-level ``base_growth_rate`` and ``layers_in_block`` at
    call time. The first branch (on ``model_input1``) uses only the first two
    entries of ``layers_in_block[densenet_type]``. The second branch (on
    ``model_input2``) is three Conv2D -> Dropout(0.3) -> MaxPooling2D stages
    followed by Flatten and a 500-unit ReLU layer. Both branches are
    concatenated, passed through a 500-unit ReLU layer with dropout 0.3, and
    classified with a softmax over ``classes`` units.

    Returns a ``Model`` named ``densenet_type`` with inputs
    ``[model_input1, model_input2]``.
    """
    # --- Branch 1: DenseNet-style feature extractor -------------------------
    dense_features = Conv2D(base_growth_rate * 2, (7, 7), padding='same', strides=2,
                            kernel_initializer='he_normal')(model_input1)
    dense_features = BatchNormalization()(dense_features)
    dense_features = Activation('relu')(dense_features)
    dense_features = MaxPooling2D((3, 3), padding='same', strides=2)(dense_features)

    # Dense blocks 0..1 with one transition in between.
    for block_index in range(2):
        if block_index > 0:
            dense_features = Transition_layer(dense_features, compression_factor=0.5)
        dense_features = Dense_block(dense_features,
                                     layers_in_block[densenet_type][block_index],
                                     base_growth_rate)

    dense_features = GlobalAveragePooling2D()(dense_features)

    # --- Branch 2: plain CNN -------------------------------------------------
    cnn_features = model_input2
    for filters in (64, 64, 128):
        cnn_features = Conv2D(filters, (2, 2), activation='relu',
                              padding='same')(cnn_features)
        cnn_features = Dropout(0.3)(cnn_features)
        cnn_features = MaxPooling2D((2, 2))(cnn_features)
    cnn_features = Flatten()(cnn_features)
    cnn_features = Dense(500, activation='relu')(cnn_features)

    # --- Fusion head ---------------------------------------------------------
    head = concatenate([dense_features, cnn_features])
    head = Dense(500, activation='relu')(head)
    head = Dropout(0.3)(head)

    model_output = Dense(classes, activation='softmax',
                         kernel_initializer='he_normal')(head)

    return Model(inputs=[model_input1, model_input2], outputs=model_output,
                 name=densenet_type)

In [25]:
# Number of Conv_block units per dense block for each named variant. The
# model builders above look these up by `densenet_type`.
layers_in_block = {
    'DenseNet-121': [6, 12, 24, 16],
    'DenseNet-169': [6, 12, 32, 32],
    'DenseNet-201': [6, 12, 48, 32],
    'DenseNet-265': [6, 12, 64, 48],
}

# Growth rate passed to every dense block; the stem uses twice this many filters.
base_growth_rate = 32

# Model inputs: a 28x28 single-channel image and a 26-wide letter vector.
model_input1 = Input(shape=(28, 28, 1))
letter = Input(shape=(26,))

classes = 10

model = DenseNet_letter(model_input1, letter, classes, 'DenseNet-121')
model.summary()

Model: "DenseNet-121"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 28, 28, 64)   1664        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 28, 28, 64)   256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 28, 28, 64)   0           batch_normalization[0][0]        
_______________________________________________________________________________________

In [None]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Save a checkpoint whenever val_loss improves; the file name records the
# epoch number and the validation accuracy. Early stopping is not enabled.
cp = ModelCheckpoint(
    './models/{epoch:02d}-{val_accuracy:.4f}.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

history = model.fit(
    [x1_train, x1_letter_train],
    y1_train,
    validation_data=([x1_val, x1_letter_val], y1_val),
    epochs=50,
    batch_size=64,
    callbacks=[cp],
)

Train on 3200 samples, validate on 800 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50

In [12]:
from tensorflow.keras.models import load_model

# NOTE(review): the ModelCheckpoint callback in this notebook writes files
# named '<epoch>-<val_accuracy>.h5', so 'mydensenet.h5' is presumably a
# checkpoint that was renamed or saved by hand — confirm where it comes from.
best_model = load_model(filepath='./models/mydensenet.h5')

In [13]:
# Hold-out images, preprocessed the same way as the training images:
# pixel columns only, reshaped to 28x28x1 and divided by 255.
x1_test = valid.drop(['id', 'digit', 'letter'], axis=1).values
x1_test = x1_test.reshape(-1, 28, 28, 1) / 255

# Encode the hold-out letters with the encoder already fitted on the training
# letters (`en`) instead of fitting a fresh encoder on `train_val`. This keeps
# the feature columns identical to the ones the model was trained on, avoids
# overwriting `en`, and removes the second hard-coded 48-row slice.
x1_letter_test = en.transform(valid['letter'].values[:, np.newaxis]).toarray()

# Predicted digit = index of the largest softmax output per row.
y1_test = best_model.predict([x1_test, x1_letter_test])
y_1 = np.argmax(y1_test, axis=1)
print(y_1)

[4 2 9 8 6 2 3 4 7 8 9 9 1 2 5 1 2 1 1 0 8 5 7 7 9 7 5 9 6 8 4 2 6 0 7 8 4
 6 4 3 2 2 6 6 1 9 0 5]


In [14]:
# Attach the predicted digits as a 'pred' column and write the result to disk.
submission = pd.read_csv('data/val.csv').assign(pred=y_1)
submission.to_csv('val_0820.csv', index=False)