In [1]:
# import libraries
import numpy as np # linear algebra, matrix multiplications
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from math import sqrt
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

from keras import backend as K
import datetime

# for the architecture
from keras.models import Sequential
from keras.layers import Dense, Dropout, Lambda, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPool2D, AvgPool2D

# optimizer, data generator and learning rate reductor
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

from sklearn.metrics import confusion_matrix
import itertools

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import keras_metrics

Using TensorFlow backend.


In [8]:
def df_reshape(df, dim):
    """Reinterpret each row of ``df`` as a single-channel ``dim`` x ``dim`` image.

    Returns the frame's values as an array of shape ``(-1, dim, dim, 1)``;
    the leading axis is inferred from the number of elements.
    """
    target_shape = (-1, dim, dim, 1)
    flat_pixels = df.values
    return flat_pixels.reshape(target_shape)

def load_and_prepare_data(filename, seed=2, split_pct=0.2):
    """Load a CSV of flattened square images and split it into train/test sets.

    The CSV is read with its first column as the index and must contain a
    ``label`` column; every remaining column is treated as one pixel of a
    square, single-channel image.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.
    seed : int, optional
        Seed used for the row shuffle, for NumPy's global RNG and for the
        train/test split.
    split_pct : float, optional
        Fraction of the samples that goes into the test set.

    Returns
    -------
    X_train, X_test, Y_train, Y_test
        Image arrays of shape ``(n, dim, dim, 1)`` and the matching one-hot
        encoded label arrays.

    Raises
    ------
    ValueError
        If the number of pixel columns is not a perfect square.
    """
    # Seed the shuffle explicitly: it used to run before any seed was set,
    # so the row order (and everything downstream) changed between runs.
    data = (
        pd.read_csv(filename, index_col=[0])
        .sample(frac=1, random_state=seed)
        .reset_index(drop=True)
    )
    labels = data.pop('label').values

    n_pixels = data.shape[1]
    dim = int(sqrt(n_pixels))
    if dim * dim != n_pixels:
        # Without this check the reshape below can silently cut rows into
        # misaligned images whenever the total element count happens to fit.
        raise ValueError(
            "expected a square number of pixel columns, got %d" % n_pixels
        )

    # Normalize the data (assumes 8-bit pixel intensities) and replace
    # missing pixels with the maximum value 1.0.
    data = data / 255.0
    data = data.fillna(1.0)
    data = df_reshape(data, dim)  # numpy.ndarray from here on

    # Number of classes spanned by the labels (2 for the datasets used here).
    # Labels are shifted so the smallest becomes 0: to_categorical indexes
    # columns by label value, which would overflow if labels started above 0.
    label_min = labels.min()
    nclasses = int(labels.max() - label_min + 1)
    labels = to_categorical(labels - label_min, num_classes=nclasses)

    # Fix NumPy's global random state for code that runs after this call.
    np.random.seed(seed)

    # Stratified split so both sets keep the class proportions of the data.
    X_train, X_test, Y_train, Y_test = train_test_split(
        data,
        labels,
        test_size=split_pct,
        random_state=seed,
        shuffle=True,
        stratify=labels,
    )

    return X_train, X_test, Y_train, Y_test

def build_model(dim=28, nclasses=2):
    """Build the (uncompiled) convolutional classifier.

    Parameters
    ----------
    dim : int, optional
        Side length of the square, single-channel input images. Defaults to
        28, the value that was previously hard-coded.
    nclasses : int, optional
        Number of output classes, i.e. the width of the softmax layer.
        Defaults to 2, the value that was previously hard-coded.

    Returns
    -------
    keras.models.Sequential
        The assembled model; compiling it is left to the caller.
    """
    model = Sequential()

    # Feature extractor: two 5x5 'same'-padded convolutions, then 2x2 max
    # pooling (halves each spatial dimension) and a 20% dropout.
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                     activation='relu', input_shape=(dim, dim, 1)))
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                     activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.2))

    # Classifier head: 120 -> 84 -> nclasses with a softmax output.
    model.add(Flatten())
    model.add(Dense(120, activation='relu'))
    model.add(Dense(84, activation='relu'))
    model.add(Dense(nclasses, activation='softmax'))

    return model

def train_model(model, epochs, batch_size, train_data=None, validation_data=None):
    """Compile ``model`` and train it, printing start and end timestamps.

    Parameters
    ----------
    model : keras model
        Model to compile and fit; it is modified in place.
    epochs : int
        Number of training epochs.
    batch_size : int
        Number of samples per training batch.
    train_data : tuple ``(X, Y)``, optional
        Training images and one-hot labels. When omitted, the notebook-level
        globals ``X_train`` and ``Y_train`` are used, as before.
    validation_data : tuple ``(X, Y)``, optional
        Validation images and labels. When omitted, the notebook-level
        globals ``X_test`` and ``Y_test`` are used, as before.

    Returns
    -------
    The same ``model`` object after training.
    """
    # Fall back to the notebook globals so existing three-argument calls keep
    # working; passing the data explicitly avoids depending on hidden state.
    if train_data is None:
        train_data = (X_train, Y_train)
    if validation_data is None:
        validation_data = (X_test, Y_test)
    x_fit, y_fit = train_data

    # Halve the learning rate after 3 epochs without validation-accuracy
    # improvement, never going below 1e-5.
    # NOTE(review): newer Keras releases report this metric as
    # 'val_accuracy' - confirm the name against the installed version.
    lr_reduction = ReduceLROnPlateau(monitor='val_acc',
                                     patience=3,
                                     verbose=1,
                                     factor=0.5,
                                     min_lr=0.00001)

    # Every normalisation/augmentation option is switched off, so the
    # generator only batches the data; the options are kept as tuning knobs.
    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=0,                     # random rotation range in degrees
        zoom_range=0,                         # random zoom range
        width_shift_range=0,                  # random horizontal shift (fraction of width)
        height_shift_range=0,                 # random vertical shift (fraction of height)
        horizontal_flip=False,                # randomly flip images horizontally
        vertical_flip=False)                  # randomly flip images vertically

    datagen.fit(x_fit)

    # NOTE(review): mean squared error on a softmax output is unusual for
    # classification (categorical cross-entropy is the common choice); it is
    # kept so results stay comparable with the models trained so far.
    model.compile(optimizer="adam",
                  loss="mean_squared_error",
                  metrics=["accuracy",
                           keras_metrics.precision(),
                           keras_metrics.recall(),
                           keras_metrics.f1_score()])

    # Integer division yields 0 when there are fewer samples than one batch,
    # which would make every epoch run zero training steps; use at least one.
    steps_per_epoch = max(1, x_fit.shape[0] // batch_size)

    print('Start')
    print(datetime.datetime.now())
    model.fit_generator(datagen.flow(x_fit, y_fit, batch_size=batch_size),
                        epochs=epochs,
                        validation_data=validation_data,
                        verbose=1,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[lr_reduction])

    print('End')
    print(datetime.datetime.now())

    return model

In [5]:
# Load './obrazy_concat_3000.csv' and bind the split to the notebook-level
# names X_train / X_test / Y_train / Y_test, which train_model reads as globals.
X_train, X_test, Y_train, Y_test = load_and_prepare_data('./obrazy_concat_3000.csv')

In [6]:
# Build a fresh, untrained network and show its layer table.
model = build_model()
# summary() prints the table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        832       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 32)        25632     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 14, 14, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 6272)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 120)               752760    
_________________________________________________________________
dense_2 (Dense)              (None, 84)                10164     
__________

In [9]:
# Train for 15 epochs with a batch size of 64 on the data currently bound to
# the notebook-level X_train / Y_train; train_model returns the same model.
model = train_model(model, 15, 64)

Start
2019-05-13 13:12:33.389354
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
End
2019-05-13 13:23:30.759821


In [10]:
# Persist the model trained on './obrazy_concat_3000.csv' as "nn_3000".
model.save("nn_3000")

In [11]:
# Same experiment on './obrazy_concat_2000.csv'. The load must come first:
# it rebinds the notebook-level X_train / X_test / Y_train / Y_test that
# train_model reads as globals.
X_train, X_test, Y_train, Y_test = load_and_prepare_data('./obrazy_concat_2000.csv')
# Start from a fresh, untrained network rather than continuing the previous one.
model = build_model()
# 15 epochs, batch size 64.
model = train_model(model, 15, 64)
model.save("nn_2000")

Start
2019-05-13 13:25:47.759662
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
End
2019-05-13 13:34:08.060877


In [12]:
# Same experiment on './obrazy_concat_1000.csv'. The load must come first:
# it rebinds the notebook-level X_train / X_test / Y_train / Y_test that
# train_model reads as globals.
X_train, X_test, Y_train, Y_test = load_and_prepare_data('./obrazy_concat_1000.csv')
# Start from a fresh, untrained network rather than continuing the previous one.
model = build_model()
# 15 epochs, batch size 64.
model = train_model(model, 15, 64)
model.save("nn_1000")

Start
2019-05-13 13:36:49.317117
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/15
Epoch 13/15
Epoch 14/15

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 15/15
End
2019-05-13 13:40:21.914506


In [13]:
# Same experiment on './obrazy_concat_500.csv'. The load must come first:
# it rebinds the notebook-level X_train / X_test / Y_train / Y_test that
# train_model reads as globals.
X_train, X_test, Y_train, Y_test = load_and_prepare_data('./obrazy_concat_500.csv')
# Start from a fresh, untrained network rather than continuing the previous one.
model = build_model()
# 15 epochs, batch size 64.
model = train_model(model, 15, 64)
model.save("nn_500")

Start
2019-05-13 13:41:34.309567
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
End
2019-05-13 13:43:18.073170


In [14]:
# Same experiment on './obrazy_concat_100.csv'. The load must come first:
# it rebinds the notebook-level X_train / X_test / Y_train / Y_test that
# train_model reads as globals.
X_train, X_test, Y_train, Y_test = load_and_prepare_data('./obrazy_concat_100.csv')
# Start from a fresh, untrained network rather than continuing the previous one.
model = build_model()
# 15 epochs, batch size 64.
model = train_model(model, 15, 64)
model.save("nn_100")

Start
2019-05-13 13:44:08.743530
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15

Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 13/15
Epoch 14/15
Epoch 15/15

Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
End
2019-05-13 13:44:26.265625
