### CNN
* http://andrew.gibiansky.com/blog/machine-learning/convolutional-neural-networks/
* http://www.wildml.com/2015/11/understanding-convolutional-neural-networks-for-nlp/
* https://medium.com/@ageitgey/machine-learning-is-fun-part-3-deep-learning-and-convolutional-neural-networks-f40359318721
* https://algotravelling.com/ru/%D0%BC%D0%B0%D1%88%D0%B8%D0%BD%D0%BD%D0%BE%D0%B5-%D0%BE%D0%B1%D1%83%D1%87%D0%B5%D0%BD%D0%B8%D0%B5-%D1%8D%D1%82%D0%BE-%D0%B2%D0%B5%D1%81%D0%B5%D0%BB%D0%BE-3/
* https://adeshpande3.github.io/adeshpande3.github.io/A-Beginner's-Guide-To-Understanding-Convolutional-Neural-Networks-Part-2/
* https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
* https://www.asozykin.ru/courses/nnpython
* https://www.tensorflow.org/tutorials/deep_cnn#convolutional-neural-networks

In [1]:
from scipy import ndimage
import numpy as np
import pandas as pd
import os
from IPython.display import display, Image
from scipy import misc
import matplotlib.pyplot as plt
import tensorflow
import seaborn

from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

Using TensorFlow backend.


### Load train transformed

In [2]:
train_df = pd.read_csv('data/data_analys.csv', index_col=0)

In [24]:
%%time

dir_train_transformed = 'data/train_transformed'

img_shape = (64, 64)
channels = 3
img_shape_flattened = img_shape[0] * img_shape[1] * channels
img_qty = train_df.shape[0]

# initialize X,y
X_train = np.empty(shape=(img_qty, img_shape_flattened), dtype=np.int8)
y_train = np.empty(shape=(img_qty,), dtype=np.uint16)

# read images
for i,f_name in enumerate(os.listdir(dir_train_transformed)):
    if i % 5000 == 0:
        print('{} images loaded'.format(i))
    
    img_path = os.path.join(dir_train_transformed, f_name)
    X_train[i, :] = misc.imread(img_path).flatten('C') # since img is np.ndarray, flatten in row-style
    y_train[i] = train_df.loc[train_df['image_name'] == f_name, 'target'].iloc[0]

0 images loaded
5000 images loaded
10000 images loaded
15000 images loaded
20000 images loaded
CPU times: user 1min 15s, sys: 3.92 s, total: 1min 18s
Wall time: 1min 31s


In [25]:
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size = 0.3, stratify=y_train)

### Load test transformed

In [5]:
%%time

dir_test_transformed = 'data/test_transformed'

X_val = np.empty(shape=(img_qty, img_shape_flattened), dtype=np.int8)

# read images
for i,f_name in enumerate(os.listdir(dir_test_transformed)):
    if i % 2000 == 0:
        print('{} images loaded'.format(i))
    
    img_path = os.path.join(dir_test_transformed, f_name)
    X_val[i, :] = misc.imread(img_path).flatten('C') # since img is np.ndarray, flatten in row-style

0 images loaded
2000 images loaded
4000 images loaded
6000 images loaded
CPU times: user 3.41 s, sys: 834 ms, total: 4.24 s
Wall time: 5.07 s


### Keras

In [36]:
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
        channel_shift_range=0.6)

# rotate, horizontal_flip, shift

img = misc.imread('data/test/6.jpg')  # this is a PIL image
x = img.reshape((1,) + img.shape)  # this is a Numpy array with shape (1, 3, 150, 150)

# the .flow() command below generates batches of randomly transformed images
# and saves the results to the `preview/` directory
i = 0
for batch in datagen.flow(x, batch_size=1,
                          save_to_dir='data/preview', save_prefix='cat', save_format='jpeg'):
    i += 1
    if i > 20:
        break  # otherwise the generator would loop indefinitely

In [7]:
dir_train_transformed = 'data/train_transformed'
dir_test_transformed = 'data/test_transformed'
img_width = 32
img_height = 32
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16
# faced with the issue https://stackoverflow.com/a/39851572/5151861
#                      https://github.com/fchollet/keras/issues/3945#issuecomment-274321680

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(img_width, img_height, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='tf'))

model.add(Conv2D(32, (3, 3), dim_ordering='tf'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(257))
model.add(Activation('softmax'))

  from ipykernel import kernelapp as app


In [8]:
#sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [9]:
train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1. / 255)

In [10]:
dir_train = 'data/train'
dir_test = 'data/test'

train_generator = train_datagen.flow_from_directory(
    dir_train,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    dir_test,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

model.save('first_try.h5')

Found 22897 images belonging to 257 classes.
Found 0 images belonging to 0 classes.
Epoch 1/50

Exception in thread Thread-19:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training.py", line 612, in data_generator_task
    generator_output = next(self._generator)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/preprocessing/image.py", line 732, in __next__
    return self.next(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/preprocessing/image.py", line 1021, in next
    index_array, current_index, current_batch_size = next(self.index_generator)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python

ValueError: output of generator should be a tuple (x, y, sample_weight) or (x, y). Found: None

In [27]:
model = Sequential()

model.add(Dense(units=64, input_shape=(16027, 12288)))
model.add(Activation('relu'))
model.add(Dense(units=10))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

#X_train, X_test, y_train, y_test
model.fit(X_train, y_train, epochs=5, batch_size=32)

ValueError: Error when checking input: expected dense_9_input to have 3 dimensions, but got array with shape (16027, 12288)