In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.utils import np_utils, generic_utils
from keras.preprocessing.image import ImageDataGenerator
from six.moves import range

In [1]:
import numpy as np
import pandas as pd
import os
from skimage import transform
from skimage import io

In [2]:
# Load the training labels table; later cells read its `image` and `level` columns.
labels = pd.read_csv('../input/trainLabels.csv')

In [84]:
def split_index(row, n_parts=6):
    """Split the row range ``[0, row)`` into contiguous ``[start, stop]`` pairs.

    The pairs are meant to be handed to separate worker processes, each of
    which iterates ``range(start, stop)``.

    Parameters
    ----------
    row : int
        Total number of rows to split.
    n_parts : int, optional
        Number of parts to produce (default 6, the original fixed value).

    Returns
    -------
    list of [int, int]
        ``n_parts`` pairs; the first ``n_parts - 1`` all have length
        ``row // n_parts`` and the last one also absorbs the remainder.

    Raises
    ------
    ValueError
        If ``n_parts`` is smaller than 1.
    """
    if n_parts < 1:
        raise ValueError("n_parts must be at least 1")
    # Floor division keeps the bounds integral on Python 3 as well; with true
    # division they become floats and cannot be passed to range().
    chunk = row // n_parts
    res = [[i * chunk, (i + 1) * chunk] for i in range(n_parts - 1)]
    # The last part runs to the very end so no trailing rows are dropped.
    res.append([(n_parts - 1) * chunk, row])
    return res

In [88]:
# [start, stop] row bounds covering every row of `labels`, one pair per worker chunk.
parts = split_index(labels.shape[0])

In [98]:
# Number of rows for each distinct `level` value (indexed by the level itself).
x = labels.level.value_counts()

In [4]:
# Target sample count per non-zero level, used to derive replication factors:
# 20% of all labelled rows (0.2 is presumably 1/5 for five levels -- TODO confirm).
n_train = labels.shape[0] * 0.2

In [5]:
# Maps a non-zero level to the number of copies made of each image with that level.
aug_dict = {}

In [6]:
# For every level except 0, store how many times each of its images must be
# replicated so the level ends up with roughly `n_train` samples.
for i in x.index:
    if i != 0:
        # .loc is the label-based lookup; the older .ix indexer is deprecated
        # and no longer exists in current pandas.
        aug_dict[i] = int(round(n_train / x.loc[i]))

In [8]:
# Replicate class 1,2,3,4 to balance sample
import multiprocessing

In [87]:
def helper(part):
    """Build the replicated records for one chunk of `labels`.

    `part` holds the start and stop row bounds (stop exclusive). Rows whose
    level is 0 are skipped; every other row is emitted `aug_dict[level]`
    times, each copy named after the image with the copy number appended.

    Returns a dict with two parallel lists under 'image' and 'level'.
    """
    names = []
    grades = []
    for row in range(part[0], part[1]):
        grade = labels.level[row]
        if grade == 0:
            continue
        n_copies = aug_dict[grade]
        for copy_no in range(n_copies):
            names.append(labels.image[row] + str(copy_no))
            grades.append(grade)
    return {'image': names, 'level': grades}
            

In [90]:
# Worker pool for the replication step; with no `processes` argument the pool
# is sized from the machine's CPU count.
pool = multiprocessing.Pool()

In [91]:
# pool.apply blocks until its single call has finished, so calling it once per
# part ran the parts one after another. pool.map hands all parts to the
# workers at once and returns the results as a list in the order of `parts`.
# It also avoids a comprehension variable named `x`, which on Python 2 would
# overwrite the value-counts Series of the same name.
jobs = pool.map(helper, parts)

In [92]:
len(jobs)

6

In [94]:
# Use the last part's result as the accumulator. pop() removes it from `jobs`,
# so re-running this cell would consume a further result.
temp = jobs.pop()

In [95]:
# Append every remaining partial result to the accumulator, one column at a time.
for partial in jobs:
    for column in partial:
        temp[column].extend(partial[column])

In [111]:
# Names of all entries in the input directory (may include non-image files).
folder = os.listdir('../input/')

In [112]:
folder[:5]

['.DS_Store',
 '10000_left.jpeg',
 '10000_right.jpeg',
 '10001_left.jpeg',
 '10001_right.jpeg']

In [114]:
# Read a single image to inspect it. In the listing displayed above, index 3 was
# '10001_left.jpeg', but os.listdir does not guarantee any particular order.
cur = io.imread('../input/' + folder[3])

In [120]:
len(cur.shape)

3

In [116]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy


In [None]:
labels = pd.read_csv('trainLabels.csv')

# List and read from the same directory. The original listed this directory but
# read each name from '../input/train/', so the two could disagree.
# NOTE(review): confirm the processed images are the intended training input.
train_dir = '../input/processed/run-normal/train'
folder = os.listdir(train_dir)

# Keep only JPEG files (listings can contain entries such as '.DS_Store') and
# sort them so sample order is reproducible across runs.
image_files = sorted(name for name in folder if name.endswith('.jpeg'))

# Build the image-name -> level lookup once instead of filtering the whole
# table for every file.
level_by_image = dict(zip(labels.image, labels.level))

images = []
levels = []
for fname in image_files:
    img = io.imread(os.path.join(train_dir, fname))
    #img = transform.resize(img, (128, 128))
    # Swap the first and last axes so the channel axis comes first.
    # NOTE(review): this also exchanges the two spatial axes -- confirm intended.
    images.append(np.swapaxes(img, 0, 2))
    # The label key is the file name without its '.jpeg' extension.
    levels.append(int(level_by_image[os.path.splitext(fname)[0]]))

# Stack into arrays only after the loop: the original wrote into unallocated
# X_train / y_train at a stale index `i`, so every sample landed in one slot.
# np.stack raises if the images do not all share one shape.
X_train = np.stack(images)
y_train = np.array(levels)

In [None]:
X_train = np.load("sample_train.npy")
Y_train = np.load('sample_y.npy')

nb_classes = 5
nb_epoch = 1
batch_size = 32

#y_train = y_train.reshape([1600,1])
#Y_train = np_utils.to_categorical(y_train, nb_classes)

# (input channels, filters) for each conv stage. A stage is:
# 3x3 'full' conv -> relu -> 3x3 conv -> relu -> 2x2 max-pool -> dropout.
conv_stages = [(3, 32), (32, 64), (64, 64)]

model = Sequential()

for stack_size, nb_filter in conv_stages:
    model.add(Convolution2D(nb_filter, stack_size, 3, 3, border_mode='full'))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter, nb_filter, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(poolsize=(2, 2)))
    model.add(Dropout(0.25))

# Size of the flattened feature map, derived from the data instead of the
# hard-coded 64*32*32 (which is only right for 256x256 inputs). Within a stage
# the 'full' conv grows each side by 2, the next conv shrinks it by 2, and the
# 2x2 pool halves it, so each stage halves the height and width.
# Assumes X_train is laid out as (samples, channels, height, width) -- TODO confirm.
shrink = 2 ** len(conv_stages)
flat_dim = conv_stages[-1][1] * (X_train.shape[2] // shrink) * (X_train.shape[3] // shrink)

model.add(Flatten())
model.add(Dense(flat_dim, 512))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(512, nb_classes))
model.add(Activation('softmax'))

# let's train the model using SGD + momentum (how original).
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd)


# Real-time augmentation / normalisation settings, grouped so they can be read
# and tweaked in one place before the generator is created.
augmentation = dict(
    # dataset-wide normalisation: zero mean, unit std over the whole set
    featurewise_center=True,
    featurewise_std_normalization=True,
    # per-sample normalisation and ZCA whitening are switched off
    samplewise_center=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # random rotation, in degrees (0 to 180)
    rotation_range=20,
    # random shifts, as a fraction of total width / height
    width_shift_range=0.2,
    height_shift_range=0.2,
    # random flips: horizontal on, vertical off
    horizontal_flip=True,
    vertical_flip=False,
)
datagen = ImageDataGenerator(**augmentation)

X_train = X_train.astype("float32")
# Compute the dataset statistics needed by the featurewise normalisation.
datagen.fit(X_train)
for e in range(nb_epoch):
    print('-'*40)
    print('Epoch', e)
    print('-'*40)
    print("Training...")
    # batch train with realtime data augmentation
    progbar = generic_utils.Progbar(X_train.shape[0])
    # Pass batch_size explicitly: it was defined above but never handed to the
    # generator, so changing it had no effect on training.
    for X_batch, Y_batch in datagen.flow(X_train, Y_train, batch_size=batch_size):
        loss = model.train_on_batch(X_batch, Y_batch)
        progbar.add(X_batch.shape[0], values=[("train loss", loss)])

# Alternative without augmentation:
#X_train = X_train.astype("float32")
#model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch)