In [1]:
import csv
import numpy as np
import pandas as pd

import cv2

import keras
from keras import regularizers
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Embedding
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [2]:
from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

import os

# Limit the CUDA devices visible to this process by setting the
# CUDA_VISIBLE_DEVICES environment variable to "0".
# The device-ordering override stays disabled:
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Switch the working directory to the dataset folder: the CSV files below are
# opened by bare file name (presumably the FilePath column inside them is
# relative to this folder as well -- confirm).
# The location can be overridden without editing the notebook by exporting
# DATA_CLS_DIR; the original machine-specific path remains the default.
import os
path = os.environ.get(
    'DATA_CLS_DIR',
    '/media/stanislau/82db778e-0496-450c-9b25-d1e50a90e476/data/data4stas/01_data_cls')
os.chdir(path)

In [None]:
# TensorBoard callback logging to ./logs, with every option spelled out.
# NOTE(review): no fit call visible in this notebook passes it via `callbacks=`.
tbCallBack = keras.callbacks.TensorBoard(
    log_dir='./logs',
    batch_size=32,
    histogram_freq=0,
    write_graph=True,
    write_grads=False,
    write_images=False,
    embeddings_freq=0,
    embeddings_layer_names=None,
    embeddings_metadata=None)

In [4]:
def process_file(path, num_classes=None):
    """Load a labelled image list from a CSV file into arrays for Keras.

    The CSV must contain the columns ``FilePath`` (image location, read with
    OpenCV as grayscale) and ``ClassId`` (numeric label; the code treats it
    as 1-based and shifts it to 0-based).

    Parameters
    ----------
    path : str
        CSV file to read.
    num_classes : int, optional
        Width of the one-hot label matrix. When omitted it is inferred as the
        number of distinct labels found in this file (the original
        behaviour). Pass the training value explicitly when loading a file
        that may not contain every class, so label widths stay consistent.

    Returns
    -------
    x : numpy.ndarray
        Images stacked along axis 0 with a trailing channel axis of size 1
        (assuming all images share one size).
    y : numpy.ndarray
        One-hot encoded labels with ``num_classes`` columns.
    num_classes : int
        The number of classes used for the encoding.

    Raises
    ------
    IOError
        If one or more listed images cannot be read.
    ValueError
        If a shifted label falls outside ``[0, num_classes)``.
    """
    df = pd.read_csv(path)

    images = []
    unreadable = []
    for file_path in df['FilePath']:
        image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread reports a missing/undecodable file by returning None
        # rather than raising, so failures are collected and reported at once.
        if image is None:
            unreadable.append(file_path)
        else:
            images.append(image)
    if unreadable:
        raise IOError('{} image(s) listed in {} could not be read, e.g. {}'.format(
            len(unreadable), path, unreadable[0]))

    x = np.array(images)
    # Append the single channel axis: (n, h, w) -> (n, h, w, 1).
    x = x[..., np.newaxis]

    # Shift labels to the 0-based indices expected by to_categorical.
    labels = df['ClassId'].values.astype(float) - 1
    if num_classes is None:
        num_classes = len(np.unique(labels))
    # Negative indices would silently wrap around inside to_categorical and
    # too-large ones fail with an opaque IndexError, so check up front.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            'ClassId values in {} must lie in 1..{}; got range {}..{} '
            '(pass num_classes explicitly if this file lacks some classes)'.format(
                path, num_classes, int(labels.min()) + 1, int(labels.max()) + 1))
    y = keras.utils.to_categorical(labels, num_classes=num_classes)

    return x, y, num_classes
            

def process_line(line):
    """Parse one text line of a data file into model inputs and a target.

    Placeholder: the parsing logic has not been written. The caller
    ``generate_arrays_from_file`` unpacks the result as ``(x1, x2, y)``, so an
    implementation has to return such a triple. Until then the stub fails
    loudly instead of returning ``None`` (which only surfaced as a confusing
    unpacking ``TypeError`` in the caller).

    Parameters
    ----------
    line : str
        Raw line read from the data file.

    Raises
    ------
    NotImplementedError
        Always, until the parser is implemented.
    """
    raise NotImplementedError(
        'process_line is a stub: it must return a tuple (x1, x2, y)')

def generate_arrays_from_file(path):
    """Endlessly yield ``(inputs, targets)`` batches parsed from a text file.

    The file is re-read from the start each time its end is reached, so the
    generator never terminates on its own. Every line is handed to
    ``process_line``, whose result is unpacked as ``(x1, x2, y)``.

    Parameters
    ----------
    path : str
        Text file to iterate over, one sample per line.

    Yields
    ------
    tuple of dict
        ``({'input_1': x1, 'input_2': x2}, {'output': y})``.

    Raises
    ------
    ValueError
        If the file contains no lines (otherwise the loop would spin forever
        without yielding anything).
    """
    while True:
        produced_any = False
        # The context manager closes the handle even when the consumer stops
        # iterating early or process_line raises.
        with open(path) as source:
            for line in source:
                # create numpy arrays of input data
                # and labels, from each line in the file
                x1, x2, y = process_line(line)
                produced_any = True
                yield ({'input_1': x1, 'input_2': x2}, {'output': y})
        if not produced_any:
            raise ValueError('{} contains no lines to generate from'.format(path))

In [5]:
# Training images, one-hot labels and the class count from the training CSV.
x, y, num_classes = process_file(path='Alexander_cls_train.csv')

In [None]:
# Augmenting image generator used for training.
datagen = ImageDataGenerator(
    # dataset-wide normalisation (statistics come from a later datagen.fit call)
    featurewise_center=True, featurewise_std_normalization=True,
    # geometric jitter
    rotation_range=20, zoom_range=0.2,
    width_shift_range=0.2, height_shift_range=0.2,
    # random mirroring along both axes
    horizontal_flip=True, vertical_flip=True,
    # how pixels outside the transformed image are filled in
    fill_mode='reflect')

In [None]:
# Fit the generator on the training images so it can compute the statistics
# required by featurewise_center / featurewise_std_normalization.
datagen.fit(x)

In [None]:
%pylab inline

In [None]:
# CNN classifier: three stages of (Conv 3x3 -> Conv 3x3 -> MaxPool 2x2) with
# 32, 64 and 128 filters, then dropout and a softmax layer over `num_classes`.
# Every trainable layer carries an L2 weight penalty of 0.01.
model = Sequential()
# input: 200x200 grayscale images -> (200, 200, 1) tensors.
# stage 1: two convolutions with 32 filters of size 3x3 each.
model.add(Conv2D(32, (3, 3), kernel_regularizer=regularizers.l2(0.01), activation='relu', input_shape=(200, 200, 1)))
model.add(Conv2D(32, (3, 3), kernel_regularizer=regularizers.l2(0.01), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# stage 2: two convolutions with 64 filters.
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(0.01), activation='relu'))
model.add(Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(0.01), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# stage 3: two convolutions with 128 filters.
model.add(Conv2D(128, (3, 3), kernel_regularizer=regularizers.l2(0.01), activation='relu'))
model.add(Conv2D(128, (3, 3), kernel_regularizer=regularizers.l2(0.01), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
# Dropout must act on the features feeding the classifier. Placed after the
# softmax (as before) it zeroes/rescales the predicted probabilities during
# training, so the loss is computed on something that is no longer a
# probability distribution.
model.add(Dropout(0.5))
model.add(Dense(num_classes, kernel_regularizer=regularizers.l2(0.01), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Plain (non-augmented) training run on the arrays loaded from the training CSV.
# Disabled earlier variant: batch_size=8 with validation_data=(x_train, y_train).
model.fit(
    x,
    y,
    epochs=10,
    batch_size=32)

In [None]:
# Hold-out data for validation during augmented training. Loading the training
# CSV here (as before) made the validation metrics a re-measurement of the
# training set. Only the first two return values are kept so `num_classes`,
# which sized the model's output layer, is not rebound.
x_test, y_test = process_file('Alexander_cls_test.csv')[:2]
assert y_test.shape[1] == y.shape[1], (
    'test labels are encoded with {} classes but training labels with {}'.format(
        y_test.shape[1], y.shape[1]))

In [None]:
batch_size = 128
# Three augmented passes over the training set per epoch, rounded up because
# Keras expects a whole number of steps.
steps_per_epoch = int(np.ceil(3.0 * len(x) / batch_size))
# validation_data bypasses the generator, so apply the same featurewise
# standardisation the training batches receive. standardize works in place on
# floats, hence the float32 copy.
x_val = datagen.standardize(x_test.astype('float32'))
model.fit_generator(datagen.flow(x, y, batch_size=batch_size),
                    steps_per_epoch=steps_per_epoch, epochs=100,
                    use_multiprocessing=True, workers=20,
                    validation_data=(x_val, y_test), verbose=2)

In [None]:
# Evaluation-time generator: normalisation only, no augmentation. It is fitted
# on the training images so test data is standardised with training statistics.
test_datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True
)

test_datagen.fit(x)

test_paths = ['Alexander_cls_train.csv',
             'Alexander_cls_test.csv',
             'Alexander_cls_test1.csv',
             'Alexander_cls_test2.csv',
             'Alexander_cls_test3.csv',
             'Alexander_cls_test4.csv']

batch_size = 32

for test_path in test_paths:
    # Keep only images and labels; rebinding `num_classes` per file would
    # leave a stale value behind for any re-run of the model cell.
    x_test, y_test = process_file(test_path)[:2]
    # Whole number of batches covering every sample exactly once (true division
    # forced so Python 2 does not drop the final partial batch).
    steps = int(np.ceil(len(x_test) / float(batch_size)))
    score = model.evaluate_generator(
        test_datagen.flow(x_test, y_test, batch_size=batch_size, shuffle=False),
        steps=steps)
    # The loop variable is deliberately not called `x`, to avoid confusion
    # with the global training array.
    print(test_path + ' : ' + ', '.join(str(value) for value in score))

In [None]:
# Score the model on the training arrays.
# NOTE(review): `x` is passed without the featurewise standardisation that the
# generator-based training/evaluation cells apply -- confirm this is intended.
model.evaluate(x, y, batch_size=32)

In [None]:
# `x_test` holds the last file loaded by the evaluation loop. Give the network
# the same featurewise-standardised inputs that loop used; raw pixel values
# would be on a different scale. standardize works in place on floats, hence
# the float32 copy.
y_predict = model.predict(test_datagen.standardize(x_test.astype('float32')), batch_size=32)

In [None]:
# Display the shape of the prediction array.
y_predict.shape

In [None]:
# Shape of the label indices recovered from the one-hot test labels.
y_test.argmax(axis=1).shape

In [None]:
# Accuracy: fraction of samples whose predicted class index matches the label.
np.mean(y_predict.argmax(axis=1) == y_test.argmax(axis=1))

In [None]:
# Distinct class indices that the model actually predicts.
np.unique(y_predict.argmax(axis=1))

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# TODO: ask about using 500 samples per class (good, but overfitting is possible)
# when results look good with only 50 samples, that is usually a bad sign