In [11]:
from __future__ import print_function
import glob
import math
import os
import sys

import cv2
import h5py
import numpy as np
import pandas as pd
from keras.models import Sequential, model_from_json
from keras.layers import  Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

print(os.listdir('../data/labeling_20'))

['wigths', 'train_master.tsv', 'label_master.tsv', 'output', 'test', 'train', 'sample_submit .csv']


In [12]:
def make_data(folder_name):
    """It is a function that acquires an image from the folder where
         the image is stored and creates the data.
    """
    image_size = 32
    X = []
    dir = "../data/labeling_20/" + folder_name
    files = glob.glob(dir + "/*.png")
    for file in files:
        image = Image.open(file)
        image = image.resize((image_size, image_size))
        image -= np.mean(image)
        image /= np.std(image)
        data = np.array(image)
        X.append(data)
    X = np.array(X)

    return X

"""
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
"""

In [13]:
def vgg16_model():
    """This function is model of VGG16.
    CNN model.
    returns: model
    """
    model = Sequential()
    
    model.add(ZeroPadding2D((1, 1), input_shape=(32, 32, 3)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    


    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(20, activation='softmax'))

    return model

In [14]:
def train():
    """this function is train func.
       if dir output and wigths don't exist, make them.
       And do learning in this func.
       returns: none (make history)
    """
    

    if not os.path.isdir('../data/labeling_20/output'):
        os.mkdir('../data/labeling_20/output')
        
    if not os.path.isdir('../data/labeling_20/wigths'):
        os.mkdir('../data/labeling_20/wigths')

    model = vgg16_model()
    
    model.compile(optimizer=Adam(lr=0.1),
             loss='categorical_crossentropy', metrics=["accuracy"])
    model.summary()
    history = model.fit(X_train, y_train, batch_size=2500,
                          epochs=100,
                          verbose=1,
                          shuffle=True,
                          validation_split=0.2,)

In [15]:
def display():
    """This function display history of acc and loss.
    returns: two figures.
    """
    plt.figure(figsize=(12, 5))
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('Model Accuracy')
    plt.xlabel('epochs')
    plt.ylabel('Accuracy')
    plt.show()
    
    plt.figure(figsize=(12, 5))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.xlabel('epochs')
    plt.ylabel('Loss')
    plt.show()

In [28]:
train_ = pd.read_csv('../data/labeling_20/train_master.tsv', delimiter='\t')
y_train = train_['label_id'].values
y_train = np_utils.to_categorical(y_train, 20)

X_train= make_data('train')
X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)

In [29]:
print(X_train.shape, y_train.shape)

(50000, 32, 32, 3) (50000, 20)


In [30]:
train()
display()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
zero_padding2d_15 (ZeroPaddi (None, 34, 34, 3)         0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 32, 32, 64)        1792      
_________________________________________________________________
zero_padding2d_16 (ZeroPaddi (None, 34, 34, 64)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
zero_padding2d_17 (ZeroPaddi (None, 18, 18, 64)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 16, 16, 128)       73856     
__________

ValueError: Error when checking target: expected dense_9 to have 4 dimensions, but got array with shape (50000, 20)

In [None]:
X_test = make_data('test')
X_test = X_test.reshape(X_test.shape[0], 32, 32, 3).astype('float32')/255
predict = model.predict(X_test)

In [6]:
df = pd.read_csv('../data/labeling_20/sample.csv')
df_out = df['test_00000.png']
df_out

0    test_00001.png
1    test_00002.png
2    test_00003.png
3    test_00004.png
4    test_00005.png
Name: test_00000.png, dtype: object