In [17]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import os
from random import shuffle
from tqdm import tqdm
import csv
import sys
stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')
import keras
sys.stderr = stderr
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Lambda
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from scipy.ndimage.filters import gaussian_filter
from scipy.ndimage.interpolation import map_coordinates
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from keras.utils import plot_model
from keras.models import load_model

# Side lengths (in pixels) used when reshaping the flattened images.
image_colmn = 100
image_row = 100

# Raw dataset files.
read_train_filename = './all/train_images.npy'
train_label_filename = './all/train_labels.csv'
read_test_filename = './all/test_images.npy'

# Pre-processed caches written next to the script on the first run.
train_data_filename = './train_data.npy'
test_data_filename = './test_data.npy'

# Class names; a name's position in this list is its integer class id.
class_name_list = [
    "sink",
    "pear",
    "moustache",
    "nose",
    "skateboard",
    "penguin",
    "peanut",
    "skull",
    "panda",
    "paintbrush",
    "nail",
    "apple",
    "rifle",
    "mug",
    "sailboat",
    "pineapple",
    "spoon",
    "rabbit",
    "shovel",
    "rollerskates",
    "screwdriver",
    "scorpion",
    "rhinoceros",
    "pool",
    "octagon",
    "pillow",
    "parrot",
    "squiggle",
    "mouth",
    "empty",
    "pencil",
]
num_classes = len(class_name_list)


def create_train_data():
    """Build, cache and return the labelled training set.

    Reads the label CSV at ``train_label_filename`` (the first row is treated
    as a header and the class name is taken from the second column), maps
    each class name to its index in ``class_name_list`` and pairs the i-th
    label with the i-th entry of the image array at ``read_train_filename``.
    The result is also saved to ``train_data_filename``.

    Returns:
        list: one ``[pixels, label]`` pair per image, where ``pixels`` is the
        image reshaped to ``(1, image_colmn * image_row)`` and ``label`` is a
        0-d integer array holding the class index.

    Raises:
        ValueError: if a label is not present in ``class_name_list`` or there
            are fewer labels than images.
    """
    class_ids = {name: idx for idx, name in enumerate(class_name_list)}

    with open(train_label_filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # drop the header row (no error on an empty file)
        label_names = [row[1] for row in reader if row]

    # Labels are matched to images purely by position, so silently dropping
    # an unknown label would shift every later label onto the wrong image.
    unknown = sorted({name for name in label_names if name not in class_ids})
    if unknown:
        raise ValueError(
            'labels not present in class_name_list: %s' % ', '.join(unknown))

    # NOTE: allow_pickle=True unpickles the file; only use on a trusted
    # dataset. The file is indexed as images[i][1] below, so it is presumably
    # an object array of (id, image) pairs, which NumPy >= 1.16.3 refuses to
    # load without this flag.
    images = np.load(read_train_filename, encoding='latin1', allow_pickle=True)
    if len(label_names) < len(images):
        raise ValueError(
            'found %d labels for %d images' % (len(label_names), len(images)))

    pixel_count = image_colmn * image_row
    train_data = [
        [np.array(image[1].reshape(1, pixel_count)), np.array(class_ids[name])]
        for image, name in zip(tqdm(images), label_names)
    ]

    # Each pair holds arrays of different shapes; recent NumPy only builds
    # such a ragged array when dtype=object is requested explicitly.
    np.save(train_data_filename, np.array(train_data, dtype=object))
    return train_data


    
        
def norm_input(x):
    """Standardise ``x`` with the module-level ``mean_px`` and ``std_px``.

    Both statistics are looked up as globals at call time, so they must be
    assigned before this function (or a layer wrapping it) is evaluated.
    """
    centered = x - mean_px
    return centered / std_px

if __name__ == '__main__':
    # Inference script: load (or build) the cached train/test arrays, rebuild
    # the CNN, load its trained weights from 'model_1.h5' and write the
    # predicted class name of every test image to 'submission.csv'.

    # --- Training data (needed here only for the normalisation statistics) --
    if not os.path.isfile(train_data_filename):
        train_data = create_train_data()
    else:
        print("file already exist")
        # The cache stores [pixels, label] pairs as an object array, which
        # NumPy >= 1.16.3 only loads with allow_pickle=True (trusted file).
        train_data = np.load(train_data_filename, encoding='latin1',
                             allow_pickle=True)

    # --- Test data ----------------------------------------------------------
    if not os.path.isfile(test_data_filename):
        # Indexed as images[i][1], so presumably an object array of
        # (id, image) pairs; allow_pickle=True is required for those.
        images = np.load(read_test_filename, encoding='latin1',
                         allow_pickle=True)
        pixel_count = image_colmn * image_row
        predict_data = [[np.array(image[1]).reshape(1, pixel_count)]
                        for image in tqdm(images)]
        np.save(test_data_filename, predict_data)
    else:
        print("file already exist")
        predict_data = np.load(test_data_filename, encoding='latin1')

    # Hyper-parameters. Only test_size and batch_size are used below; the
    # others are kept because they are module-level names that other cells
    # may read.
    aug_epochs = 80
    test_size = 0.05
    random_seed = 1
    learning_rate = 0.001
    version = 1
    batch_size = 64
    epochs = 50

    # --- Array preparation --------------------------------------------------
    y_raw = [sample[1] for sample in train_data]
    train_y = np.asarray(y_raw, dtype=float).reshape(-1)

    input_shape = (image_colmn, image_row, 1)
    image_batch_shape = (-1, image_colmn, image_row, 1)

    train_x = np.array([sample[0] for sample in train_data])
    train_x = train_x.reshape(image_batch_shape).astype('float32')
    predict_x = np.array([sample[0] for sample in predict_data])
    predict_x = predict_x.reshape(image_batch_shape).astype('float32')
    print(len(predict_x))

    # Scale pixel values by 1/255.
    train_x /= 255
    predict_x /= 255

    x_train, x_test, y_train, y_test = train_test_split(
        train_x, train_y, test_size=test_size, random_state=111)
    class_weights = class_weight.compute_class_weight(
        class_weight='balanced', classes=np.unique(y_train), y=y_train)

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    # norm_input() reads these two as module-level globals, so they must be
    # assigned at this level before the Lambda layer is built.
    mean_px = x_train.mean().astype(np.float32)
    std_px = x_train.std().astype(np.float32)

    # https://keras.io/getting-started/sequential-model-guide/
    # The layer stack must match the one the weights were saved from, so the
    # architecture is reproduced unchanged.
    model = Sequential([
        Lambda(norm_input, input_shape=input_shape),

        Conv2D(32, strides=(1, 1), kernel_size=(3, 3), activation='relu', padding='same', input_shape=input_shape),
        Conv2D(32, strides=(1, 1), kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2), strides=(1, 1)),
        Dropout(0.25),

        Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),

        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.load_weights('model_1.h5')
    print(len(predict_x))

    # Sequential.predict_classes() no longer exists in current Keras; taking
    # the argmax over the softmax output yields the same class indices.
    class_probabilities = model.predict(predict_x, batch_size=batch_size, verbose=0)
    predict_result = np.argmax(class_probabilities, axis=-1)
    print(predict_result)

    # --- Submission file: one "Id,Category" row per test image --------------
    with open('submission.csv', 'w') as csvfile:
        csvfile.write('Id,Category\n')
        csvfile.writelines(
            '%d,%s\n' % (image_id, class_name_list[class_id])
            for image_id, class_id in enumerate(predict_result))

file already exist
file already exist
10000
10000
[15  1  0 ...  4 15 13]
