In [1]:
import pandas as pd
import numpy
from PIL import Image

import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config = config)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Reading Image

In [23]:
X = []

for i in range(1, 50001):
    X.append(np.array(Image.open('train/{}.png'.format(i)).getdata()).reshape(32,32,3))
    
X = np.array(X)

KeyboardInterrupt: 

In [3]:
y = pd.read_csv('trainLabels.csv')['label']

## Data Augmentation

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator(rotation_range = 10, width_shift_range=0.1, height_shift_range=0.1, shear_range=0.15,
    zoom_range=0.1, channel_shift_range= 10., horizontal_flip = True)

In [5]:
X_aug = []
y_aug = []
aug_num = 3 #number of new images

for i in range(X.shape[0]):
    if i % 10000 == 0:
        print(i)
    aug_iter = gen.flow(np.expand_dims(X[i,:,:,:], 0))
    aug_images = [next(aug_iter).astype(np.uint8) for i in range(aug_num)]
    for j in range(aug_num):
        X_aug.append(np.array(aug_images)[j][0])
        y_aug.append(y[i])

0
10000
20000
30000
40000


In [6]:
X = np.array(X_aug)

## Data Preprocessing

In [7]:
X = X/255

In [8]:
dummies = pd.get_dummies(y_aug)
labels = dummies.columns
Y = np.array(dummies) 

In [9]:
del X_aug
del y_aug

## Data Separation

In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.1, random_state = 1702)

In [11]:
X_train, X_val, Y_train, Y_val =  train_test_split(X_train, Y_train, test_size = 0.1, random_state = 1702)

## Neural Network

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization

In [13]:
model = Sequential()
model.add(Conv2D(256, (3,3), activation = 'relu', input_shape = (50, 50, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D())
model.add(Conv2D(512, (3,3), activation = 'relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D())
model.add(Conv2D(512, (3,3), activation = 'relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(256, activation = 'relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(10, activation = 'softmax'))
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 48, 48, 256)       7168      
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 48, 256)       1024      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 256)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 22, 22, 512)       1180160   
_________________________________________________________________
batch_normalization_1 (Batch (None, 22, 22, 512)       2048      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 11, 11, 512)       0         
_________________________________________

In [14]:
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
model.fit(X_train, Y_train, batch_size = 256, epochs = 10, validation_data = (X_val, Y_val))

Train on 121500 samples, validate on 13500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fc4cdac7e10>

In [15]:
model.evaluate(X_test, Y_test)



[0.8274306374073028, 0.80193335]

In [16]:
from numba import cuda 
device = cuda.get_current_device()
device.reset()

In [22]:
X_sub = []

for i in range(1, 300001):
    X_sub.append(np.array(Image.open('test/{}.png'.format(i)).getdata()).reshape(32,32,3))
    
X_sub = np.array(X_sub)
X_sub = X_sub/255

KeyboardInterrupt: 

In [None]:
y_sub = []

In [None]:
y_sub.append(model.predict_classes(X_sub[:100000, :, :, :]))
y_sub.append(model.predict_classes(X_sub[100000:200000, :, :, :]))
y_sub.append(model.predict_classes(X_sub[200000:, :, :, :]))

In [None]:
y_sub = np.array(y_sub).reshape((300000,))

In [None]:
y_sub_name = [labels[x] for x in y_sub]

In [None]:
sub = pd.DataFrame(y_sub_name, index = np.arange(1,300001)).reset_index()

In [None]:
sub.columns = ['id', 'label']

In [None]:
sub.to_csv('submission.csv', index = False)