In [1]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
 
# import the necessary packages
from eugenenet.vgg11 import VGG11
from eugenenet.smallvggnet import SmallVGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras_preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
#from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os



Using TensorFlow backend.
  return f(*args, **kwds)
  'Matplotlib is building the font cache using fc-list. '


In [9]:
import pandas as pd

# Index CSV for the training set.
TRAIN_DATA_FILE = 'synimg/train/data.csv'
# Upper bound on how many rows of the index are used for this run.
NUMBER_OF_SAMPLES = 100000
# Fixed seed so the same rows are drawn on every run (the row order also
# decides which rows land in the generators' training/validation subsets).
SAMPLE_SEED = 42

print("[INFO] loading images...")

# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=0 is the documented replacement.
# NOTE(review): from_csv also passed parse_dates=True for the index; the
# index is assumed to be a plain row number here, so that is omitted — confirm.
df_imagePaths = pd.read_csv(TRAIN_DATA_FILE, index_col=0)

# Draw a shuffled subset of the rows; cap at the frame size so a smaller
# index file does not make sample() raise.
df_imagePaths_sample = df_imagePaths.sample(
    n=min(NUMBER_OF_SAMPLES, len(df_imagePaths)),
    random_state=SAMPLE_SEED,
)


[INFO] loading images...




In [10]:
# Image generator shared by the training and validation flows.
# - rescale multiplies every pixel value by 1/255
# - validation_split reserves 15% of the rows for the "validation" subset
# An augmenting variant (rotation, width/height shift, shear, zoom,
# horizontal flip, fill_mode="nearest") is intentionally left disabled.
datagen = ImageDataGenerator(
    validation_split=0.15,
    rescale=1./255.,
)
    


In [11]:
# Arguments common to the training and validation flows. Keeping them in one
# place guarantees both subsets are read with identical settings (same
# dataframe, seed, batch size and image size); only `subset` differs.
flow_kwargs = dict(
    dataframe=df_imagePaths_sample,
    directory="./",          # paths in x_col are resolved relative to the CWD
    x_col="filepath",
    y_col="style_name",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(32, 64),    # (height, width)
)

# Rows outside the validation_split fraction configured on `datagen`.
train_generator = datagen.flow_from_dataframe(subset="training", **flow_kwargs)

# The held-out validation_split fraction of the same dataframe.
valid_generator = datagen.flow_from_dataframe(subset="validation", **flow_kwargs)

Found 85000 validated image filenames belonging to 10 classes.
Found 15000 validated image filenames belonging to 10 classes.


In [12]:
# Training hyperparameters: initial learning rate, maximum number of epochs,
# and batch size.
# NOTE(review): BS is not referenced by the visible training cells; the
# generators are created with batch_size=32 — confirm which value is intended.
INIT_LR = 0.01
EPOCHS = 75
BS = 128

# Build the VGG-like network for 64x32 RGB inputs, with one output unit per
# class discovered by the training generator.
num_classes = len(train_generator.class_indices)
model = VGG11.build(width=64, height=32, depth=3, classes=num_classes)

# Compile with SGD whose learning rate decays over the planned epochs
# (for a 2-class problem binary_crossentropy would be the loss to use).
print("[INFO] training network...")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

[INFO] training network...


In [13]:
# Number of whole batches per pass over each subset (floor division, so a
# trailing partial batch is not counted).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size

# Stop after 5 epochs without improvement; lower the learning rate after 3.
# NOTE(review): EarlyStopping is used without restoring the best weights, so
# the model keeps the weights of the last epoch run — confirm that is intended.
training_callbacks = [
    EarlyStopping(patience=5),
    ReduceLROnPlateau(patience=3),
]

# Train from the generators; H holds the returned training history object.
H = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=EPOCHS,
    callbacks=training_callbacks,
)

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75


With batch normalisation: 
* 40 epochs: acc: 0.6613, loss: 0.7873
* 65 epochs: acc: ~0.70
* 75 epochs: acc: 0.7248


In [14]:
# Number of distinct classes found by the training generator (this is the
# value passed as `classes` when the model is built).
len(train_generator.class_indices)

10

In [15]:
# Inspect the layer objects of the built network, in order.
# NOTE(review): this shows raw object reprs; model.summary() would also show
# output shapes and parameter counts.
model.layers

[<keras.layers.convolutional.Conv2D at 0x7fe81da30518>,
 <keras.layers.core.Activation at 0x7fe81da30208>,
 <keras.layers.normalization.BatchNormalization at 0x7fe81da302b0>,
 <keras.layers.pooling.MaxPooling2D at 0x7fe81da304e0>,
 <keras.layers.core.Dropout at 0x7fe81bd27ba8>,
 <keras.layers.convolutional.Conv2D at 0x7fe81b8f1e10>,
 <keras.layers.core.Activation at 0x7fe7f42aba58>,
 <keras.layers.normalization.BatchNormalization at 0x7fe7f42595c0>,
 <keras.layers.convolutional.Conv2D at 0x7fe7f4245fd0>,
 <keras.layers.core.Activation at 0x7fe7f41fa240>,
 <keras.layers.normalization.BatchNormalization at 0x7fe7f4238e80>,
 <keras.layers.pooling.MaxPooling2D at 0x7fe7f42389e8>,
 <keras.layers.core.Dropout at 0x7fe7f41b0240>,
 <keras.layers.convolutional.Conv2D at 0x7fe7f40ff320>,
 <keras.layers.core.Activation at 0x7fe7f4116c18>,
 <keras.layers.normalization.BatchNormalization at 0x7fe7f40c7828>,
 <keras.layers.convolutional.Conv2D at 0x7fe7f40c77f0>,
 <keras.layers.core.Activation at 0x

In [16]:
# Evaluate on the validation subset; the result is [loss, accuracy] as
# configured in model.compile. STEP_SIZE_VALID is floor-divided, so a trailing
# partial batch of validation samples is not included in this score.
model.evaluate_generator(generator= valid_generator,steps=STEP_SIZE_VALID)

[0.1838087858731548, 0.92083333333333328]

In [17]:
# Class-name -> index mapping used by the validation generator; it should be
# identical to the training generator's mapping.
valid_generator.class_indices

{'Beijing': 0,
 'Brisbane': 1,
 'Geneva': 2,
 'HongKong': 3,
 'Luanda': 4,
 'Melbourne': 5,
 'Seoul': 6,
 'Singapore': 7,
 'Sydney': 8,
 'Zurich': 9}

In [18]:
# Class-name -> index mapping used by the training generator; this is the
# mapping later inverted to turn predicted indices back into style names.
train_generator.class_indices

{'Beijing': 0,
 'Brisbane': 1,
 'Geneva': 2,
 'HongKong': 3,
 'Luanda': 4,
 'Melbourne': 5,
 'Seoul': 6,
 'Singapore': 7,
 'Sydney': 8,
 'Zurich': 9}

In [14]:
# Path prefix (without extension) under which the trained weights are stored.
model_save_name = "./trained_models/vgg11_v2"

# Create the target directory first: writing the HDF5 file fails if the
# directory is missing, which would lose the result of a long training run.
os.makedirs(os.path.dirname(model_save_name), exist_ok=True)

# Persist only the weights (not the architecture or optimizer state).
model.save_weights('{0}.h5'.format(model_save_name))

In [20]:
# Scratch check of how the weights-file path is formatted.
# NOTE(review): this rebinds model_save_name to the bare directory, so the
# printed path has no file stem ("./trained_models/.h5") and any later cell
# reusing model_save_name sees this value, not the one used when saving.
model_save_name = "./trained_models/"
print('{0}.h5'.format(model_save_name))

./trained_models/.h5


In [15]:
# Index CSV for the unlabeled test set.
TEST_DATA_FILE = 'synimg/test/data_nostyle.csv'
# NOTE(review): read without index_col, so the file's unnamed first column is
# loaded as a regular data column and appears as "Unnamed: 0".
df_test = pd.read_csv(TEST_DATA_FILE)

In [22]:
# Preview the first rows only instead of rendering the whole test index.
df_test.head(10)

Unnamed: 0,id,file,filepath
0,9000000,test-A-9000000.jpg,synimg/test/A/test-A-9000000.jpg
1,9000001,test-B-9000001.jpg,synimg/test/B/test-B-9000001.jpg
2,9000002,test-C-9000002.jpg,synimg/test/C/test-C-9000002.jpg
3,9000003,test-D-9000003.jpg,synimg/test/D/test-D-9000003.jpg
4,9000004,test-E-9000004.jpg,synimg/test/E/test-E-9000004.jpg
5,9000005,test-F-9000005.jpg,synimg/test/F/test-F-9000005.jpg
6,9000006,test-G-9000006.jpg,synimg/test/G/test-G-9000006.jpg
7,9000007,test-H-9000007.jpg,synimg/test/H/test-H-9000007.jpg
8,9000008,test-I-9000008.jpg,synimg/test/I/test-I-9000008.jpg
9,9000009,test-J-9000009.jpg,synimg/test/J/test-J-9000009.jpg


In [16]:
# Test-time generator: same 1/255 rescaling as training, no validation split.
testgen = ImageDataGenerator(rescale=1./255.)

# Unlabeled flow over the test index. shuffle=False keeps the batches in
# dataframe order so predictions can be matched back to rows by position.
test_generator = testgen.flow_from_dataframe(
    dataframe=df_test,
    directory="./",
    x_col="filepath",
    y_col=None,
    batch_size=32,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(32, 64))

# Ceil division: a floor would drop the final partial batch whenever the number
# of test images is not a multiple of the batch size, leaving fewer predictions
# than rows in df_test.
STEP_SIZE_TEST = (
    (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size
)

# Start from the first batch regardless of any earlier iteration.
test_generator.reset()

# One row of class probabilities per test image.
pred = model.predict_generator(test_generator,
                               steps=STEP_SIZE_TEST,
                               verbose=1)

# Fail fast here rather than later with a misaligned submission.
assert len(pred) == test_generator.n, (
    "expected {0} predictions, got {1}".format(test_generator.n, len(pred))
)

Found 20000 validated image filenames.


In [17]:
# For every test image pick the index of the highest-scoring class.
predicted_class_indices = np.asarray(pred).argmax(axis=1)

In [18]:
# Invert the generator's name -> index mapping into index -> name.
labels = {
    class_index: class_name
    for class_name, class_index in train_generator.class_indices.items()
}

# Translate each predicted class index into its style name.
predictions = [labels[class_index] for class_index in predicted_class_indices]

In [26]:
# File names in the order the test generator yields them.
filenames = test_generator.filenames

# Pair every file name with its predicted style and write the table to disk
# without the row index.
results_columns = {
    "Filename": filenames,
    "Predictions": predictions,
}
results = pd.DataFrame(results_columns)
results.to_csv("results.csv", index=False)

In [19]:
# Attach the predicted style names to the test frame by position (this
# mutates df_test in place; later cells rely on the new column).
# NOTE(review): assumes `predictions` is in the same row order as df_test,
# i.e. the generator kept dataframe order (shuffle=False) and skipped no
# files — confirm.
df_test['style_name'] = predictions

In [20]:
# Write the submission file: only the id and style_name columns, no row index.
df_test.to_csv('submission_vgg11_v2.csv', columns=['id','style_name'], index = False)

In [50]:
# Preview the first rows with the predicted style_name column instead of
# rendering the whole test index.
df_test.head(10)

Unnamed: 0,id,file,filepath,style_name
0,9000000,test-A-9000000.jpg,synimg/test/A/test-A-9000000.jpg,Geneva
1,9000001,test-B-9000001.jpg,synimg/test/B/test-B-9000001.jpg,Melbourne
2,9000002,test-C-9000002.jpg,synimg/test/C/test-C-9000002.jpg,Melbourne
3,9000003,test-D-9000003.jpg,synimg/test/D/test-D-9000003.jpg,Singapore
4,9000004,test-E-9000004.jpg,synimg/test/E/test-E-9000004.jpg,Brisbane
5,9000005,test-F-9000005.jpg,synimg/test/F/test-F-9000005.jpg,Singapore
6,9000006,test-G-9000006.jpg,synimg/test/G/test-G-9000006.jpg,HongKong
7,9000007,test-H-9000007.jpg,synimg/test/H/test-H-9000007.jpg,Sydney
8,9000008,test-I-9000008.jpg,synimg/test/I/test-I-9000008.jpg,Sydney
9,9000009,test-J-9000009.jpg,synimg/test/J/test-J-9000009.jpg,Singapore


In [13]:
# List the GPU devices visible to the Keras TensorFlow backend.
# NOTE(review): _get_available_gpus is a private helper of the backend module
# and may not exist in other Keras versions; this import also sits outside the
# notebook's import cell.
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0',
 '/job:localhost/replica:0/task:0/device:GPU:1',
 '/job:localhost/replica:0/task:0/device:GPU:2',
 '/job:localhost/replica:0/task:0/device:GPU:3']