In [None]:
import os

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import RMSprop
from keras.optimizers import SGD
from keras_preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

from eugenenet.vgg11 import VGG11

In [None]:
# Lower this number to train the network on a smaller random subset for speed.
NUMBER_OF_SAMPLES = 100000
# Fixed seed so the same subset is drawn on every Restart & Run All.
SAMPLE_SEED = 42

TRAIN_DATA_FILE = 'synimg/train/data.csv'

# DataFrame.from_csv was deprecated in pandas 0.21 and removed in pandas 1.0.
# read_csv(index_col=0) keeps from_csv's default of using the first column as the index.
# from_csv also attempted to parse that index as dates; that is intentionally not
# carried over because the generator cells only read the "filepath" and "style_name"
# columns. NOTE(review): add parse_dates=True if the first column really is a timestamp.
df_imagePaths = pd.read_csv(TRAIN_DATA_FILE, index_col=0)

# DataFrame.sample (without replacement) raises when n exceeds the number of rows,
# so cap the request at the size of the dataset.
sample_size = min(NUMBER_OF_SAMPLES, len(df_imagePaths))
df_imagePaths_sample = df_imagePaths.sample(n=sample_size, random_state=SAMPLE_SEED)

In [None]:
# Multiply pixel values by 1/255 and reserve a 0.15 fraction of the rows for validation.
datagen = ImageDataGenerator(rescale=1./255., validation_split=0.15)

# Keyword arguments shared by the training and the validation iterator.
# target_size is (height, width); it matches height=32, width=64 passed to VGG11.build.
flow_kwargs = dict(
    dataframe=df_imagePaths_sample,
    x_col="filepath",
    y_col="style_name",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(32,64),
)

# Both iterators read from the same sampled frame; `subset` selects which side of
# the validation split each one yields.
train_generator = datagen.flow_from_dataframe(directory="./", subset="training", **flow_kwargs)
valid_generator = datagen.flow_from_dataframe(directory=".", subset="validation", **flow_kwargs)

In [None]:
# Training hyper-parameters.
INIT_LR = 0.01
EPOCHS = 75
# NOTE(review): BS is not referenced by any cell visible in this notebook;
# the generators are created with batch_size=32.
BS = 128

# Number of distinct style classes found by the training generator.
num_classes = len(train_generator.class_indices)
model = VGG11.build(width=64, height=32, depth=3, classes=num_classes)

# Plain SGD starting at INIT_LR, with the optimizer's `decay` set to INIT_LR / EPOCHS.
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])

# Double checking that changes in VGG11 class were applied. If not - restart the kernel.
model.layers

In [None]:
# Number of whole batches per pass; integer division drops a trailing partial batch.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size

# Early stopping uses patience=5 and learning-rate reduction uses patience=3;
# both callbacks keep their default monitored quantity.
training_callbacks = [
    EarlyStopping(patience=5),
    ReduceLROnPlateau(patience=3),
]

# H keeps the object returned by fit_generator for later inspection.
H = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=EPOCHS,
    callbacks=training_callbacks,
)

In [None]:
# Evaluate the trained model on the validation iterator; the result is left as the
# cell's last expression so the notebook displays it.
model.evaluate_generator(
    generator=valid_generator,
    steps=STEP_SIZE_VALID,
)

## Save the trained model

In [None]:
model_save_name = "./trained_models/vgg11_v2"
# Create the target directory first: writing the .h5 file fails if the directory
# is missing, which would discard the result of the whole training run.
os.makedirs(os.path.dirname(model_save_name), exist_ok=True)
model.save_weights('{0}.h5'.format(model_save_name))

## Now it's time to run predictions on the test dataset

In [None]:
# CSV listing the test images; the later cells read its "filepath" and "id" columns.
TEST_DATA_FILE = 'synimg/test/data_nostyle.csv'
# Unlike the training frame, no index column is set here, so every CSV column
# (including "id", which is written to the submission) stays a regular column.
df_test = pd.read_csv(TEST_DATA_FILE)

In [None]:
# Same 1/255 rescaling as used for training; no validation split for the test data.
testgen = ImageDataGenerator(rescale=1./255.)

# shuffle=False keeps predictions in the row order of df_test, and class_mode=None
# makes the iterator yield images only (the test CSV carries no labels).
test_generator = testgen.flow_from_dataframe(
    dataframe=df_test,
    directory="./",
    x_col="filepath",
    y_col=None,
    batch_size=32,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(32,64)
)

# Round the step count UP so the final partial batch is predicted as well.
# Floor division would drop the last (n % batch_size) images, leaving fewer
# predictions than rows in df_test and breaking the later column assignment.
STEP_SIZE_TEST = (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size
test_generator.reset()

pred = model.predict_generator(test_generator, steps=STEP_SIZE_TEST, verbose=1)

# Fail here with a clear message rather than later when the predictions are
# attached to df_test.
assert len(pred) == len(df_test), (
    "expected {0} predictions (one per row of df_test) but got {1}; "
    "check that every filepath in the test CSV points to a readable image".format(
        len(df_test), len(pred)
    )
)

### Convert predictions into style_names

In [None]:
# For every prediction row, take the column index with the highest score.
# Requires `import numpy as np` in the notebook's import cell; without it this
# cell raises NameError on a fresh kernel.
predicted_class_indices = np.argmax(pred, axis=1)

In [None]:
# class_indices maps label -> class index; invert it so a predicted index can be
# turned back into its label.
labels = {class_index: style for style, class_index in train_generator.class_indices.items()}
# Translate each predicted index into the corresponding label, preserving order.
predictions = [labels[class_index] for class_index in predicted_class_indices]

In [None]:
# Attach the predicted labels to the test frame in place; `predictions` must have
# exactly one entry per row of df_test, in the same order.
df_test['style_name'] = predictions

In [None]:
SUBMISSION_FILE = 'predictions/submission_vgg11_v2.csv'
# to_csv does not create missing directories, so make sure the output folder exists.
os.makedirs(os.path.dirname(SUBMISSION_FILE), exist_ok=True)
# Write only the id and the predicted label, without the DataFrame index.
df_test.to_csv(SUBMISSION_FILE, columns=['id','style_name'], index = False)