## VGG19+Freeze first 5 layers
#### Adding a validation set improved the F1-score compared to the reference
#### The detailed run logs are available in a private Kaggle Kernel; the attached PDF screenshots show that this notebook is runnable
#### Reference: https://www.kaggle.com/atrisaxena/keras-plant-seedlings-vgg19-augmentation

In [None]:
# Notebook setup: silence warnings, list the mounted input datasets and make
# every bare expression in a cell display its value (not only the last one).
import warnings
warnings.filterwarnings('ignore')
import numpy as np 
import pandas as pd 
import os
# Show which datasets are available under ../input.
print(os.listdir("../input"))
from IPython.core.interactiveshell import InteractiveShell
# "all" makes the notebook echo each expression statement in a cell.
InteractiveShell.ast_node_interactivity = "all"

# Image handling, plotting and Keras building blocks used by the cells below.
import cv2
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
from keras.utils import np_utils
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

In [None]:
# Species labels, one per class folder. The position of a name in this list
# is what the table-building cells use as its integer category id.
CATEGORIES = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]
NUM_CATEGORIES = len(CATEGORIES)

In [None]:
# Seed constant plus the dataset root and its train / validation / test folders.
SEED = 123
data_dir = '../input/withvalid'
train_dir, valid_dir, test_dir = (
    os.path.join(data_dir, subfolder)
    for subfolder in (
        'train_valid/train_valid/train',
        'train_valid/train_valid/valid/valid',
        'test/test',
    )
)

In [None]:
# Report how many entries each category folder of the training split contains.
for category in CATEGORIES:
    category_folder = os.path.join(train_dir, category)
    image_count = len(os.listdir(category_folder))
    print('{} {} images'.format(category, image_count))

In [None]:
# Report how many entries each category folder of the validation split contains.
for category in CATEGORIES:
    category_folder = os.path.join(valid_dir, category)
    image_count = len(os.listdir(category_folder))
    print('{} {} images'.format(category, image_count))

In [None]:
# Build the training table: one row per file with its path relative to the
# train/valid parent folder, the numeric category id and the category name.
train_rows = []
for category_id, category in enumerate(CATEGORIES):
    category_folder = os.path.join(train_dir, category)
    for file in os.listdir(category_folder):
        relative_path = 'train/{}/{}'.format(category, file)
        train_rows.append([relative_path, category_id, category])
train = pd.DataFrame(train_rows, columns=['file', 'category_id', 'category'])
# Quick sanity check of the table's contents and size.
train.head(2)
train.shape

In [None]:
# Build the validation table: one row per file with its path relative to the
# train/valid parent folder, the numeric category id and the category name.
valid = []
for cat_id, cat in enumerate(CATEGORIES):
    for f in os.listdir(os.path.join(valid_dir, cat)):
        # Use this loop's own variable `f`; the previous code formatted the
        # stale global `file` left behind by the training-table loop, so every
        # row carried the same (wrong) filename.
        valid.append(['valid/{}/{}'.format(cat, f), cat_id, cat])
valid = pd.DataFrame(valid, columns=['file', 'category_id', 'category'])
valid.head(5)

In [None]:
# Build the test table: the path relative to the dataset root and the bare
# filename for every entry in the test folder.
test = pd.DataFrame(
    [['test/{}'.format(name), name] for name in os.listdir(test_dir)],
    columns=['filepath', 'file'],
)
# Quick sanity check of the table's contents and size.
test.head(2)
test.shape

In [None]:
# Preview of the training data: up to NUM_CATEGORIES sample images per
# category, drawn into a NUM_CATEGORIES x NUM_CATEGORIES grid of axes.
fig = plt.figure(1, figsize=(NUM_CATEGORIES, NUM_CATEGORIES))
grid = ImageGrid(fig, 111, nrows_ncols=(NUM_CATEGORIES, NUM_CATEGORIES), axes_pad=0.05)
i = 0  # running index into the grid's axes
for category_id, category in enumerate(CATEGORIES):
    category_files = train[train['category'] == category]['file'].values
    for filepath in category_files[:NUM_CATEGORIES]:
        img = Image.open("../input/withvalid/train_valid/train_valid/"+filepath).resize((240,240))
        ax = grid[i]
        ax.imshow(img)
        ax.axis('off')
        ends_grid_row = (i + 1) % NUM_CATEGORIES == 0
        if ends_grid_row:
            # Label the row beside its last cell; the second path component
            # of `filepath` is the category folder name.
            ax.text(250, 112, filepath.split('/')[1], verticalalignment='center')
        i += 1
plt.show();

## Model Structure

In [None]:
# Convolutional base: VGG19 with ImageNet weights and without its original
# dense classifier, taking 240x240 three-channel inputs.
model = applications.VGG19(weights="imagenet", include_top=False, input_shape=(240, 240, 3))
# Freeze the first five entries of `model.layers`; every later layer stays
# trainable and is fine-tuned.
for layer in model.layers[:5]:
    layer.trainable = False

# New classification head: two 1024-unit ReLU layers, each followed by 50%
# dropout, then a softmax over the seedling categories.
x = model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
# One output unit per category (NUM_CATEGORIES == 12) instead of a literal 12.
predictions = Dense(NUM_CATEGORIES, activation="softmax")(x)

# `inputs=` / `outputs=` are the supported keyword names; the legacy
# `input=` / `output=` spelling is deprecated and rejected by newer Keras.
model_final = Model(inputs=model.input, outputs=predictions)

model_final.compile(
    loss="categorical_crossentropy",
    optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
    metrics=["accuracy"],
)
model_final.summary()

In [None]:
# Random augmentation settings: full rotation, shifts and zoom of up to 30%,
# and flips along both axes.
augmentation_options = dict(
    rotation_range=360.,
    width_shift_range=0.3,
    height_shift_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
)
gen = ImageDataGenerator(**augmentation_options)

In [None]:
# Stream augmented training batches of 16 images, resized to 240x240, with
# one-hot ("categorical") labels taken from the class sub-folders.
train_data_dir = "../input/withvalid/train_valid/train_valid/train"
train_flow_options = {
    "target_size": (240, 240),
    "batch_size": 16,
    "class_mode": "categorical",
}
train_generator = gen.flow_from_directory(train_data_dir, **train_flow_options)

In [None]:
# Stream validation batches of 16 images, resized to 240x240, with one-hot
# ("categorical") labels taken from the class sub-folders.
valid_data_dir = "../input/withvalid/train_valid/train_valid/valid/valid"
# Validation images must not be randomly rotated/shifted/zoomed/flipped:
# augmenting them makes the validation metrics noisy and non-reproducible.
# A generator with no augmentation options is used instead of `gen`.
valid_gen = ImageDataGenerator()
valid_generator = valid_gen.flow_from_directory(
                        valid_data_dir,
                        target_size = (240, 240),
                        batch_size = 16,
                        class_mode = "categorical")

In [None]:
# Both callbacks track the validation loss. Monitoring the training loss (as
# before) ignores the validation data passed to training: it keeps the most
# over-fitted weights and almost never triggers early stopping.
# NOTE(review): the checkpoint file is named "vgg16_..." although the network
# is VGG19; the name is kept unchanged in case other code loads this path.
checkpoint = ModelCheckpoint(
    "vgg16_withvalid.h5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,      # only overwrite when the monitored value improves
    save_weights_only=False,  # save the full model, not just its weights
    mode='auto',
    period=1,
)
# Stop after 10 consecutive epochs without any improvement in validation loss.
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

In [None]:
# Train for up to 50 epochs, evaluating on the validation generator after each
# epoch; `checkpoint` and `early` handle model saving and early stopping.
batchsize = 16
# Keras expects integer step counts, but `n / batchsize` is a float under
# Python 3. Round up so the final partial batch is still included.
STEP_SIZE_TRAIN = int(np.ceil(train_generator.n / batchsize))
STEP_SIZE_VALID = int(np.ceil(valid_generator.n / batchsize))

model_final.fit_generator(
                    generator=train_generator,
                    validation_data=valid_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_steps=STEP_SIZE_VALID,
                    epochs = 50,
                    shuffle= True,
                    callbacks = [checkpoint, early])

## Prediction

In [None]:
# Predict a species name for every test image.

# `classes` was never defined before being used. Build it by inverting the
# training generator's {class name: index} mapping so that a predicted index
# can be turned back into its label.
classes = {index: name for name, index in train_generator.class_indices.items()}

prediction = []
for f in test['file']:
    image_path = os.path.join(test_dir, f)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError('Could not read test image: {}'.format(image_path))
    # OpenCV loads images as BGR, while the training generator feeds RGB
    # images; convert so inference sees the same channel order as training.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (240, 240))
    # Add the leading batch dimension expected by `predict`.
    img = np.asarray(img).reshape(1, 240, 240, 3)
    pred = model_final.predict(img)
    # The highest-scoring class index is mapped back to its species name.
    prediction.append(classes.get(pred.argmax(axis=-1)[0]))

In [None]:
# Keep only the filename column, load the competition's sample submission and
# write the current table to submission.csv.
test = test.drop(labels=['filepath'], axis=1)
sample_submission = pd.read_csv(
    filepath_or_buffer='../input/plant-seedlings-classification/sample_submission.csv'
)
test.to_csv(path_or_buf='submission.csv', index=False)
test.head(n=5)

In [None]:
# Attach the predicted labels as a 'species' column, aligned by row index.
pred = pd.DataFrame(data={'species': prediction})
test = test.join(other=pred)
test.head()

### Final submission File

In [None]:
# Write the final table to submission.csv without the index column.
test.to_csv(path_or_buf='submission.csv', index=False)