## VGG19+Xception
#### The detailed run logs are available in a private Kaggle Kernel; the attached PDF screenshots show that this notebook is runnable.
#### Reference: https://www.kaggle.com/atrisaxena/keras-plant-seedlings-vgg19-augmentation

In [None]:
# Notebook setup: silence warnings, list the input directory, and import the
# numeric, image and Keras tooling used by the cells below.
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Show what is available under the input directory.
print(os.listdir("../input"))
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one (this is why cells below can end with several bare expressions).
InteractiveShell.ast_node_interactivity = "all"

import cv2
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
# NOTE(review): numpy and ImageDataGenerator are imported a second time below;
# the repeats are harmless.
import numpy as np
from keras.utils import np_utils
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

In [None]:
# Species labels, one per sub-directory of the training folder. The position of
# a label in this list is used as its numeric category id.
CATEGORIES = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]
# Number of target classes.
NUM_CATEGORIES = len(CATEGORIES)

In [None]:
# Root of the competition data and the locations derived from it.
data_dir = '../input/'
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')

# Seed constant (defined here; not referenced by the visible cells).
SEED = 123

# Template submission shipped with the data set.
sample_submission = pd.read_csv(
    os.path.join(data_dir, 'sample_submission.csv')
)

### Number of training images for each Category

In [None]:
# Report how many training files each species folder contains.
for category in CATEGORIES:
    category_dir = os.path.join(train_dir, category)
    image_count = len(os.listdir(category_dir))
    print('{} {} images'.format(category, image_count))

In [None]:
# Index every training image as a row of
# (path relative to the data root, numeric class id, class name).
train = pd.DataFrame(
    [
        ['train/{}/{}'.format(label, image_name), label_id, label]
        for label_id, label in enumerate(CATEGORIES)
        for image_name in os.listdir(os.path.join(train_dir, label))
    ],
    columns=['file', 'category_id', 'category'],
)
train.head(2)
train.shape

In [None]:
# Index every test image as a row of
# (path relative to the data root, bare file name).
test = pd.DataFrame(
    [['test/{}'.format(image_name), image_name] for image_name in os.listdir(test_dir)],
    columns=['filepath', 'file'],
)
test.head(2)
test.shape

## Model Structure

In [None]:
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers import BatchNormalization

# Side length, in pixels, of the square RGB images fed to the network.
scale = 299

# ImageNet-pretrained Xception without its classification head; it is the
# first stage of the Sequential model below.
# Only this backbone is instantiated: a second pretrained network bound to
# `model` would be discarded by the `Sequential()` assignment that follows and
# would only cost a weight download and memory.
add_model = applications.Xception(weights="imagenet", include_top=False, input_shape=(scale, scale, 3))

model = Sequential()
model.add(add_model)

# Small convolutional head on top of the backbone's feature maps.
# No `input_shape` is given here: Keras only honours it on the first layer of
# a Sequential model, and the backbone already fixes the input size.
model.add(Conv2D(20, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(Conv2D(20, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
model.add(Dropout(0.2))

# Fully connected classifier ending in one softmax unit per species.
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CATEGORIES, activation='softmax'))

# Compile the model and print its layer summary.
model.compile(loss="categorical_crossentropy", optimizer=optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])
model.summary()

## Data Augmentation

In [None]:
# Augmentation applied to training images: random flips along both axes, any
# rotation angle, and shifts/zoom with a range setting of 0.3.
gen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=360.,
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
)

In [None]:
# Stream augmented training batches straight from the class sub-folders;
# labels are produced as one-hot vectors ("categorical").
train_data_dir = "../input/train"
train_generator = gen.flow_from_directory(
    directory=train_data_dir,
    class_mode="categorical",
    batch_size=16,
    target_size=(scale, scale),
)

In [None]:
# Save the full model (not just weights) whenever the monitored 'loss' metric
# improves, checking every epoch.
checkpoint = ModelCheckpoint(
    "Vgg19FreezeXception.h5",
    monitor='loss',
    mode='auto',
    save_best_only=True,
    save_weights_only=False,
    period=1,
    verbose=1,
)

# Stop training once 'loss' has failed to improve for 10 consecutive epochs.
early = EarlyStopping(
    monitor='loss',
    mode='auto',
    min_delta=0,
    patience=10,
    verbose=1,
)

In [None]:
# Number of batches needed to see every training image once per epoch.
# Derived from the generator's own batch size (rather than a repeated literal)
# and rounded up to an integer, since steps_per_epoch is a step count and a
# partial final batch still takes one step.
STEP_SIZE_TRAIN = int(np.ceil(train_generator.n / float(train_generator.batch_size)))

# Train on the augmented stream, checkpointing the best model and stopping
# early when the monitored metric stalls.
model.fit_generator(
                    train_generator,
                    epochs = 50,
                    shuffle= True,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    callbacks = [checkpoint, early])

## Prediction of model 

In [None]:
# Inverse label mapping: the training generator exposes
# {class name -> class index}; predictions need {class index -> class name}.
classes = {index: label for label, index in train_generator.class_indices.items()}

# Predict one species name per test image, in the row order of `test`.
prediction = []
for filepath in test['filepath']:
    image_path = os.path.join(data_dir, filepath)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError('Could not read image: {}'.format(image_path))
    # OpenCV decodes to BGR, while the training generator supplies RGB images;
    # reorder the channels so inference matches the training input.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (scale, scale))
    img = np.asarray(img)
    # Add the leading batch dimension expected by model.predict.
    img = img.reshape(1, scale, scale, 3)
    pred = model.predict(img)
    # Map the arg-max class index back to its species name.
    prediction.append(classes[int(pred.argmax(axis=-1)[0])])

In [None]:
# Build the submission table: keep the file name, drop the relative path, and
# attach the predicted species (aligned on the default row index).
pred = pd.DataFrame({'species': prediction})
test = test.drop(columns=['filepath']).join(pred)
test.head()

### Final submission File

In [None]:
# Write the submission file without the DataFrame index column.
submission_path = 'submission.csv'
test.to_csv(submission_path, index=False)