In [0]:
# %% [code] {"id":"QpH0zvdTHUV9","colab_type":"code","colab":{}}
import cv2
import numpy as np
import pandas as pd

import matplotlib.pyplot as plot
%matplotlib inline

# %% [markdown]
# **Import tensorflow libraries**

# %% [code] {"id":"pxfvGsH4bIzH","colab_type":"code","outputId":"2d8b2f59-f958-449a-ab14-734bfc6b11bc","colab":{"base_uri":"https://localhost:8080/","height":34}}
import tensorflow
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img
from keras.preprocessing.image import ImageDataGenerator

# %% [code] {"id":"QdXsXtTkbM5k","colab_type":"code","outputId":"d7821814-d827-42f8-c240-1eae6a2a815e","colab":{"base_uri":"https://localhost:8080/","height":34}}
import os
# List the entries under the dataset root; the result is shown as the cell output.
os.listdir("../input/")

# %% [markdown]
# **The function below loads every training image found in the sub-folders of the given directory and labels each image with the name of its sub-folder.**

# %% [code] {"id":"MHc4zpczBZNA","colab_type":"code","colab":{}}
def get_images(directory, size=(300, 300)):
    """Load every readable image under ``directory``, labelled by sub-folder.

    Each immediate sub-directory of ``directory`` is treated as one class and
    its name becomes the label of every image found inside it.

    Args:
        directory: Path of the training root, with or without a trailing
            path separator.
        size: ``(width, height)`` handed to ``cv2.resize``. Defaults to the
            300x300 used throughout the notebook.

    Returns:
        A tuple ``(Images, Labels)`` of two equal-length lists: the resized
        images and the name of the sub-directory each one came from.
    """
    Images = []
    Labels = []
    # Sorted so the sample order does not depend on the file system's listing order.
    for dir_name in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, dir_name)
        # A stray file next to the class folders would make os.listdir() raise.
        if not os.path.isdir(class_dir):
            continue
        for image_file in sorted(os.listdir(class_dir)):
            image = cv2.imread(os.path.join(class_dir, image_file))
            # Files that could not be decoded come back as None; skip them.
            if image is None:
                continue
            Images.append(cv2.resize(image, size))
            Labels.append(dir_name)
    return Images, Labels

# %% [code] {"id":"7FWuFWqxBeYY","colab_type":"code","colab":{}}
# Read every training image and its folder-name label into memory.
Images, Labels = get_images('../input/train/')

# %% [markdown]
# **Encode the text labels as integers, since machine learning models operate on numeric data.**

# %% [code] {"id":"xu0ihZnTHY5S","colab_type":"code","colab":{}}
# Lookup table from class (folder) name to its integer id, 12 classes in total.
mapping = {
    'Sugar beet': 0,
    'Fat Hen': 1,
    'Scentless Mayweed': 2,
    'Charlock': 3,
    'Small-flowered Cranesbill': 4,
    'Maize': 5,
    'Shepherds Purse': 6,
    'Common wheat': 7,
    'Common Chickweed': 8,
    'Cleavers': 9,
    'Loose Silky-bent': 10,
    'Black-grass': 11,
}
# Translate each folder-name label into its integer id, keeping the order.
labels = [mapping[class_name] for class_name in Labels]
# The string labels are no longer needed once the integer ids exist.
del Labels

# %% [code] {"id":"MGm0yoZK_h-p","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"05be089f-fd3e-4ed5-88d9-963b20dad870"}
# Sanity check: dimensions of the first loaded image (shown as the cell output).
Images[0].shape

# %% [markdown]
# **Reshape the images into a 4-dimensional tensor. (The model requires its input in the format [number of images, height, width, channels].)**

# %% [code] {"id":"S5BkEVq0CXCJ","colab_type":"code","colab":{}}
# Stack the image list into one array shaped (-1, 300, 300, 3) and turn the
# integer labels into a NumPy vector.
Images = np.asarray(Images).reshape(-1, 300, 300, 3)
Labels = np.asarray(labels)

# %% [code] {"id":"TvDIL2DfD8Lv","colab_type":"code","outputId":"cd2dde8f-b614-49fb-b6f9-117d196874b3","colab":{"base_uri":"https://localhost:8080/","height":50}}
# Print both shapes so the number of images can be compared with the number of labels.
print("Shape of training data: ", Images.shape)
print("Shape of labels data: ", Labels.shape)

# %% [markdown]
# **Split the data into training and validation sets to check the accuracy of the model on unseen data.**

# %% [code] {"id":"4hrCXjapPFB1","colab_type":"code","colab":{}}
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. The split is stratified on the
# labels and uses a fixed seed so it can be repeated.
x_train, x_val, y_train, y_val = train_test_split(
    Images,
    Labels,
    test_size=0.2,
    random_state=42,
    stratify=Labels,
)

# %% [markdown]
# **The cell below performs one-hot encoding on the labels.**

# %% [code] {"id":"j53KKooFZuCu","colab_type":"code","colab":{}}
# Use the to_categorical helper that ships with tensorflow.keras so the label
# encoding comes from the same Keras implementation as the model itself; the
# standalone `keras.utils.np_utils` module has been removed from newer Keras
# releases.
from tensorflow.keras.utils import to_categorical

# Turn the integer class ids into one-hot rows with 12 columns.
# NOTE: this cell rebinds y_train / y_val, so it must be run only once after
# the split cell.
y_train = to_categorical(y_train, num_classes=12)
y_val = to_categorical(y_val, num_classes=12)

# %% [markdown]
# **ImageDataGenerator helps in image augmentation by performing various operations on the existing images.**

# %% [code] {"id":"jYjs8GDIMzJf","colab_type":"code","colab":{}}
# Augmentation for the training batches: random rotations (rotation_range=20)
# plus horizontal and vertical shifts (shift ranges of 0.2).
# NOTE(review): no rescaling or VGG-specific preprocessing is configured, so
# raw pixel values are fed to the network — confirm this is intended.
train_datagen = ImageDataGenerator(
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                  )

# Validation data goes through a generator with no augmentation options set.
validation_datagen = ImageDataGenerator()

# %% [code]
# Drop the references to the un-split arrays; only the train/validation
# splits are used from here on.
del Images, Labels

# %% [code] {"id":"dt98WrdSNkaw","colab_type":"code","colab":{}}
# Batch iterators over the in-memory arrays: 32 samples per training batch,
# 16 per validation batch.
train_generator = train_datagen.flow(x_train, y_train, batch_size=32)
validation_generator = validation_datagen.flow(x_val, y_val, batch_size=16)

# %% [markdown]
# **Here, I am using VGG16 Pretrained Network**

# %% [code] {"id":"cOIyxa4ezEOk","colab_type":"code","outputId":"9750363f-e81e-42d8-97a0-1744be220231","colab":{"base_uri":"https://localhost:8080/","height":121}}
from tensorflow.keras.applications import VGG16

# VGG16 without its classifier head, initialised with the 'imagenet' weights
# and built for 300x300 3-channel inputs.
vgg = VGG16(weights='imagenet', include_top=False, input_shape=(300, 300, 3))

# %% [code] {"id":"2WXE5t84w1nH","colab_type":"code","colab":{}}
import tensorflow.keras.optimizers as Optimizer
from tensorflow.keras.layers import Flatten, Dense, Dropout, GlobalAvgPool2D, GlobalMaxPooling2D, Concatenate
from tensorflow.keras.models import Model

# %% [code] {"id":"xoxZQQdlzETk","colab_type":"code","colab":{}}
# Freeze the VGG16 base (both the model-level flag and every layer's own flag)
# so that only the newly added head is trained.
vgg.trainable = False
for layer in vgg.layers:
    layer.trainable = False

# Head input: global average-pooled and global max-pooled VGG features,
# concatenated along the channel axis.
fc1 = Concatenate(axis=-1)([GlobalAvgPool2D()(vgg.output), GlobalMaxPooling2D()(vgg.output)])
fc1 = Dense(400, activation='relu')(fc1)
fc1_dropout = Dropout(0.3)(fc1)
fc2 = Dense(200, activation='relu')(fc1_dropout)
fc2_dropout = Dropout(0.3)(fc2)
# A further Dense(75) used to be applied to `fc1_dropout` and bound to `fc2`
# here, but nothing consumed it: the output layer reads `fc2_dropout`, so that
# layer was never part of the model. It is removed; the resulting graph and its
# weights are exactly the same as before.
# NOTE(review): if a third 75-unit layer was actually intended, it has to be
# wired in explicitly, and identically wherever the architecture is rebuilt
# for loading the saved weights.
output = Dense(12, activation='softmax')(fc2_dropout)
model = Model(vgg.input, output)

model.compile(optimizer=Optimizer.Adam(lr=0.0001),loss='categorical_crossentropy',metrics=['accuracy'])
model.summary()


# %% [markdown]
# **During training, you can save the model's best weights using ModelCheckpoint. The one with the minimum validation loss is saved.**

# %% [code] {"id":"9R_KViGUV-CQ","colab_type":"code","outputId":"3ca75b77-daa4-4171-fafc-bfb626460232","colab":{"base_uri":"https://localhost:8080/","height":6938}}
from tensorflow.keras.callbacks import ModelCheckpoint

# Write the weights to disk only when the monitored validation loss improves.
checkpoint = ModelCheckpoint('saved_model.hdf5', monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# validation_steps spans the whole validation generator, so the val_loss that
# decides which checkpoint counts as "best" is measured on every held-out
# batch rather than on just 10 of them.
# steps_per_epoch is kept at 25 batches, so one "epoch" here is 25 training
# batches, not necessarily a full pass over the training data.
trained = model.fit_generator(train_generator, steps_per_epoch=25, epochs=200,
                              validation_data=validation_generator,
                              validation_steps=len(validation_generator),
                              verbose=1, callbacks=callbacks_list)

# %% [markdown]
# **Plotting the graph of model's accuracy and loss.**

# %% [code] {"id":"X02kvmYeLb1T","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":574},"outputId":"9898bdaf-1b9d-49a7-f12a-33179d260244"}
# Keras versions differ in how the accuracy metric is keyed in the history
# ('acc'/'val_acc' vs 'accuracy'/'val_accuracy'); use whichever this run logged.
history = trained.history
acc_key = 'acc' if 'acc' in history else 'accuracy'

# One figure per metric: training curve and validation curve over the epochs.
for metric, title, ylabel in ((acc_key, 'Model accuracy', 'Accuracy'),
                              ('loss', 'Model loss', 'Loss')):
    plot.plot(history[metric])
    plot.plot(history['val_' + metric])
    plot.title(title)
    plot.ylabel(ylabel)
    plot.xlabel('Epoch')
    # The second curve comes from the validation generator, not a test set.
    plot.legend(['Train', 'Validation'], loc='upper left')
    plot.show()

# %% [code] {"id":"_yA0Cgu2Wx-s","colab_type":"code","colab":{}}
def get_test_images(directory, size=(300, 300)):
    """Load every readable image directly inside ``directory``.

    Args:
        directory: Folder holding the test images, with or without a trailing
            path separator.
        size: ``(width, height)`` handed to ``cv2.resize``. Defaults to the
            300x300 used throughout the notebook.

    Returns:
        A tuple ``(Images, Image_names)`` of two equal-length lists, where
        ``Image_names[k]`` is the file name of ``Images[k]``. Files that
        could not be read are left out of both lists.
    """
    Images = []
    Image_names = []
    # Sorted so the output order does not depend on the file system's listing order.
    for image_file in sorted(os.listdir(directory)):
        image = cv2.imread(os.path.join(directory, image_file))
        # Files that could not be decoded come back as None; skip them.
        if image is None:
            continue
        Images.append(cv2.resize(image, size))
        # Record the name only for images that were actually loaded, so the
        # two lists stay aligned index-for-index.
        Image_names.append(image_file)
    return Images, Image_names

# %% [code] {"id":"RFK7jETxNcKL","colab_type":"code","colab":{}}
# Load the test images with their file names, then stack the images into a
# single array and print its shape.
test_images, image_names = get_test_images('../input/test/')
test_images = np.array(test_images)
print(test_images.shape)

# %% [markdown]
# **To load the saved weights, we first need to define the same model architecture again. Also, make sure you do not compile the model this time.**

# %% [code]
# Rebuild the network used for training so the checkpointed weights can be
# loaded into it.
vgg = VGG16(include_top=False, weights='imagenet', input_shape=(300, 300, 3))

# Freeze the VGG16 base (model-level flag and every layer's own flag).
vgg.trainable = False
for layer in vgg.layers:
    layer.trainable = False

# Head input: global average-pooled and global max-pooled VGG features,
# concatenated along the channel axis.
fc1 = Concatenate(axis=-1)([GlobalAvgPool2D()(vgg.output), GlobalMaxPooling2D()(vgg.output)])
fc1 = Dense(400, activation='relu')(fc1)
fc1_dropout = Dropout(0.3)(fc1)
fc2 = Dense(200, activation='relu')(fc1_dropout)
fc2_dropout = Dropout(0.3)(fc2)
# A further Dense(75) used to be applied to `fc1_dropout` and bound to `fc2`
# here, but the output layer reads `fc2_dropout`, so that layer was never part
# of the model. Removing it leaves the graph, and therefore the set of weights
# expected by load_weights, exactly as before.
output = Dense(12, activation='softmax')(fc2_dropout)
model = Model(vgg.input, output)

# %% [code]
# Restore the weights from 'saved_model.hdf5', the file written by the
# ModelCheckpoint callback during training.
model.load_weights('saved_model.hdf5')

# %% [markdown]
# **If you ever want to save the entire model (architecture and weights together), you can do so with tensorflow.keras.models.save_model().**

# %% [code]
# Write the rebuilt model to a single HDF5 file, replacing any existing file.
# NOTE(review): this model was rebuilt without calling compile(), so
# include_optimizer=True presumably has no optimizer state to store — confirm
# whether it should be False.
tensorflow.keras.models.save_model(
    model,
    'tf_model.hdf5',
    overwrite=True,
    include_optimizer=True
)

# %% [markdown]
# **You can then load the entire model with load_model() from tensorflow.keras.models.**

# %% [code]
from tensorflow.keras.models import load_model
# Read the model back from the file written by save_model() above; this
# replaces the in-memory `model` with the reloaded one.
model = load_model('tf_model.hdf5')

# %% [markdown]
# **Here, the model predicts the new images using function model.predict()**

# %% [code] {"id":"vUzsKdL7QrtI","colab_type":"code","colab":{}}
# Run the model on every test image and keep, for each row of the output,
# the index of the highest score.
predictions = np.argmax(model.predict(test_images), axis=1)

# %% [code]
# Integer id -> class name; the inverse of the table used to encode the
# training labels.
mapping = {
    0: 'Sugar beet',
    1: 'Fat Hen',
    2: 'Scentless Mayweed',
    3: 'Charlock',
    4: 'Small-flowered Cranesbill',
    5: 'Maize',
    6: 'Shepherds Purse',
    7: 'Common wheat',
    8: 'Common Chickweed',
    9: 'Cleavers',
    10: 'Loose Silky-bent',
    11: 'Black-grass',
}
# Replace every predicted id with its class name, keeping the order.
labelled_predictions = [mapping[class_id] for class_id in predictions]

# %% [markdown]
# **Preparing the predictions for submission**

# %% [code] {"id":"zQ_hh0ZuPB2_","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":806},"outputId":"b2c5edbf-4d6e-43f7-e977-ce3a631694b2"}
# Pair each prediction with the file name at the same position and collect
# the pairs as rows of the submission table.
d = [
    {'file': file_name, 'species': species}
    for file_name, species in zip(image_names, labelled_predictions)
]
output = pd.DataFrame(d)
# Write the table without the index column.
output.to_csv('submission.csv', index=False)