# Mount the drive data

---






In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
# The call is interactive: it prints an authorization URL and waits for the code to be entered.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


# Import all required libraries

---



In [15]:
import os
from os import listdir
from os.path import isfile, join
from os import walk
import glob
import shutil
import re

import scipy.io as sio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import sklearn
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.models import load_model
from keras.optimizers import RMSprop, Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing import image
from keras.applications import VGG16
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.applications.imagenet_utils import preprocess_input, decode_predictions


In [3]:
# Locations on the mounted Drive that the rest of the notebook reads from / writes to.
# PARENT_PATH  - project root; the per-split folders (train_1, valid_1, ...) are created and read here
# IMAGES_PATH  - source images that are walked and copied into the per-split folders
# MAT_PATH / MAT_NAME - directory and file name of the .mat file holding the split indices
# SAVED_MODELS - directory the trained .hdf5 checkpoints are loaded from
PARENT_PATH  = '/content/drive/My Drive/Flower_Classification'
IMAGES_PATH  = '/content/drive/My Drive/Flower_Classification/Images'
MAT_PATH     = '/content/drive/My Drive/Flower_Classification'
MAT_NAME     = 'datasplits.mat'
SAVED_MODELS = '/content/drive/My Drive/Flower_Classification/trained_model'

Read the `.mat` file with the data splits

In [None]:
class preprocess:
  """Turn the index splits stored in a .mat file into folders of images.

  The .mat file is expected to contain the keys ``trn1..trn3``, ``tst1..tst3``
  and ``val1..val3``; each one is flattened into a plain list of indices.
  Indices are later mapped to file names of the form ``image_XXXX.jpg``
  (index zero-padded to 4 digits) and matching files found under
  ``image_path`` are copied into new folders below ``parent_path``.

  Attributes set by ``__init__``:
    train_idx / test_idx / valid_idx : dict, e.g. ``{'train_1': [...], ...}``
    data1 / data2 / data3            : tuple of key names, e.g.
                                       ``('train_1', 'test_1', 'valid_1')``
    data1_idx / data2_idx / data3_idx: dict key name -> list of indices
    data1_img / data2_img / data3_img: dict key name -> list of file names,
                                       empty until ``indices_to_img_names()``
  """

  def __init__(self, mat_path = None, mat_file_name = None, image_path = None, parent_path = None):
    """Load ``mat_path/mat_file_name`` and organise its indices per split.

    Args:
      mat_path: directory that contains the .mat file.
      mat_file_name: file name of the .mat file.
      image_path: root directory that is walked to find the source images.
      parent_path: directory under which the split folders are created.
    """
    self.parent_path = parent_path
    self.image_path  = image_path
    self.mat         = sio.loadmat(os.path.join(mat_path, mat_file_name))

    # collect train, test, valid in separate dicts, keyed 'train_1', 'test_1', ...
    split_numbers    = (1, 2, 3)
    self.train_idx   = {'train_' + str(n) : self.mat['trn' + str(n)].ravel().tolist() for n in split_numbers}
    self.test_idx    = {'test_'  + str(n) : self.mat['tst' + str(n)].ravel().tolist() for n in split_numbers}
    self.valid_idx   = {'valid_' + str(n) : self.mat['val' + str(n)].ravel().tolist() for n in split_numbers}

    # group the key names that belong to the same split: (train_n, test_n, valid_n)
    self.data1, self.data2, self.data3 = zip(self.train_idx, self.test_idx, self.valid_idx)

    self.data1_idx   = self._collect_split(self.data1)
    self.data2_idx   = self._collect_split(self.data2)
    self.data3_idx   = self._collect_split(self.data3)

    # filled by indices_to_img_names(): same keys, indices replaced by file names
    self.data1_img   = {}
    self.data2_img   = {}
    self.data3_img   = {}

  def _collect_split(self, key_names):
    """Return {train_key: indices, test_key: indices, valid_key: indices} for one split."""
    train_key, test_key, valid_key = key_names
    return {train_key : self.train_idx[train_key],
            test_key  : self.test_idx[test_key],
            valid_key : self.valid_idx[valid_key]}

  def _matching_images(self, wanted_names):
    """Yield (dirpath, label, names) for each directory under image_path that holds files.

    ``label`` is the last path component of the directory and ``names`` is the
    sorted intersection of ``wanted_names`` with the files in that directory.
    """
    for dirpath, _dirnames, filenames in walk(self.image_path):
      if filenames:
        yield dirpath, os.path.basename(dirpath), np.intersect1d(wanted_names, filenames)

  def indices_to_img_names(self):
    """Fill data1_img/data2_img/data3_img with file names and return the three dicts."""
    targets = [self.data1_img, self.data2_img, self.data3_img]
    sources = [self.data1_idx, self.data2_idx, self.data3_idx]
    for target, source in zip(targets, sources):
      for key, indices in source.items():
        target[key] = ['image_' + str(idx).zfill(4) + '.jpg' for idx in indices]
    return targets[0], targets[1], targets[2]

  def create_folders_ImageName_with_labels(self, datasplits):
    """Copy each split into one flat folder, appending the class label to the file name.

    Args:
      datasplits: dict folder name -> list of image file names.

    For every key a folder ``parent_path/<key>`` is created and each matching
    image is copied there as ``<name-before-first-dot>_<label>.jpg``.
    """
    for dir_name, wanted_names in datasplits.items():
      print('Create a new folder - ' + dir_name)
      new_folder = os.path.join(self.parent_path, dir_name)
      # exist_ok: re-running the cell must not fail on folders made by an earlier run
      os.makedirs(new_folder, exist_ok=True)

      for dirpath, label, common_image_names in self._matching_images(wanted_names):
        print('Copying selected Images to the created directory ' + dir_name)
        for img in common_image_names:
          shutil.copy(os.path.join(dirpath, img), os.path.join(new_folder, img.split('.')[0] + '_' + label + '.jpg'))
    return

  def create_folders_model_training(self, datasplits):
    """Copy each split into ``parent_path/<key>/<label>/`` (one sub-folder per class).

    Args:
      datasplits: dict folder name -> list of image file names.

    A class sub-folder is created for every source directory that contains
    files, even when none of its files belong to the split.
    """
    for dir_name, wanted_names in datasplits.items():
      print('Create a new folder - ' + dir_name)
      new_folder = os.path.join(self.parent_path, dir_name)
      os.makedirs(new_folder, exist_ok=True)

      for dirpath, folder_label, common_image_names in self._matching_images(wanted_names):
        print('Copying selected Images to the created directory ' + dir_name)
        flower_folder = os.path.join(new_folder, folder_label)
        os.makedirs(flower_folder, exist_ok=True)
        for img in common_image_names:
          shutil.copy(os.path.join(dirpath, img), os.path.join(flower_folder, img))
    return

  def unpack_indices(self):
    """Return the three name dicts (empty until indices_to_img_names() has run)."""
    return self.data1_img, self.data2_img, self.data3_img

# Driver: load the split indices, convert them to image file names and copy the
# images of one split into per-class folders under PARENT_PATH.
if __name__ == "__main__": 
  cls             = preprocess(MAT_PATH, MAT_NAME, IMAGES_PATH, PARENT_PATH)
  d1, d2, d3      = cls.indices_to_img_names()
  # create_folders_model_training ends with a bare `return`, so folder_creation is always None
  folder_creation = cls.create_folders_model_training(d1) # based on the split select d1, d2 or d3

# Model & calling Functions

In [8]:
def pretrained_VGG16(train_set = None, valid_set = None, test_set = None):
  """Build the VGG16 convolutional base and the three directory generators.

  Args:
    train_set: name of the training folder below PARENT_PATH (e.g. 'train_1').
    valid_set: name of the validation folder below PARENT_PATH.
    test_set: name of the test folder below PARENT_PATH.

  Returns:
    (vgg16, train_generator, validation_generator, test_generator)
    ``vgg16`` is the ImageNet-weighted model without its top layers; only the
    training generator applies augmentation, and the validation/test
    generators are created with ``shuffle=False``.

  Raises:
    TypeError: if any of the three folder names is left as None.
  """
  # Fail before the VGG16 weights are loaded instead of on the later `str + None`
  missing = [name for name, value in (('train_set', train_set), ('valid_set', valid_set), ('test_set', test_set)) if value is None]
  if missing:
    raise TypeError('pretrained_VGG16() needs a folder name for: ' + ', '.join(missing))

  # VGG16 was designed to work on 224 x 224 pixel input images sizes
  img_rows = 224
  img_cols = 224

  # Re-loads the VGG16 model without the top or FC layers
  vgg16 = VGG16(weights = 'imagenet', include_top = False,  input_shape = (img_rows, img_cols, 3))

  # Freeze the first 10 layers; all remaining layers keep the default trainable = True
  for layer in vgg16.layers[:10]: #vgg16.layers[:5]
      layer.trainable = False

  train_data_dir      = PARENT_PATH + '/' + train_set
  validation_data_dir = PARENT_PATH + '/' + valid_set
  test_data_dir       = PARENT_PATH + '/' + test_set

  # Every generator rescales pixels to [0, 1]; augmentation is applied to training data only
  train_datagen       = ImageDataGenerator(rescale=1./255, rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True, fill_mode='nearest')
  validation_datagen  = ImageDataGenerator(rescale=1./255)
  test_datagen        = ImageDataGenerator(rescale=1./255)

  train_batchsize     = 64
  val_batchsize       = 64
  test_batchsize      = 64

  train_generator     = train_datagen.flow_from_directory(train_data_dir, target_size=(img_rows, img_cols),  batch_size=train_batchsize, class_mode='categorical')
  # shuffle=False keeps the sample order fixed so outputs line up with the generator's file list
  validation_generator= validation_datagen.flow_from_directory(validation_data_dir, target_size=(img_rows, img_cols),  batch_size=val_batchsize, class_mode='categorical', shuffle=False)
  test_generator      = test_datagen.flow_from_directory(test_data_dir, target_size=(img_rows, img_cols), batch_size=test_batchsize, class_mode='categorical', shuffle=False)

  return vgg16, train_generator, validation_generator, test_generator

def addTopModel(bottom_model, num_classes, D=256):
  """Build the classification head on top of ``bottom_model`` and return its output tensor.

  The head is Flatten -> Dense(D, relu) -> Dropout(0.3) -> Dense(num_classes, softmax),
  applied to ``bottom_model.output``.
  """
  head_layers = [
      Flatten(name = "flatten"),
      Dense(D, activation = "relu"),
      Dropout(0.3),
      Dense(num_classes, activation = "softmax"),
  ]

  tensor = bottom_model.output
  for head_layer in head_layers:
    tensor = head_layer(tensor)
  return tensor

def _plot_training_curve(history, metric, title):
  """Plot the training and validation series of ``metric`` from a Keras History object."""
  plt.plot(history.history[metric])
  plt.plot(history.history['val_' + metric])
  plt.title(title)
  plt.ylabel(metric)
  plt.xlabel('epoch')
  plt.grid(True)
  plt.legend(['train', 'validation'], loc='upper left')
  plt.show()


def model_processing(train_set = 'train_3', valid_set = 'valid_3', test_set = 'test_3', num_classes = 17,
                     nb_train_samples = 1224, nb_validation_samples = 250, epochs = 25, batch_size = 64):
  """Fine-tune VGG16 with a new classification head and plot the training curves.

  Args:
    train_set, valid_set, test_set: folder names below PARENT_PATH that are
      handed to pretrained_VGG16(). They default to split 3 because the
      checkpoint file name below is hard-coded with 'train3'.
    num_classes: size of the softmax output layer.
    nb_train_samples, nb_validation_samples: sample counts used only to derive
      steps_per_epoch / validation_steps (integer division by batch_size).
    epochs: number of training epochs.
    batch_size: divisor for the step computation; the generators themselves are
      built with a batch size of 64 inside pretrained_VGG16().

  Returns:
    None. The best weights (by val_accuracy) are written by ModelCheckpoint
    into PARENT_PATH/trained_model.
  """
  # The folder names must be passed explicitly: pretrained_VGG16() cannot build its paths from None
  vgg16, train_generator, validation_generator, test_generator = pretrained_VGG16(train_set = train_set, valid_set = valid_set, test_set = test_set)

  FC_Head       = addTopModel(vgg16, num_classes)
  model         = Model(inputs=vgg16.input, outputs=FC_Head)
  model.summary()

  checkpoint    = ModelCheckpoint(PARENT_PATH + '/trained_model/weights-improvement_train3 -{epoch:02d}-{val_accuracy:.2f}.hdf5', monitor= "val_accuracy", mode="max", save_best_only = True,verbose=1)
  # NOTE: earlystop is built but not part of `callbacks`, so it has no effect;
  # append it to the list below to enable early stopping.
  earlystop     = EarlyStopping(monitor = 'val_loss',  min_delta = 0,  patience = 3, verbose = 1, restore_best_weights = True)
  reduce_lr     = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 3, verbose = 1, min_delta = 0.0001)
  callbacks     = [checkpoint, reduce_lr]

  model.compile(loss = 'categorical_crossentropy', optimizer = RMSprop(), metrics = ['accuracy'])

  history       = model.fit_generator(train_generator, steps_per_epoch = nb_train_samples // batch_size, epochs = epochs,
                                      callbacks = callbacks, validation_data = validation_generator, validation_steps = nb_validation_samples // batch_size)

  print(history.history.keys())

  # summarize history for accuracy, then for loss
  _plot_training_curve(history, 'accuracy', 'model accuracy')
  _plot_training_curve(history, 'loss', 'model loss')

  return

# Evaluate the model
# Folder names of the three data splits, each listed as [train, valid, test]
splits = [['train_1', 'valid_1', 'test_1'],['train_2', 'valid_2', 'test_2'],['train_3', 'valid_3', 'test_3']]
def evaluate_model(TrainedModel = None, Splits = None):
    """Evaluate ``TrainedModel`` on the validation and test folders of one split.

    Args:
      TrainedModel: object exposing ``evaluate(generator)`` that returns a (loss, accuracy) pair.
      Splits: sequence whose items 0, 1, 2 are the train, valid and test folder names.

    Returns:
      (valid_loss, valid_acc, test_loss, test_acc)
    """
    train_name, valid_name, test_name = Splits[0], Splits[1], Splits[2]
    # Only the generators are needed here; the freshly built VGG16 base is discarded
    _base, _train_gen, valid_gen, test_gen = pretrained_VGG16(
        train_set = train_name, valid_set = valid_name, test_set = test_name)

    valid_loss, valid_acc = TrainedModel.evaluate(valid_gen)
    test_loss, test_acc = TrainedModel.evaluate(test_gen)
    return valid_loss, valid_acc, test_loss, test_acc

# valid_loss, valid_acc , test_loss, test_acc = evaluate_model(TrainedModelSplit_1, splits[0])


# Prediction

In [None]:
# Folder names of the three data splits, each listed as [train, valid, test]
splits = [['train_1', 'valid_1', 'test_1'],['train_2', 'valid_2', 'test_2'],['train_3', 'valid_3', 'test_3']]
def prediction(split_no = 0, Model = None):
  """Predict the test folder of one split and print the resulting accuracy.

  Args:
    split_no: index into ``splits`` (0, 1 or 2).
    Model: trained model exposing ``predict(generator)``. Inside this function
      the parameter name hides the Keras ``Model`` class.

  Returns:
    (prediction, actual): the predicted class indices (argmax over axis 1 of
    the model output) and the true class indices as a list of ints.
  """
  split  =  splits[split_no]
  _, train_generator, validation_generator, test_generator = pretrained_VGG16(train_set = split[0], valid_set = split[1], test_set = split[2])
  predicted = np.argmax(Model.predict(test_generator), axis=1)
  # The directory iterator already stores the class index of every file, in file
  # order, so there is no need to parse the folder name out of each file path
  # (which only worked with '/' as the path separator).
  actual = np.asarray(test_generator.classes).tolist()
  print(f'Accuracy of the model for split no {split_no}:', accuracy_score(actual, predicted))
  return predicted, actual

# Call all the 3 splits & respective Trained model
# NOTE(review): TrainedModelSplit_1..3 are assigned by the load_model cell further down in
# this notebook, so that cell has to be executed before this one on a fresh kernel.
prediction(0, TrainedModelSplit_1)
prediction(1, TrainedModelSplit_2)
prediction(2, TrainedModelSplit_3)

# Load the saved models & predict on sample pictures

In [5]:
# Load the best trained models from 3 different splits
# The checkpoint files are read from SAVED_MODELS. The trailing numbers in each file name
# look like "<epoch>-<val_accuracy>", as produced by the ModelCheckpoint pattern - TODO confirm.

TrainedModelSplit_1 = load_model(SAVED_MODELS + '/weights-improvement-15-0.86.hdf5')
TrainedModelSplit_2 = load_model(SAVED_MODELS + '/weights-improvement_train2 -21-0.88.hdf5')
TrainedModelSplit_3 = load_model(SAVED_MODELS + '/weights-improvement_train3 -13-0.87.hdf5')

In [24]:
# Build the index -> class-name lookup used to decode predictions.
# NOTE(review): in the visible cells validation_generator only exists as a local variable
# inside functions; presumably it was left in the kernel by an earlier run - confirm it is
# defined at module level before running this cell.
class_labels = validation_generator.class_indices
# class_indices maps name -> index; invert it to index -> name
class_labels = {v: k for k, v in class_labels.items()}
classes = list(class_labels.values())
print(class_labels)

# checking model on randomly picked images
def getRandomImage(path, img_width, img_height):
    """Load a random image from a random class sub-folder of ``path``.

    Args:
      path: directory whose sub-folders are the class folders; a trailing
        separator is optional.
      img_width: width the image is resized to.
      img_height: height the image is resized to.

    Returns:
      (img, final_path, path_class): the loaded image, the path it was read
      from and the name of the class folder it was picked from.

    Raises:
      ValueError: if ``path`` has no sub-folders or the chosen folder has no files.
    """
    folders = [name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))]
    if not folders:
        raise ValueError('No class folders found in ' + str(path))
    path_class = folders[np.random.randint(0, len(folders))]

    # os.path.join instead of string concatenation: works with or without a trailing '/'
    file_path = os.path.join(path, path_class)
    file_names = [f for f in listdir(file_path) if isfile(join(file_path, f))]
    if not file_names:
        raise ValueError('No image files found in ' + str(file_path))
    image_name = file_names[np.random.randint(0, len(file_names))]
    final_path = os.path.join(file_path, image_name)

    # Keras' load_img expects target_size as (height, width)
    return image.load_img(final_path, target_size = (img_height, img_width)), final_path, path_class

# dimensions of our images
img_width, img_height = 224, 224

files = []
predictions = []
true_labels = []

# predicting images
# The samples are drawn from the split-1 test folder, so the model trained on split 1
# (TrainedModelSplit_1) does the prediction; no `trained_model` name is defined in this notebook.
path = PARENT_PATH + '/test_1/'
for i in range(0, 5):
    img, final_path, true_label = getRandomImage(path, img_width, img_height)
    files.append(final_path)
    true_labels.append(true_label)
    x = image.img_to_array(img)
    x = x * 1./255                   # same rescaling the generators apply
    x = np.expand_dims(x, axis=0)    # add the batch dimension
    # A separate name keeps the module-level `classes` list from being overwritten
    predicted_class = np.argmax(TrainedModelSplit_1.predict(x, batch_size = 10))
    predictions.append(predicted_class)

# show every sampled picture together with its predicted and true label
for i, (file_name, predicted_class, true_label) in enumerate(zip(files, predictions, true_labels)):
    img = mpimg.imread(file_name)
    print("\n(",i+1,")")
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    print("Predicted: ",class_labels[predicted_class])
    print("True: ",true_label)

def intersection(lst1, lst2):
    """Return the items of ``lst1`` that also occur in ``lst2``.

    The order of ``lst1`` is kept, and so are its duplicates.
    """
    return list(filter(lambda item: item in lst2, lst1))



{0: 'Bluebell', 1: 'Buttercup', 2: "Colts'Foot", 3: 'Cowslip', 4: 'Crocus', 5: 'Daffodil', 6: 'Daisy', 7: 'Dandelion', 8: 'Fritillary', 9: 'Iris', 10: 'LilyValley', 11: 'Pansy', 12: 'Snowdrop', 13: 'Sunflower', 14: 'Tigerlily', 15: 'Tulip', 16: 'Windflower'}


'\n# predicting images\nfor i in range(0, 5):\n    path = PARENT_PATH + \'/test_1/\'\n    img, final_path, true_label = getRandomImage(path, img_width, img_height)\n    files.append(final_path)\n    true_labels.append(true_label)\n    x = image.img_to_array(img)\n    x = x * 1./255\n    x = np.expand_dims(x, axis=0)\n    images = np.vstack([x])\n    classes = np.argmax(trained_model.predict(images, batch_size = 10))\n    predictions.append(classes)\n    \nfor i in range(0, len(files)):\n    img=mpimg.imread((files[i]))\n    print("\n(",i+1,")")\n    plt.imshow(img)\n    plt.axis(\'off\')\n    plt.show()\n    print("Predicted: ",class_labels[predictions[i]])\n    print("True: ",true_labels[i])\n\n\ndef intersection(lst1, lst2): \n  \n    lst3 = [value for value in lst1 if value in lst2] \n    return lst3\n\n\n\n'