In [0]:
# Name of the CNN backbone used throughout the notebook. It is part of the
# trained-model directory path and is passed to get_data_generators(), which
# accepts 'VGG', 'Xception' or 'Inception'.
network = 'VGG'

In [0]:
from keras import backend as K

# Seed NumPy's global random number generator.
from numpy.random import seed
seed(1)
# Seed TensorFlow's random number generator.
# NOTE(review): a top-level `tensorflow.set_random_seed` is the TensorFlow 1.x
# name — confirm the runtime's TensorFlow version still provides it.
from tensorflow import set_random_seed
set_random_seed(2)

In [0]:
import matplotlib.pyplot as plt
import itertools
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a status line and draw the confusion matrix on the current figure.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix; rows are labelled "True label" and columns
        "Predicted label" on the plot.
    classes : sequence
        Tick labels, one per row/column of `cm`.
    normalize : bool, optional
        When True, every row is divided by its sum before plotting. Rows
        whose sum is zero (a class with no true samples) are shown as zeros
        instead of producing NaN cells.
    title : str, optional
        Title of the plot.
    cmap : matplotlib colormap, optional
        Colormap handed to `plt.imshow`.

    Returns
    -------
    None. The plot is drawn with the pyplot state machine; the caller is
    responsible for creating/showing the figure.
    """
    # Imported locally so the function does not rely on another notebook cell
    # having imported numpy first.
    import numpy as np

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guarded division: rows with no samples stay at 0 rather than
        # becoming NaN, which would also poison `cm.max()` below.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cell annotations: two decimals for ratios, plain integers for counts.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


**IMPORTS AND FUNCTIONS**

In [0]:
from keras.applications.vgg16 import  VGG16
from keras.applications.inception_v3 import InceptionV3
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Input, Flatten
from keras.optimizers import Adam, SGD
from keras.callbacks import Callback, CSVLogger, ModelCheckpoint
from keras.models import load_model
import numpy as np

from tqdm import tqdm


import pandas as pd

**MOUNT GOOGLE DRIVE (source of the dataset archives and trained models)**

In [0]:
# Mount Google Drive at /content/gdrive; the model directory and the pickle
# output path used later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

**DATA GENERATORS (VGG / Xception / Inception)**

In [0]:
from keras.preprocessing.image import ImageDataGenerator
import numpy as np


def preprocess_input_vgg(x):
  """Apply the VGG16 `preprocess_input` to a single image.

  `keras.applications.vgg16.preprocess_input` works on a 4D batch, while the
  `preprocessing_function` hook of
  `keras.preprocessing.image.ImageDataGenerator` hands over one 3D image at a
  time. This adapter adds a leading batch axis, runs the VGG16
  preprocessing, and strips the batch axis again.

  Parameters
  ----------
  x : numpy 3D array
      A single image to be preprocessed.

  Returns
  -------
  numpy 3D array
      The preprocessed image.
  """
  from keras.applications.vgg16 import preprocess_input as vgg16_preprocess

  single_image_batch = np.expand_dims(x, axis=0)
  return vgg16_preprocess(single_image_batch)[0]


def get_data_generators(file_num, network, file_name):
  """Unpack a dataset archive and build train/validation/test generators.

  The archive is read from 'gdrive/My Drive/Colab Notebooks/UC_Merced/' and
  extracted to /tmp. The extracted folder is expected to be named like the
  archive (the text before the first '.') and to contain 'train',
  'validation' and 'test' sub-directories.

  Parameters
  ----------
  file_num : any
      Not used by this function; kept so existing callers keep working.
  network : str
      'VGG', 'Xception' or 'Inception'. Selects the preprocessing function
      and the image size ((224, 224) for VGG, (299, 299) otherwise).
  file_name : str
      Name of the zip archive inside the Drive folder above.

  Returns
  -------
  tuple
      (train_generator, validation_generator, test_generator), each created
      with `flow_from_directory`, batch size 16 and class_mode='sparse'.

  Raises
  ------
  ValueError
      If `network` is not one of the supported names.
  """
  import os
  import zipfile

  if network == 'VGG':
    preprocess_input = preprocess_input_vgg
    input_size = (224, 224)
    print("VGG DATA GENERATOR")
  elif network == 'Xception':
    from keras.applications.xception import preprocess_input
    print("Xception DATA GENERATOR")
    input_size = (299, 299)
  elif network == 'Inception':
    from keras.applications.inception_v3 import preprocess_input
    print("Inception DATA GENERATOR")
    input_size = (299, 299)
  else:
    # Fail early with a clear message instead of an unbound-local error
    # further down, after the archive has already been extracted.
    raise ValueError(
        "Unsupported network %r; expected 'VGG', 'Xception' or 'Inception'" % (network,))

  local_zip = 'gdrive/My Drive/Colab Notebooks/UC_Merced/' + file_name
#   local_zip = 'gdrive/My Drive/Colab Notebooks/Sint_Maarten/' + 'shape/' + str(10) + '/'+ file_name
#   local_zip = 'gdrive/My Drive/Colab Notebooks/Sint_Maarten/' + 'material/' + str(10) + '/'+ file_name

  print(local_zip)
  # Context manager closes the archive even if extraction fails.
  with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

  base_dir = '/tmp/' + file_name.split(".")[0]
  # base_dir = '/tmp/UC_Merced_filtered_45'

  print(base_dir)

  train_dir = os.path.join(base_dir, 'train')
  validation_dir = os.path.join(base_dir, 'validation')
  test_dir = os.path.join(base_dir, 'test')

  # Training images: network-specific preprocessing plus random rotation
  # (up to 180 degrees) and random horizontal/vertical flips.
  train_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      rotation_range=180,
      horizontal_flip=True,
      vertical_flip=True
  )
  # NOTE(review): the validation images get the same augmentation as the
  # training images here, while get_Xception_data_generator() leaves its
  # validation data un-augmented — confirm this difference is intended.
  val_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      rotation_range=180,
      horizontal_flip=True,
      vertical_flip=True
  )
  # Test images are only preprocessed, never augmented.
  test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

  # Settings shared by all three generators: images are resized to the
  # network's input size, served in batches of 16, and labelled with
  # class_mode='sparse'.
  flow_kwargs = dict(target_size=input_size, batch_size=16, class_mode='sparse')

  train_generator = train_datagen.flow_from_directory(
      train_dir, shuffle=True, **flow_kwargs)
  validation_generator = val_datagen.flow_from_directory(
      validation_dir, **flow_kwargs)
  test_generator = test_datagen.flow_from_directory(
      test_dir, **flow_kwargs)

  return train_generator, validation_generator, test_generator


In [0]:

from keras.preprocessing.image import ImageDataGenerator
import numpy as np

def get_Xception_data_generator(file_num, file_name):
  """Unpack a dataset archive and build Xception train/validation/test generators.

  The archive is read from
  'gdrive/My Drive/Colab Notebooks/Sint_Maarten/material/' and extracted to
  /tmp. The extracted folder is expected to be named like the archive (the
  text before the first '.') and to contain 'train', 'validation' and 'test'
  sub-directories.

  Parameters
  ----------
  file_num : any
      Not used by this function; kept so existing callers keep working.
  file_name : str
      Name of the zip archive inside the Drive folder above.

  Returns
  -------
  tuple
      (train_generator, validation_generator, test_generator). All resize
      images to (299, 299) and use class_mode='sparse'. Train and validation
      use batches of 16, the test generator batches of 100.
  """
  from keras.applications.xception import preprocess_input

  print('Xception DATA GENERATOR')

  import os
  import zipfile
#   local_zip = 'gdrive/My Drive/Colab Notebooks/UC_Merced/' + file_name
#   local_zip = 'gdrive/My Drive/Colab Notebooks/Sint_Maarten/shape/' + file_name
  local_zip = 'gdrive/My Drive/Colab Notebooks/Sint_Maarten/material/' + file_name

  print(local_zip)
  # Context manager closes the archive even if extraction fails.
  with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

  base_dir = '/tmp/' + file_name.split(".")[0]
  # base_dir = '/tmp/UC_Merced_filtered_45'

  print(base_dir)

  train_dir = os.path.join(base_dir, 'train')
  validation_dir = os.path.join(base_dir, 'validation')
  test_dir = os.path.join(base_dir, 'test')

  # Training images: Xception preprocessing plus random rotation (up to 180
  # degrees) and random horizontal/vertical flips.
  train_datagen = ImageDataGenerator(
      preprocessing_function=preprocess_input,
      rotation_range=180,
      horizontal_flip=True,
      vertical_flip=True)

  # Validation and test images are only preprocessed, never augmented.
  val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
  test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

  # Training images: resized to 299x299, shuffled, batches of 16.
  train_generator = train_datagen.flow_from_directory(
          train_dir,
          target_size=(299, 299),
          batch_size=16,
          class_mode='sparse',
          shuffle=True)

  # Validation images: batches of 16.
  validation_generator = val_datagen.flow_from_directory(
          validation_dir,
          target_size=(299, 299),
          batch_size=16,
          class_mode='sparse')

  # Test images: batches of 100.
  test_generator = test_datagen.flow_from_directory(
          test_dir,
          target_size=(299, 299),
          batch_size=100,
          class_mode='sparse')

  return train_generator, validation_generator, test_generator



In [0]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, cohen_kappa_score

def eval(prediction_lists, label_lists):
  """Summarise classification metrics across several runs.

  Note: this function shadows Python's built-in `eval` in the notebook
  namespace.

  Parameters
  ----------
  prediction_lists : sequence of array-like
      Predicted labels, one entry per run.
  label_lists : sequence of array-like
      True labels, one entry per run, paired with `prediction_lists`.

  Returns
  -------
  tuple
      (precision, recall, fscore, support, accuracy, kappa). Each element is
      a two-item list `[mean, std]` taken over the runs (axis 0). The first
      four come from `precision_recall_fscore_support`, the last two from
      `accuracy_score` and `cohen_kappa_score`.
  """
  # One tuple per run: (precision, recall, fscore, support, accuracy, kappa).
  per_run = []
  for run_predictions, run_labels in zip(prediction_lists, label_lists):
    prf_support = precision_recall_fscore_support(run_labels, run_predictions)
    run_accuracy = accuracy_score(run_labels, run_predictions)
    run_kappa = cohen_kappa_score(run_labels, run_predictions)
    per_run.append(tuple(prf_support) + (run_accuracy, run_kappa))

  # Collapse each metric over the runs into [mean, std].
  summary = []
  for metric_index in range(6):
    stacked = np.asarray([run[metric_index] for run in per_run])
    summary.append([np.mean(stacked, axis=0), np.std(stacked, axis=0)])

  return tuple(summary)
  

**Load models**

In [0]:
import os

# Directory that holds the trained models of the selected network.
PATH = '/content/gdrive/My Drive/Colab Notebooks/RUNS/UC_Merced/models/' + network +'/trained_models/'
# PATH = '/content/gdrive/My Drive/Colab Notebooks/RUNS/Saint_Martin/Shape/models/' + network +'/trained_models/'
# PATH = '/content/gdrive/My Drive/Colab Notebooks/RUNS/Saint_Martin/Material/models/' + network +'/trained_models/'

# Load every entry of PATH in sorted (lexicographic) file-name order; the
# position of a model in `models` is what get_data() later indexes by.
model_names = sorted(os.listdir(PATH))
models = []

for name in model_names:
  file_path = os.path.join(PATH, name)
  print(file_path)
  models.append(load_model(file_path))

**Predictions**

In [0]:
# Column layout of the results table: one row per (network, classifier) pair.
columns = [
    'network', 'classifier', 'best_parameters',
    'precision', 'recall', 'fscore', 'support', 'accuracy', 'kappa',
    'labels', 'predictions',
]

# Empty results table that the classification cell fills in.
df = pd.DataFrame(columns=columns)

**Classification**

In [0]:
def _extract_features(model, generator, passes=1):
  """Pull batches from `generator` and turn the images into model features.

  For every pass, ceil(generator.samples / generator.batch_size) batches are
  drawn, i.e. as many batches as are needed to cover `generator.samples`
  images once. Each image batch is sent through `model.predict`.

  Returns
  -------
  tuple of numpy.ndarray
      (features, labels), with one row/entry per drawn image.
  """
  batches_per_pass = int(np.ceil(generator.samples / generator.batch_size))
  features = []
  labels = []
  for _ in range(passes * batches_per_pass):
    batch_images, batch_labels = generator.next()
    features.extend(model.predict(batch_images))
    labels.extend(batch_labels)
  return np.asarray(features), np.asarray(labels)


def get_data(num, models):
  """Build feature/label arrays for one dataset split using one trained model.

  The model at `models[num]` is cut at its 'fc2' layer and used as a feature
  extractor on the train, validation and test generators returned by
  `get_data_generators` for the archive 'UC_Merced_filtered_<num>.zip'.
  The module-level `network` name is passed on to `get_data_generators`.

  Parameters
  ----------
  num : int
      Index into `models`; also the number in the archive file name.
  models : sequence
      Loaded Keras models; each must have a layer named 'fc2'.

  Returns
  -------
  tuple of numpy.ndarray
      (images_train, labels_train, images_val, labels_val, images_test,
      labels_test), where the "images" entries are the 'fc2' features.
  """
  base_model = models[num]
  model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc2').output)

  # With exactly ten models, index 0 is paired with archive number 10.
  # NOTE(review): presumably this mirrors how the sorted model file names
  # line up with the archive numbering — confirm against the model directory.
  if num == 0 and len(models) == 10:
    num = 10

  file_name = 'UC_Merced_filtered_'+ str(num) +'.zip'
#   file_name = 'roof_shape_'+ str(num*10) +'_0_9' + '.zip'
#   file_name = 'roof_material_'+ str(num*10) +'_0_9' + '.zip'

  train_generator, validation_generator, test_generator = get_data_generators(num, network, file_name)

  # The training generator is swept twice; it applies random augmentation,
  # so the second sweep is not simply a copy of the first.
  images_train, labels_train = _extract_features(model, train_generator, passes=2)
  images_val, labels_val = _extract_features(model, validation_generator, passes=1)
  images_test, labels_test = _extract_features(model, test_generator, passes=1)

  return images_train, labels_train, images_val, labels_val, images_test, labels_test


In [0]:
# Candidate hyper-parameter values for the random-forest search.
param_dist = dict(
    n_estimators=[100, 200, 400, 800, 1600],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2'],
    max_depth=[20, 40, 60, 80, 100, None],
)


In [0]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC


# Name of the pickle the results table is written to.
# NOTE(review): the name and the output folder below refer to the
# Sint_Maarten material data, while get_data() reads UC_Merced archives —
# confirm they match the experiment being run.
file_name = 'Material_SVM.pkl'


i = 0
# Classifiers to evaluate on the extracted features: 'rf' and/or 'svm'.
for clf_name in ['svm']:

  prediction_lists = []
  label_lists = []
  best_parameters_lists = []

  # One run per trained model / dataset split.
  for num in range(10):

    images_train, labels_train, images_val, labels_val, images_test, labels_test = get_data(num, models)

    # The classifier is fitted on training and validation features together.
    images_train = np.concatenate((images_train, images_val), axis=0)
    labels_train = np.concatenate((labels_train, labels_val), axis=0)

    print(images_train.shape, images_test.shape)

    if clf_name == 'rf':

      # 'log2' (not 'log') is the valid scikit-learn name for max_features.
      param_dist = {"n_estimators": [100, 200, 400, 800, 1600],
                    "min_samples_split":  [2, 5, 10],
                    "min_samples_leaf": [1, 2, 4],
                    "max_features":['sqrt', 'log2'],
                    "max_depth": [20, 40, 60, 80, 100, None]}

      clf = RandomForestClassifier()
      search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                     n_iter=60, cv=3)

      search.fit(images_train, labels_train)
      parameters =  list(search.best_params_ .values())

    elif clf_name == 'svm':

      # No hyper-parameter search for the SVM; C is fixed and recorded as
      # the "best parameter" of the run.
      search = LinearSVC(max_iter=500*5000, C = 1, multi_class='ovr')
      search.fit(images_train, labels_train)
      parameters =  1

    else:
      # Without this, an unknown name would reuse a stale `search` object
      # (or fail with a NameError) in the prediction step below.
      raise ValueError("Unknown classifier %r; expected 'rf' or 'svm'" % (clf_name,))

    # Predict the test split with the fitted classifier.
    prediction_list = search.predict(images_test)
    label_list = labels_test

    prediction_lists.append(np.asarray(prediction_list))
    label_lists.append(np.asarray(label_list))
    best_parameters_lists.append(parameters)

  # Mean/std of every metric over the runs.
  precision, recall, fscore, support, accuracy, kappa =  eval(prediction_lists, label_lists)

  # Concatenate the runs into flat vectors; unlike asarray(...).flatten()
  # this also works when the runs have different numbers of test samples.
  labels = np.concatenate(label_lists)
  predictions = np.concatenate(prediction_lists)

  print(i, clf_name, accuracy[0])
  i += 1

  # Add the result row and persist the table after every classifier.
  obs = pd.DataFrame([[network, clf_name, best_parameters_lists,  precision, recall, fscore, support, accuracy, kappa, labels, predictions]], columns=columns)
  # pd.concat replaces DataFrame.append, which newer pandas no longer has.
  df = pd.concat([df, obs])
  df.to_pickle('/content/gdrive/My Drive/Colab Notebooks/Sint_Maarten/pickles/classification/' + file_name)

