In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
from google.colab import drive
%matplotlib inline
from skimage import io, color
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import tensorflow as tf  
from keras.utils import to_categorical
import glob
import re
from tensorflow.python.keras import applications
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.models import model_from_json
from tensorflow.python.keras.models import Sequential, Model, load_model
from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, GlobalAveragePooling2D
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids

Using TensorFlow backend.


In [2]:
# mount drive on Google drive to access training data
# Ignore this if you don't use Google Colab
drive.mount("/content/drive")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Access training data in My Drive
train_path = "/content/drive/My Drive/ECE_542/TrainData-C2/"
# get training labels
train_labels = pd.read_csv("/content/drive/My Drive/ECE_542/TrainData-C2/TrainAnnotations.csv")
train_labels.sort_values(by=["file_name"], inplace=True)
train_files = glob.glob(train_path + "*.jpg")
train_files.sort()

# Helper function

In [0]:
def extract_data(file_names, labels, size=None):
  """
  Extract all images given list of file names and list of labels.  Also resize
  images according to user-defined size
  Inputs:
  - filenames: list of file paths to images
  - labels: list of label of each image; order based on the order of filenames
  Outputs:
  - images: list of RGB images
  - annotations: list of labels for the images
  """
  images = []
  annotations = []
  for idx, f in enumerate(file_names):
    img = io.imread(f)
    if size is not None:
      img = resize(img, (size, size), anti_aliasing=True)
    images.append(img)
    annotations.append(labels[idx])
  return images, annotations

In [0]:
def RGB2HSV(images, hue=False):
  """
  Convert all RGB images into HSV channel
  Input:
  - images: list of images of shape (H, W, 3)
  """
  hsv = []
  for img in images:
    if hue:
      hsv.append(color.rgb2hsv(img)[:,:,0])
    else:
      hsv.append(color.rgb2hsv(img))
  return hsv

In [0]:
def eval_report(model, test_data, ground_truth):
  p = model.predict(test_data)
  Yhat = np.argmax(p, axis=1)
  return classification_report(ground_truth, Yhat)

# preparing data

In [0]:
# This cell may take awhile to run
# extract all data
images, labels = extract_data(train_files, train_labels.annotation, 224)
# convert RGB to HSV
HSV = RGB2HSV(images)
# convert RGB into HUE
HUE = RGB2HSV(images, hue=True)

In [8]:
# Split training and validation test set
Y = to_categorical(labels)
X_train, X_val, Y_train, Y_val = train_test_split(np.array(images), Y, test_size=0.3, random_state=32)
X_train, X_val_hsv, Y_train, Y_val_hsv = train_test_split(np.array(HSV), Y, test_size=0.3, random_state=32)
X_train, X_val_hue, Y_train, Y_val_hue = train_test_split(np.array(HUE), Y, test_size=0.3, random_state=32)

# access validation data
n_val, H, W, C = X_val.shape 
X_val = np.reshape(X_val, (n_val, H*W*C))
X_val_hsv = np.reshape(X_val_hsv, (n_val, H*W*C))
X_val_hue = np.reshape(X_val_hue, (n_val, H*W))

# undersampling the validation set
cc = ClusterCentroids(random_state=32)
X_val, Y_val = cc.fit_resample(X_val, Y_val)
cc = ClusterCentroids(random_state=32)
X_val_hsv, Y_val_hsv = cc.fit_resample(X_val_hsv, Y_val_hsv)
cc = ClusterCentroids(random_state=32)
X_val_hue, Y_val_hue = cc.fit_resample(X_val_hue, Y_val_hue)

# reshape validation data to prepare for testing
X_val = np.reshape(X_val, (len(X_val), H, W, C))
X_val_hsv = np.reshape(X_val_hsv, (len(X_val_hsv), H, W, C))
X_val_hue = np.reshape(X_val_hue, (len(X_val_hue), H, W, 1))

# Checking the shape of training and validation data
print(X_val.shape)
print(X_val_hsv.shape)
print(X_val_hue.shape)



(190, 224, 224, 3)
(190, 224, 224, 3)
(190, 224, 224, 1)


In [9]:
# class distribution of validation set
print(np.sum(Y_val, axis=0))

[38 38 38 38 38]


# Load models to compute evaluation metrics

In [29]:
model_path = "/content/drive/My Drive/ECE_542/models/"
# Final resnet models in RGB, HSV and HUE
m1 = load_model(model_path + "resnet_SGD.h5")
m2 = load_model(model_path + "resnet_SGD_final_hsv.h5")
m3 = load_model(model_path + "resnet_SGD_final_hue.h5")

# CNN models in RGB, HSV and HUE
m4 = load_model(model_path + "CNN_RGB.h5")
m5 = load_model(model_path + "CNN_HSV.h5")
m6 = load_model(model_path + "CNN_Hue.h5")

# CNN models with ImageGenerator in RGB, HSV and HUE
m7 = load_model(model_path + "VGG16_RGB.h5")
m8 = load_model(model_path + "VGG16_HSV.h5")
m9 = load_model(model_path + "VGG16_HUE.h5")



### Loss and accuracy

In [30]:
# compute models' loss and accuracy
print("Resnet models' loss and accuracy")
m1.evaluate(X_val, Y_val)
m2.evaluate(X_val_hsv, Y_val_hsv)
m3.evaluate(X_val_hue, Y_val_hue)
print("CNN models' loss and accuracy")
m4.evaluate(X_val, Y_val)
m5.evaluate(X_val_hsv, Y_val_hsv)
m6.evaluate(X_val_hue, Y_val_hue)
print("VGG16 models' loss and accuracy")
m7.compile(optimizer="SGD", loss="categorical_crossentropy", metrics=["accuracy"])
m7.evaluate(X_val, Y_val)
m8.compile(optimizer="SGD", loss="categorical_crossentropy", metrics=["accuracy"])
m8.evaluate(X_val_hsv, Y_val_hsv)
m9.compile(optimizer="SGD", loss="categorical_crossentropy", metrics=["accuracy"])
m9.evaluate(X_val_hue, Y_val_hue)

Resnet models' loss and accuracy
CNN models' loss and accuracy
VGG16 models' loss and accuracy


[1.6095185279846191, 0.20000000298023224]

### Resnet models' evaluation matrices

In [0]:
# compute other evaluation metrics
Models = [m1, m2, m3]
Data = [X_val, X_val_hsv, X_val_hue]
Reports = [eval_report(Models[i], Data[i], np.argmax(Y_val, axis=1)) for i in range(3)]
for r in Reports: print(r)

              precision    recall  f1-score   support

           0       0.58      0.95      0.72        38
           1       0.41      0.34      0.37        38
           2       0.59      0.58      0.59        38
           3       0.89      0.82      0.85        38
           4       0.79      0.50      0.61        38

    accuracy                           0.64       190
   macro avg       0.65      0.64      0.63       190
weighted avg       0.65      0.64      0.63       190

              precision    recall  f1-score   support

           0       0.70      0.37      0.48        38
           1       0.42      0.37      0.39        38
           2       0.48      0.84      0.61        38
           3       0.81      0.66      0.72        38
           4       0.56      0.58      0.57        38

    accuracy                           0.56       190
   macro avg       0.59      0.56      0.56       190
weighted avg       0.59      0.56      0.56       190

              precisio

### CNN models' evaluation matrices

In [0]:
# compute other evaluation metrics
Models = [m4, m5, m6]
Data = [X_val, X_val_hsv, X_val_hue]
Reports = [eval_report(Models[i], Data[i], np.argmax(Y_val, axis=1)) for i in range(3)]
for r in Reports: print(r)

              precision    recall  f1-score   support

           0       0.74      0.61      0.67        38
           1       0.51      0.53      0.52        38
           2       0.62      0.87      0.73        38
           3       0.97      0.74      0.84        38
           4       0.66      0.66      0.66        38

    accuracy                           0.68       190
   macro avg       0.70      0.68      0.68       190
weighted avg       0.70      0.68      0.68       190

              precision    recall  f1-score   support

           0       0.71      0.95      0.81        38
           1       0.47      0.68      0.56        38
           2       0.92      0.58      0.71        38
           3       0.91      0.82      0.86        38
           4       0.65      0.45      0.53        38

    accuracy                           0.69       190
   macro avg       0.73      0.69      0.69       190
weighted avg       0.73      0.69      0.69       190

              precisio

  _warn_prf(average, modifier, msg_start, len(result))


### VGG16 models' evaluation matrices



In [31]:
# compute other evaluation metrics
Models = [m7, m8, m9]
Data = [X_val, X_val_hsv, X_val_hue]
Reports = [eval_report(Models[i], Data[i], np.argmax(Y_val, axis=1)) for i in range(3)]
for r in Reports: print(r)

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00        38
           1       0.00      0.00      0.00        38
           2       0.20      1.00      0.33        38
           3       0.00      0.00      0.00        38
           4       0.00      0.00      0.00        38

    accuracy                           0.20       190
   macro avg       0.04      0.20      0.07       190
weighted avg       0.04      0.20      0.07       190

              precision    recall  f1-score   support

           0       0.28      0.79      0.42        38
           1       0.50      0.03      0.05        38
           2       0.16      0.13      0.14        38
           3       0.50      0.16      0.24        38
           4       0.45      0.45      0.45        38

    accuracy                           0.31       190
   macro avg       0.38      0.31      0.26       190
weighted avg       0.38      0.31      0.26       190

              precisio