In [0]:
# installing packages
# !pip install tensorflow
# !pip install -q keras
# !pip install opencv-python
# !pip install matplotlib
# !pip install tqdm
# !pip install tensorflow-gpu
!pip install scikit-plot

In [0]:
# Attach Google Drive to the Colab runtime's file system
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive/'  # folder under which the Drive contents appear
drive.mount(DRIVE_MOUNT_POINT)

In [0]:
# Load the TensorBoard notebook extension; this registers the `%tensorboard`
# magic that is used later in this notebook to view the training logs.
%load_ext tensorboard

In [0]:
# Libraries for random forest
import matplotlib
import math
import scikitplot as skplt
from sklearn.model_selection import train_test_split
from random import shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

# Libraries for creating training data
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import random
import pickle
from tqdm import tqdm

# Libraries for creating model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

# Visualize the training of the model (loss & accuracy on the training and validation sets)
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard
import datetime

In [0]:
# Dataset folders (relative paths). Keep exactly one TRAIN_DATA_DIR line active;
# the commented-out lines are alternative training sets.
TRAIN_DATA_DIR = "images/JAFFESorted" # folder with one sub-folder per entry of CATEGORIES
#TRAIN_DATA_DIR = "images/FEISorted"
#TRAIN_DATA_DIR = "images/FEI+JAFFE"
TEST_DATA_DIR = "images/Testing Data Suzy"

# Pickle files that cache the training arrays.
# The active X and y paths should belong to the same dataset as TRAIN_DATA_DIR.
# X pickles (image arrays)
train_dir_X = "pickle/X_JAFFE.pickle"
#train_dir_X = "pickle/X_FEI.pickle"
#train_dir_X = "pickle/X_FEI_JAFFE.pickle"  # NOTE(review): this line duplicated the X_FEI path; presumably the FEI+JAFFE pickle was meant (mirrors y_FEI_JAFFE below) - confirm the file name

# y pickles (labels)
train_dir_y = "pickle/y_JAFFE.pickle"
#train_dir_y = "pickle/y_FEI.pickle"
#train_dir_y = "pickle/y_FEI_JAFFE.pickle"

# Pickle files that cache the test arrays
test_dir_X = "pickle/X_SUZY.pickle"
test_dir_y = "pickle/y_SUZY.pickle"

CATEGORIES = ["Good", "Bad"] # Class (and sub-folder) names; a name's index in this list is its integer label
IMG_SIZE = 100 # Every image is resized to IMG_SIZE x IMG_SIZE pixels

In [0]:
# Function to create training/test data
def create_array_data(data_directory, categories, img_size):
    """Load every image under ``data_directory/<category>`` as a labelled grayscale array.

    Args:
        data_directory: Folder that contains one sub-folder per category.
        categories: Sub-folder names. A category's position in this list is used
            as its integer label (e.g. ``["Good", "Bad"]`` -> 0 and 1).
        img_size: Side length, in pixels, that every image is resized to.

    Returns:
        A list of ``[image, label]`` pairs, where ``image`` is the resized
        grayscale array and ``label`` is the category's index.
        Files that cannot be read or resized are skipped (and reported), so one
        stray file in the folder does not abort the whole load.
    """
    array_data = []
    skipped = []  # (file path, reason) for every file that could not be used

    for class_num, category in enumerate(categories):
        path = os.path.join(data_directory, category)
        for img in tqdm(os.listdir(path)):
            img_path = os.path.join(path, img)
            try:
                img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                # cv2.imread reports an unreadable file by returning None, not by raising
                if img_array is None:
                    raise ValueError("cv2.imread could not decode the file")
                new_array = cv2.resize(img_array, (img_size, img_size))
            except Exception as e:
                # Best effort: remember the problem instead of silently discarding it
                skipped.append((img_path, e))
                continue
            array_data.append([new_array, class_num])

    if skipped:
        print("Skipped {} unusable file(s) under {}:".format(len(skipped), data_directory))
        for img_path, reason in skipped:
            print("  {}: {!r}".format(img_path, reason))

    return array_data

# Function to create pickle files (returns nothing)
def create_pickle_data(data_dir, categories, dir_X, dir_y, img_size):
    """Build a shuffled image dataset and save its features and labels as two pickles.

    Args:
        data_dir: Folder passed to ``create_array_data`` (one sub-folder per category).
        categories: Category names; see ``create_array_data``.
        dir_X: Output path of the pickle holding the image array, reshaped to
            ``(-1, img_size, img_size, 1)``.
        dir_y: Output path of the pickle holding the label array.
        img_size: Side length, in pixels, of the stored images.

    Returns:
        None. The two pickle files are the only result.
    """
    general_data = create_array_data(data_dir, categories, img_size)
    # Shuffle so the samples are not ordered by class
    # (original author's note: a 50:50 class ratio worked best)
    random.shuffle(general_data)

    # X is the feature set, y holds the labels
    X = []
    y = []
    for features, label in tqdm(general_data):
        X.append(features)
        y.append(label)

    # Keras only accepts numpy arrays; the trailing 1 is the single grayscale channel
    # (it would be 3 for colour images)
    X = np.array(X).reshape(-1, img_size, img_size, 1)
    y = np.array(y)

    for out_path, payload in ((dir_X, X), (dir_y, y)):
        parent = os.path.dirname(out_path)
        if parent:
            # Create the output folder if needed so the write cannot fail on a missing directory
            os.makedirs(parent, exist_ok=True)
        # `with` closes the file even if pickle.dump raises
        with open(out_path, "wb") as pickle_out:
            pickle.dump(payload, pickle_out)

In [0]:
# Build the training set from TRAIN_DATA_DIR and cache it as the two training pickles
create_pickle_data(
    data_dir=TRAIN_DATA_DIR,
    categories=CATEGORIES,
    dir_X=train_dir_X,
    dir_y=train_dir_y,
    img_size=IMG_SIZE,
)

In [0]:
# Build the test set from TEST_DATA_DIR and cache it as the two test pickles
create_pickle_data(
    data_dir=TEST_DATA_DIR,
    categories=CATEGORIES,
    dir_X=test_dir_X,
    dir_y=test_dir_y,
    img_size=IMG_SIZE,
)

In [0]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``; the file is closed afterwards."""
    # NOTE: unpickling can execute arbitrary code - only load pickles produced by this notebook.
    with open(path, "rb") as pickle_in:
        return pickle.load(pickle_in)


# Load the training data from its pickles
X_train = _load_pickle(train_dir_X)
y_train = _load_pickle(train_dir_y)

# Normalize by scaling pixel intensities from the 0..255 range down to 0..1
X_train = X_train/255.0

# Load the test data from its pickles
X_test = _load_pickle(test_dir_X)
y_test = _load_pickle(test_dir_y)

# Same scaling as the training data so both sets share one value range
X_test = X_test/255.0

In [0]:
# Machine Learning
# Random Forest Classifier

# Initializing the classifier
# n_estimators: number of trees (other values noted by the author: 10, 30, 100)
# n_jobs=-1: use all available CPU cores
# random_state: fixes the forest's random choices so that re-running the notebook on the
#               same pickled data yields the same model and the same scores
rfc = RandomForestClassifier(n_jobs=-1, n_estimators=35, random_state=42)
X_train_rfc = X_train
y_train_rfc = y_train
X_test_rfc = X_test
y_test_rfc = y_test

# Fitting the model; each image is flattened to one feature vector because the
# forest expects a 2-D (samples, features) array
rfc.fit(X_train_rfc.reshape(len(X_train_rfc), -1), y_train_rfc)

In [0]:
# Flatten every test image to one feature vector once and reuse it for both calls
X_test_rfc_flat = X_test_rfc.reshape(len(X_test_rfc), -1)

# Obtain the RFC predictions and save them for the confusion matrix
y_pred = rfc.predict(X_test_rfc_flat)

# Mean accuracy on the test set. It is the cell's last expression so the notebook
# displays it (previously the score was computed and thrown away).
rfc_test_accuracy = rfc.score(X_test_rfc_flat, y_test_rfc)
rfc_test_accuracy

In [0]:
# Confusion matrix of the random forest predictions on the test set.
# A single figure/axes pair is created here; a separate plt.figure() call beforehand
# would only add an extra, empty figure to the output.
f, ax = plt.subplots(1, 1, figsize=(12, 12))

# normalize=True shows each row as fractions of the true class instead of raw counts
# (scikit-plot takes a boolean here; the string 'true' only worked because it is truthy)
skplt.metrics.plot_confusion_matrix(y_test_rfc, y_pred, normalize=True, ax=ax)
plt.show()


In [0]:
from sklearn import svm

# Support vector classifier with a sigmoid kernel, trained on the flattened images
X_train_flat = X_train.reshape(len(X_train), -1)  # one row of pixel values per image
model_svm = svm.SVC(kernel="sigmoid", C=5, gamma="scale")
model_svm.fit(X_train_flat, y_train)


In [0]:
# Predict a class label for every (flattened) test image
X_test_flat = X_test.reshape(len(X_test), -1)
#model_svm.score(X_test_rfc.reshape(len(X_test_rfc), -1), y_test_rfc)
model_svm_labels_predict = model_svm.predict(X_test_flat)


In [0]:
from sklearn.metrics import roc_curve, auc,roc_auc_score

# A ROC curve is traced by sweeping a threshold over a continuous score. Hard 0/1
# predictions provide a single operating point only, so the curve and its area are
# computed from the SVM's signed distance to the decision boundary instead.
svm_scores = model_svm.decision_function(X_test.reshape(len(X_test), -1))

fpr = dict()
tpr = dict()
roc_auc = dict()

# One curve per class, keyed by the class label (0 or 1).
# For a binary SVC a positive decision value favours label 1, so the score in
# favour of label 0 is simply the negated value.
for i in range(2):
    class_scores = svm_scores if i == 1 else -svm_scores
    fpr[i], tpr[i], _ = roc_curve(y_test, class_scores, pos_label=i)
    roc_auc[i] = auc(fpr[i], tpr[i])

print("Area under curve:",roc_auc_score(y_test, svm_scores))

# Plot the curve for label 1
plt.figure()
plt.plot(fpr[1], tpr[1])
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.show()

In [0]:
# Deep Learning
# Convolutional Neural Network

def build_cnn(input_shape, conv_layer, layer_size, dense_layer, filter_size, dropout_value):
    """Build and compile a binary-classification CNN.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        conv_layer: Total number of Conv2D -> ReLU -> MaxPooling blocks (at least one is built).
        layer_size: Number of filters per convolution and of units per hidden dense layer.
        dense_layer: Number of Dense -> ReLU -> Dropout blocks after flattening.
        filter_size: Side length of the square convolution kernel.
        dropout_value: Dropout rate applied after every hidden dense layer.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit.
    """
    # Sequential fits a plain stack of layers with one input and one output tensor each
    model = Sequential()

    # First convolution block; it also declares the input shape
    model.add(Conv2D(layer_size, (filter_size, filter_size), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Remaining convolution blocks
    for _ in range(conv_layer - 1):
        model.add(Conv2D(layer_size, (filter_size, filter_size)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    for _ in range(dense_layer):
        model.add(Dense(layer_size))
        model.add(Activation('relu'))
        model.add(Dropout(dropout_value))

    # One sigmoid unit: output is a value between 0 and 1 for the label-1 class
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', 'Precision'])
    return model


# Hyper-parameter grid: every combination of the values below is trained once
dense_layers = [2]
layer_sizes = [16]
conv_layers = [2]
filter_sizes = [3]
dropout_values = [0.2]

# Test-set results of every trained configuration, keyed by run name, so that a
# grid with more than one combination does not keep only the last result
cnn_results = {}

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            for filter_size in filter_sizes:
                for dropout_value in dropout_values:
                    # Run name for TensorBoard. The timestamp must not contain "/":
                    # the name becomes part of the log path, and every slash would
                    # turn into another level of nested folders.
                    DATE = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
                    NAME = "jaffe-{}-conv-{}-nodes-{}-dense-{}-filter_size-{}-dropout-{}".format(conv_layer, layer_size, dense_layer, filter_size, dropout_value, DATE)
                    logfolderdir = 'logs/jaffe/' # change directory to your pc
                    logdir = logfolderdir + NAME
                    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

                    print("\n" + NAME)

                    model = build_cnn(X_train.shape[1:], conv_layer, layer_size, dense_layer, filter_size, dropout_value)

                    # Training the model; the last 30% of the training data is held out for validation
                    training = model.fit(X_train, y_train, batch_size = 32, epochs = 20, validation_split = 0.3, callbacks = [tensorboard_callback])

                    # Loss followed by the compiled metrics, measured on the test set
                    evaluate_arr = model.evaluate(X_test , y_test , batch_size = 1)
                    cnn_results[NAME] = evaluate_arr

In [0]:
# Open TensorBoard inside the notebook, pointed at the folder the CNN training
# cell writes its logs to, to inspect the loss/metric curves of each run
%tensorboard --logdir 'logs/jaffe/'