In [None]:
# setup to see the execution time in each cell

!pip install ipython-autotime
!pip install directory_structure
# !pip install wandb
%load_ext autotime

In [None]:
import pandas as pd
import os
import glob
import PIL
from PIL import Image
import numpy as np
import cupy as cp
import matplotlib.pyplot as plt
# import wandb

In [None]:
# Root directory that contains every dataset referenced below.
DRIVER_ROOT_DIR = "/kaggle/input/" # HC Directory
# DRIVER_ROOT_DIR = "/content/drive/MyDrive/DTSC 870/Code" #MT Directory

# Dataset locations are built with os.path.join so they resolve correctly
# whether or not DRIVER_ROOT_DIR ends with a trailing slash (the alternative
# root above does not).

# no augmentation
# DATASET_02_TRAIN = os.path.join(DRIVER_ROOT_DIR, "fer2013/02_FER/train")
# DATASET_02_TEST = os.path.join(DRIVER_ROOT_DIR, "fer2013/02_FER/test")

# augmentation v.1
# DATASET_02_TRAIN = os.path.join(DRIVER_ROOT_DIR, "fer2013-aug/Aug_train")
# DATASET_02_TEST = os.path.join(DRIVER_ROOT_DIR, "fer2013-aug-test/Aug_test")

# augmentation v.2
DATASET_02_TRAIN = os.path.join(DRIVER_ROOT_DIR, "fer2013-aug-train-2/Aug_train_2")
DATASET_02_TEST = os.path.join(DRIVER_ROOT_DIR, "fer2013-aug-test-2/Aug_test_2")
# DATASET_02_TEST = os.path.join(DRIVER_ROOT_DIR, "d/datasets/huihenrychen/fer2013-aug-test-2/Aug_test_2")

In [None]:
def get_data_df(dir):
    """Index the image files matching a glob pattern by file name and class.

    The class label of a file is the name of the directory that directly
    contains it (layout ``<root>/<class>/<file>``).

    Adapted from:
    https://www.kaggle.com/namgalielei/simple-load-images-and-count-number-of-each-class

    Parameters
    ----------
    dir : str
        Glob pattern selecting the image files, e.g. ``"<root>/*/*.jpg"``.

    Returns
    -------
    pandas.DataFrame
        One row per matched path, ordered by path, with the columns
        ``"file"`` (base name) and ``"class"`` (parent directory name).
        Empty (but with both columns) when nothing matches.
    """
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes the row order (and anything derived from it, such as the class
    # index mapping) reproducible between runs.
    matched_paths = sorted(glob.glob(dir))

    return pd.DataFrame(
        {
            "file": [os.path.basename(p) for p in matched_paths],
            "class": [os.path.basename(os.path.dirname(p)) for p in matched_paths],
        }
    )


def generate_set(df, dir, classes_):
    """Load every image listed in ``df`` and encode its class as an integer.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``"class"`` and ``"file"``, one row per image.
    dir : str
        Root directory; each image is read from ``<dir>/<class>/<file>``.
    classes_ : list
        Ordered class names; a label is encoded as its position in this list.

    Returns
    -------
    tuple[list, list]
        ``(pixels, class_)`` where ``pixels[i]`` is the flattened pixel array
        of image ``i`` and ``class_[i]`` is its integer label.

    Raises
    ------
    ValueError
        If a row's class is not present in ``classes_``.
    """
    pixels = []
    class_ = []

    # Iterate the two columns directly instead of df.iloc[i][...] per row.
    for label, file_name in zip(df["class"], df["file"]):
        # e.g., <dir>/<class label>/<file name>
        path = dir + "/" + label + "/" + file_name

        # The pixel data is converted on the host; a round trip through the
        # GPU would yield the same values. The context manager releases the
        # file handle as soon as the pixels have been copied out.
        with Image.open(path) as img:
            pixels.append(np.asarray(img).flatten())

        # y_true encoding: index of the class name in classes_
        class_.append(classes_.index(label))

    return pixels, class_


def feature_scale(_x_train_, _x_test_):
    """Standardise a train/test pair with one ``StandardScaler``.

    The scaler is fitted on ``_x_train_`` only and the fitted scaler is then
    applied to ``_x_test_``.

    Returns
    -------
    tuple
        ``(scaled_train, scaled_test)``.
    """
    scaler = StandardScaler()

    # Fit on the training data first; the test data reuses that fit.
    scaled_train = scaler.fit_transform(_x_train_)
    return scaled_train, scaler.transform(_x_test_)



def feature_scale2(_x_train_):
    """Fit a fresh ``StandardScaler`` on ``_x_train_`` and return the scaled data."""
    return StandardScaler().fit_transform(_x_train_)

In [None]:
# Index the image files of both splits (one row per image: file name + class).
fer_df_train = get_data_df(DATASET_02_TRAIN+"/*/*.jpg")
fer_df_test = get_data_df(DATASET_02_TEST+"/*/*.jpg")

# Sort the class names so the class -> integer encoding is the same on every
# run, independent of the order in which the files were discovered.
classes = sorted(fer_df_train["class"].unique().tolist())

# Load the flattened pixels and integer labels; both splits share the
# encoding defined by `classes`.
x_train, y_train = generate_set(fer_df_train, DATASET_02_TRAIN, classes)
x_test, y_test = generate_set(fer_df_test, DATASET_02_TEST, classes)

# One-column frames holding the encoded labels of each split.
x_train_df = pd.DataFrame({"class": y_train})
x_test_df = pd.DataFrame({"class": y_test})

In [None]:
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import StandardScaler
import cv2
import plotly.graph_objects as go
from matplotlib.pyplot import figure

from sklearn.metrics import balanced_accuracy_score, accuracy_score
from numpy import mean
from sklearn.multiclass import OneVsRestClassifier

from cuml.svm import SVC

In [None]:
# Standardise the pixel features: the scaler is fitted on the training set
# and the same fitted scaler is applied to the test set.
sc = StandardScaler()

x_train_sc, x_test_sc = (
    sc.fit_transform(np.array(x_train)),
    sc.transform(np.array(x_test)),
)

In [None]:
# Train the SVM on the standardised training features; features and labels
# are passed to fit() as float32 arrays.
model = SVC(random_state=1)
model.fit(
    np.array(x_train_sc, dtype=np.float32),
    np.array(y_train, dtype=np.float32),
)

# Predict the test split and score it.
y_pred = model.predict(x_test_sc)

# "Unweighted" = balanced accuracy, "weighted" = plain accuracy.
uw_acc = balanced_accuracy_score(y_test, y_pred)
w_acc = accuracy_score(y_test, y_pred)

print("The unweighted accuracy is: {}".format(uw_acc))
print("The weighted accuracy is: {}".format(w_acc))

In [None]:
from sklearn.metrics import plot_confusion_matrix, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

In [None]:
# Legend: integer label -> class name.
for i, class_name in enumerate(classes):
    print("[{}: {}]".format(i, class_name), end=", ")

# Bring predictions and ground truth to the same dtype before comparing them.
y_pred = np.array(y_pred, dtype=np.float32)
y_test = np.array(y_test, dtype=np.float32)

# normalize="true" already scales every row by its true-label count, so the
# matrix only needs rounding here. Dividing by the row sums a second time is
# redundant and would yield NaN (0/0) for any row whose sum is zero.
conf_matrix = cp.asnumpy(confusion_matrix(y_test, y_pred, normalize="true"))
conf_matrix = np.around(conf_matrix.astype('float'), decimals=2)

fig = px.imshow(conf_matrix, text_auto=True, x=classes, y=classes)
fig.show()

In [None]:
# Per-class accuracy in percent: the diagonal entry of each confusion-matrix
# row divided by that row's sum.
per_class_acc = [
    (conf_matrix[k][k] / np.array(conf_matrix[k]).sum()) * 100
    for k in range(len(classes))
]

for i, class_name in enumerate(classes):
    print("[{}: {} - {}]".format(i, class_name, per_class_acc[i]), end=", ")

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import LabelBinarizer
from matplotlib.pyplot import figure

In [None]:
# One-hot encode predictions and ground truth for the per-class ROC curves.
y_pred2 = np.array(y_pred).astype(int)
y_true2 = np.array(y_test).astype(int)

# Fit the binarizer once on every class index so both matrices have the same
# columns in the same order, even when a class is never predicted or is
# missing from the test split. (Fitting separately on each array could
# produce misaligned columns.)
lb = LabelBinarizer()
lb.fit(np.arange(len(classes)))

end_y_pred2 = lb.transform(y_pred2)
# The name is kept because the ROC cell reads `end_y_test_hog`; the
# ground-truth labels come from `y_test`.
end_y_test_hog = lb.transform(y_true2)

In [None]:
# One-vs-rest ROC curve and AUC for every class, computed column by column
# from the binarised ground-truth (end_y_test_hog) and prediction
# (end_y_pred2) matrices.
fpr = {}
tpr = {}
roc_auc = {}
for i in range(len(classes)):
    class_fpr, class_tpr, _ = roc_curve(end_y_test_hog[:, i], end_y_pred2[:, i])
    fpr[i], tpr[i] = class_fpr, class_tpr
    roc_auc[i] = auc(class_fpr, class_tpr)

# Micro-average ROC curve and area: all class columns flattened together.
micro_fpr, micro_tpr, _ = roc_curve(end_y_test_hog.ravel(), end_y_pred2.ravel())
fpr["micro"], tpr["micro"] = micro_fpr, micro_tpr
roc_auc["micro"] = auc(micro_fpr, micro_tpr)

# Plot the micro-average curve first, then one curve per class.
plt.figure()
micro_label = 'micro-average ROC curve (area = {0:0.2f})'.format(roc_auc["micro"])
plt.plot(fpr["micro"], tpr["micro"], label=micro_label)
for i in range(len(classes)):
    class_label = 'ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i])
    plt.plot(fpr[i], tpr[i], label=class_label)

# Dashed diagonal marks the chance level.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Some extension of Receiver operating characteristic to multi-class')
# Legend is anchored outside the axes, to the right of the plot.
plt.legend(bbox_to_anchor=(1.04,1), borderaxespad=0)
plt.show()

In [None]:
# Report per-class precision / recall / F1 using the dataset's own class
# names. A hard-coded two-name list does not match the number of classes in
# this dataset and makes classification_report raise a ValueError.
target_names = classes
print("Classification report for SVM")
y_pred = model.predict(x_test_sc)
print(classification_report(
    np.asarray(y_test).astype(int),
    np.asarray(y_pred).astype(int),
    # Explicit labels keep row i aligned with target_names[i] even if a
    # class does not occur in the test labels or the predictions.
    labels=list(range(len(classes))),
    target_names=target_names,
))
print("\n\n")