# Setup

In [None]:
# setup to see the execution time in each cell

!pip install ipython-autotime
!pip install directory_structure
# !pip install wandb
%load_ext autotime

In [None]:
import pandas as pd
import os
import glob
import PIL
from PIL import Image
import numpy as np
import cupy as cp
import matplotlib.pyplot as plt
# import wandb

In [None]:
# Root folder that holds every dataset used below.
DRIVER_ROOT_DIR = "/kaggle/input/" # HC Directory
# DRIVER_ROOT_DIR = "/content/drive/MyDrive/DTSC 870/Code" #MT Directory

# Dataset folders are built with os.path.join so that the root works with or
# without a trailing slash (the alternative root above has none).

# no augmentation
# DATASET_02_TRAIN = os.path.join(DRIVER_ROOT_DIR, "fer2013/02_FER/train")
# DATASET_02_TEST = os.path.join(DRIVER_ROOT_DIR, "fer2013/02_FER/test")

# augmentation v.1
# DATASET_02_TRAIN = os.path.join(DRIVER_ROOT_DIR, "fer2013-aug/Aug_train")
# DATASET_02_TEST = os.path.join(DRIVER_ROOT_DIR, "fer2013-aug-test/Aug_test")

# augmentation v.2
# NOTE(review): the test folder lives under "d/datasets/huihenrychen/" while the
# train folder does not -- confirm both mounts exist in the target environment.
DATASET_02_TRAIN = os.path.join(DRIVER_ROOT_DIR, "fer2013-aug-train-2/Aug_train_2")
DATASET_02_TEST = os.path.join(DRIVER_ROOT_DIR, "d/datasets/huihenrychen/fer2013-aug-test-2/Aug_test_2")

In [None]:
def get_data_df(dir):
    """Index image files matched by a glob pattern.

    Modified from:
    https://www.kaggle.com/namgalielei/simple-load-images-and-count-number-of-each-class

    Parameters
    ----------
    dir : str
        Glob pattern such as ``<root>/*/*.jpg``; the folder directly above each
        file is taken as its class label.

    Returns
    -------
    pandas.DataFrame
        One row per matched file with columns ``file`` (file name) and
        ``class`` (parent folder name). Rows are sorted by path so that the
        result does not depend on the order in which the file system lists
        entries.
    """
    # glob makes no ordering promise; sort for a reproducible row order.
    matched_paths = sorted(glob.glob(dir))

    return pd.DataFrame({
        'file': [os.path.basename(path) for path in matched_paths],
        'class': [os.path.basename(os.path.dirname(path)) for path in matched_paths],
    })

In [None]:
def generate_set(df, dir, classes_):
    """Load every indexed image as a flat pixel vector with its encoded label.

    Parameters
    ----------
    df : pandas.DataFrame
        File index with ``file`` and ``class`` columns (see ``get_data_df``).
    dir : str
        Folder that contains one sub-folder per class.
    classes_ : list
        Class names; a label is encoded as its position in this list.

    Returns
    -------
    tuple(list, list)
        ``(pixels, class_)`` where ``pixels[i]`` is the flattened pixel array
        of image ``i`` and ``class_[i]`` is its integer label.

    Raises
    ------
    ValueError
        If a class name in ``df`` is not present in ``classes_``.
    """
    pixels = []
    class_ = []

    for label, file_name in zip(df["class"], df["file"]):
        # <dir>/<class label>/<file name>
        path = dir + "/" + label + "/" + file_name

        # Convert on the host directly (no GPU round trip) and release the
        # file handle as soon as the pixels have been copied.
        with Image.open(path) as img:
            pixels.append(np.asarray(img).flatten())

        # y_true encoding
        class_.append(classes_.index(label))

    return pixels, class_

In [None]:
def feature_scale(_x_train_, _x_test_):
    """Standardise both splits with a StandardScaler fitted on the train split only.

    Returns the pair ``(scaled_train, scaled_test)``.
    """
    scaler = StandardScaler().fit(_x_train_)
    return scaler.transform(_x_train_), scaler.transform(_x_test_)


def feature_scale2(_x_train_):
    """Fit a fresh StandardScaler on the given data and return the scaled result."""
    scaler = StandardScaler()
    return scaler.fit_transform(_x_train_)

## Generate Train and Test sets

In [None]:
# Index every *.jpg found under <split>/<class>/ for both splits.
fer_df_train = get_data_df(DATASET_02_TRAIN+"/*/*.jpg")
fer_df_test = get_data_df(DATASET_02_TEST+"/*/*.jpg")

# A wrong dataset path makes glob match nothing; stop here with a clear message
# instead of failing later on empty arrays.
assert not fer_df_train.empty, "No training images found under {}".format(DATASET_02_TRAIN)
assert not fer_df_test.empty, "No test images found under {}".format(DATASET_02_TEST)

In [None]:
# Class names found in the training index. Sorted so that the class -> integer
# encoding (position in this list) does not depend on file listing order.
classes = sorted(fer_df_train["class"].unique().tolist())
classes

In [None]:
# Load flattened pixel vectors and integer labels for the train and test splits.
# Labels are positions in `classes`, so both splits share one encoding.
x_train, y_train = generate_set(fer_df_train, DATASET_02_TRAIN, classes)
x_test, y_test = generate_set(fer_df_test, DATASET_02_TEST, classes)

## EDA

In [None]:
# Preview the first rows of the test-split file index (columns: file, class).
fer_df_test.head()

In [None]:
# (rows, columns) of the train-split file index.
fer_df_train.shape

In [None]:
# Wrap the encoded labels of each split in a one-column frame for inspection.
x_train_df = pd.DataFrame({'class': y_train})
x_test_df = pd.DataFrame({'class': y_test})

x_train_df

In [None]:
# Histogram of the encoded class labels in the test split.
x_test_df["class"].hist()

## Class weights

In [None]:
# Report split sizes and derive one weight per class:
#     weight_i = N / (n_classes * samples_of_class_i)
print("Classes: {}".format(classes))
print("X train shape: {}".format(np.shape(x_train)))
print("X test shape: {}".format(np.shape(x_test)))

# total samples (len() avoids materialising the pixel arrays again)
N = len(x_train) + len(x_test)
print("Total sample size: {}".format(N))


# total class sample count
# minlength keeps both vectors aligned with `classes` even when a class is
# missing from one of the splits.
class_len = len(classes)
train_set_count = np.bincount(np.array(y_train), minlength=class_len)
test_set_count = np.bincount(np.array(y_test), minlength=class_len)
print("Train set: {}".format(train_set_count))
print("Test set: {}".format(test_set_count))


# assign a weight to each class
# NOTE(review): the counts include the test split -- confirm that is intended.
c_weight = {}
for i in range(class_len):
    c_weight[i] = N/(class_len*(train_set_count[i] + test_set_count[i]))

print("The class weights: {}".format(c_weight))

In [None]:
# np.unique(y_train_hog)

# HOG

In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import StandardScaler
import cv2
from skimage.feature import local_binary_pattern
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from matplotlib.pyplot import figure

from skimage.feature import hog
from skimage.transform import resize

In [None]:
def generate_hog_feature(df, dir_, classes_):
    """Compute a HOG descriptor and an encoded label for every indexed image.

    Each image is read with OpenCV, converted to grayscale and described with
    ``skimage.feature.hog`` (9 orientations, 16x16-pixel cells, 2x2-cell
    blocks, L2 block normalisation).

    Parameters
    ----------
    df : pandas.DataFrame
        File index with ``file`` and ``class`` columns.
    dir_ : str
        Folder that contains one sub-folder per class.
    classes_ : list
        Class names; a label is encoded as its position in this list.

    Returns
    -------
    tuple(list, list)
        ``(pixels, class_)``: HOG feature vectors and integer labels.

    Raises
    ------
    OSError
        If an image cannot be read.
    ValueError
        If a class name in ``df`` is not present in ``classes_``.
    """
    ppc = 16  # edge length of a HOG cell, in pixels

    pixels = []
    class_ = []

    for label, file_name in zip(df["class"], df["file"]):
        # <dir_>/<class label>/<file name>
        path = dir_ + "/" + label + "/" + file_name

        img = cv2.imread(path)
        if img is None:
            # imread signals failure by returning None rather than raising
            raise OSError("Could not read image: {}".format(path))

        # convert the read img into grayscale
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        fd = hog(img_gray, orientations=9, pixels_per_cell=(ppc,ppc), cells_per_block=(2, 2), block_norm= 'L2', feature_vector=True)
        pixels.append(fd)

        # y_true encoding
        class_.append(classes_.index(label))

    return pixels, class_

In [None]:
# Compute HOG feature vectors and integer labels for the train and test splits.
x_train_hog, y_train_hog = generate_hog_feature(fer_df_train, DATASET_02_TRAIN, classes)
x_test_hog, y_test_hog = generate_hog_feature(fer_df_test, DATASET_02_TEST, classes)

# Model

In [None]:
from sklearn.metrics import balanced_accuracy_score, accuracy_score
from numpy import mean
from sklearn.multiclass import OneVsRestClassifier

from cuml.svm import SVC
# from sklearn.svm import SVC

## Training

In [None]:
# Show the class-index -> weight mapping.
c_weight

In [None]:
def hog_svm_train(x_train_hog, y_train_hog, x_test_hog, y_test_hog, weights=None):
    """Scale HOG features, fit a one-vs-rest polynomial SVM and score the test split.

    Parameters
    ----------
    x_train_hog, x_test_hog : sequence of feature vectors
        HOG descriptors for the train / test split.
    y_train_hog, y_test_hog : sequence of int
        Encoded class labels for the train / test split.
    weights : dict, optional
        Accepted for backward compatibility. It is NOT forwarded to the
        classifier; a warning is emitted when a value is supplied.

    Returns
    -------
    tuple
        ``(model, uw_acc, w_acc)``: the fitted classifier, the balanced
        ("unweighted") accuracy and the plain ("weighted") accuracy on the
        test split.
    """
    import warnings

    if weights is not None:
        # Make the no-op visible instead of silently dropping the argument.
        warnings.warn(
            "hog_svm_train: `weights` is not forwarded to the classifier and has no effect.",
            stacklevel=2,
        )

    # Scaler statistics come from the train split only; the model is fed float32.
    x_train_sc, x_test_sc = feature_scale(x_train_hog, x_test_hog)
    x_train_sc = np.array(x_train_sc, dtype=np.float32)
    x_test_sc = np.array(x_test_sc, dtype=np.float32)
    y_train = np.array(y_train_hog, dtype=np.float32)

    # model = SVC(random_state=1, multiclass_strategy="ovr", class_weight=weights)
    model = OneVsRestClassifier(SVC(random_state=1, multiclass_strategy="ovr", kernel="poly", degree=5))
    model.fit(x_train_sc, y_train)

    # predict the test set
    y_testHat = model.predict(x_test_sc)

    # compute the unweighted accuracy
    uw_acc = balanced_accuracy_score(y_test_hog, y_testHat)
    print("The unweighted accuracy: {}".format(uw_acc))

    # compute the weighted accuracy
    w_acc = accuracy_score(y_test_hog, y_testHat)
    print("The weighted accuracy: {}".format(w_acc))

    return model, uw_acc, w_acc

In [None]:
# Scaled copies of the HOG features kept at notebook level for the evaluation cells.
x_train_sc, x_test_sc = (
    np.array(scaled_split) for scaled_split in feature_scale(x_train_hog, x_test_hog)
)

In [None]:
# Alternative call without the `weights` argument:
# model, uw_acc, w_acc = hog_svm_train(x_train_hog, y_train_hog, x_test_hog, y_test_hog)

# Call with the class-weight dict.
# NOTE(review): hog_svm_train builds its SVC without `class_weight`, so `weights`
# does not influence the fitted model -- confirm whether that is intended.
model, uw_acc, w_acc = hog_svm_train(x_train_hog, y_train_hog, x_test_hog, y_test_hog, weights=c_weight)

In [None]:
# Inspect the hyperparameters of the fitted classifier returned by hog_svm_train.
model.get_params()

In [None]:
# wandb.init(
#   project="LBP_81_PCA_SVM",
#   notes="HoG on img aug SVM",
# )

'''
d = 2
unweighted = 0.334569889635343
weighted = 0.38123933261346615

d = 3
unweighted = 0.45263031699835427
weighted = 0.48859242746177156

d = 4
unweighted = 0.5026142830819644
weighted = 0.5353373506565885

d = 5
unweighted = 0.5088910529663084
weighted = 0.5450207252081228

d = 6
unweighted = 0.5055772284402401
weighted = 0.5414330001044968

d = 7
unweighted = 0.49228652039157933
weighted = 0.5312271413145704

d = 8
unweighted = 0.4803844583237599
weighted = 0.5214392699153576


'''



# Result Analysis

In [None]:
# Compare the two accuracy figures returned by hog_svm_train.
# The scores are fractions in [0, 1]; scale them so the bars match the "%" axis label.
plt.bar(x="unweighted", height=uw_acc * 100)
plt.bar(x="weighted", height=w_acc * 100)
plt.title("HoG on img augmentation SVM accuracy")
plt.ylabel("Accuracy (%)")
plt.xlabel("SVM accuracy type")
plt.show()

In [None]:
# `plot_confusion_matrix` was removed from scikit-learn in 1.2. In this notebook
# it is only referenced from commented-out code, so tolerate its absence rather
# than aborting the whole cell.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    plot_confusion_matrix = None
from sklearn.metrics import classification_report
from cuml.metrics import confusion_matrix

import plotly.express as px

In [None]:
# Legend: class index -> class name
for i, class_name in enumerate(classes):
    print("[{}: {}]".format(i, class_name), end=", ")

# Integer labels are required by the confusion-matrix routine.
y_pred = np.array(model.predict(x_test_sc), dtype=np.int32)
y_test_hog = np.array(y_test_hog, dtype=np.int32)

# normalize="true" already divides every row by its number of true samples,
# so the matrix only needs rounding (a second division would turn an
# all-zero row into NaN).
conf_matrix = cp.asnumpy(confusion_matrix(y_test_hog, y_pred, normalize="true"))
conf_matrix = np.around(conf_matrix.astype('float'), decimals=2)

fig = px.imshow(conf_matrix, text_auto=True, x=classes, y=classes)
fig.show()

# plt.title("Confusion Matrix with " + str(item['variance_']) + "% variance (PCA)")
# disp.confusion_matrix

In [None]:
# Per-class accuracy in percent: diagonal entry of conf_matrix divided by the
# total of its row.
per_class_acc = [
    (conf_matrix[idx][idx] / np.array(conf_matrix[idx]).sum()) * 100
    for idx in range(len(classes))
]

for idx, (class_name, acc) in enumerate(zip(classes, per_class_acc)):
    print("[{}: {} - {}]".format(idx, class_name, acc), end=", ")
    

In [None]:
# Per-class precision / recall / F1 on the test split; `classes` supplies the
# display names for the encoded labels.

print("Classification report for HoG img augmentation SVM")
y_pred = model.predict(x_test_sc)
print(classification_report(y_test_hog, y_pred, target_names=classes))

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import LabelBinarizer
from matplotlib.pyplot import figure

In [None]:
y_pred2 = np.array(y_pred).astype(int)

# Fit the binarizer once on the full label set so that predictions and ground
# truth share the same columns, even when a class is never predicted.
lb = LabelBinarizer()
lb.fit(np.arange(len(classes)))

end_y_pred2 = lb.transform(y_pred2)
end_y_test_hog = lb.transform(y_test_hog)

In [None]:
# One-vs-rest ROC curve and AUC for every class, built from the binarised
# labels and predictions.
# NOTE(review): the inputs are hard 0/1 predictions rather than scores, so each
# curve has very few points -- confirm this is what is wanted.
fpr, tpr, roc_auc = {}, {}, {}
for class_idx in range(len(classes)):
    fpr[class_idx], tpr[class_idx], _ = roc_curve(
        end_y_test_hog[:, class_idx], end_y_pred2[:, class_idx]
    )
    roc_auc[class_idx] = auc(fpr[class_idx], tpr[class_idx])

# Micro-average: pool every (sample, class) decision into one binary problem.
fpr["micro"], tpr["micro"], _ = roc_curve(end_y_test_hog.ravel(), end_y_pred2.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# Draw all curves on a single axes.
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(
    fpr["micro"],
    tpr["micro"],
    label='micro-average ROC curve (area = {0:0.2f})'.format(roc_auc["micro"]),
)
for class_idx in range(len(classes)):
    roc_ax.plot(
        fpr[class_idx],
        tpr[class_idx],
        label='ROC curve of class {0} (area = {1:0.2f})'.format(class_idx, roc_auc[class_idx]),
    )

# Chance diagonal, axis limits and labelling.
roc_ax.plot([0, 1], [0, 1], 'k--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Some extension of Receiver operating characteristic to multi-class')
roc_ax.legend(bbox_to_anchor=(1.04,1), borderaxespad=0)
plt.show()