In [None]:
# Mount Google Drive at /content/gdrive (this import only exists on Colab).
# Later cells build their data and result paths as <cwd>/gdrive/MyDrive/...,
# so they presumably rely on the working directory being /content — TODO confirm.
from google.colab import drive
drive.mount('/content/gdrive')

In [2]:
### Import necessary libraries ###
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
# Fix the seed of NumPy's global random number generator
np.random.seed(42)

In [3]:
### Hyperparameters ###
# Target size handed to cv2.resize in CustomDataset; previously noted alternative: (144, 260)
re_size = (196, 196)  #(144, 260)
# Value passed as C to the SVC constructor
svc_c = 5.0
# Number of cross-validation folds (used as cv=k in cross_validate)
k = 3
# Matches the five entries of the label dictionaries; not referenced by the other visible cells
num_classes = 5

In [4]:
### Prepare dataset ###
# Lookup tables: class name -> integer id, and the inverse for decoding predictions
str2int = {
  'Crayon_Shin': 0,
  'Doraemon': 1,
  'Hua_Family': 2,
  'Ilu': 3,
  'Maruko': 4,
}
int2str = {class_id: class_name for class_name, class_id in str2int.items()}


# CustomDataset: load formatted dataset
def CustomDataset(directory):
  """Load every class folder under `directory` into flat feature vectors.

  Expects one sub-directory per class, named after a key of the module-level
  `str2int` mapping, each holding that class's image files.

  Args:
    directory: Path of the dataset root.

  Returns:
    A tuple `(data, labels)` of NumPy arrays. `data` has one row per image
    (converted to RGB, resized to `re_size`, flattened and divided by 255);
    `labels` holds the matching integer class ids taken from `str2int`.

  Raises:
    FileNotFoundError: If `directory` is not an existing directory.
  """
  # Fail early with a clear message (e.g. Drive not mounted) instead of
  # silently returning empty arrays that only break later in the pipeline.
  if not os.path.isdir(directory):
    raise FileNotFoundError("Dataset directory not found: {}".format(directory))

  data = []
  labels = []

  # Only the immediate sub-directories are class folders. Sorting makes the
  # sample order independent of the filesystem's listing order, so a seeded
  # train/test split is reproducible.
  for class_name in sorted(os.listdir(directory)):
    class_dir = os.path.join(directory, class_name)

    # Skip stray files and folders that are not classes (for example notebook
    # checkpoint folders); looking them up in str2int would raise KeyError.
    if class_name not in str2int or not os.path.isdir(class_dir):
      continue

    # Image pre-processing: resize & normalize
    for img_name in sorted(os.listdir(class_dir)):
      img_file = os.path.join(class_dir, img_name)
      img = cv2.imread(img_file)

      # cv2.imread returns None (it does not raise) when a file cannot be
      # decoded as an image, e.g. a sub-folder or a non-image file.
      if img is None:
        print("Skipping unreadable file: {}".format(img_file))
        continue

      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      img = cv2.resize(img, re_size).flatten()
      data.append(img/255.0)

      labels.append(str2int[class_name])

  # Return images along with corresponding labels
  return np.array(data), np.array(labels)


# Load formatted dataset from the directory "AI_Project1/data"
data, labels = CustomDataset(os.path.join(os.getcwd(), 'gdrive/MyDrive/AI_Project1/data'))
# Split the dataset into train/test subsets, with an 8:2 ratio.
# An explicit random_state makes this cell idempotent: re-running it on its own
# reproduces the same split instead of depending on how much of the global
# NumPy random stream has already been consumed.
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [None]:
"""
### Apply PCA(optional) ###
from sklearn.decomposition import PCA

n_components = 300

# Define PCA model
pca = PCA(n_components=n_components)

# Fit the model with training set
X_train = pca.fit_transform(X_train)
# Transform the testing set with the fitted model
X_test = pca.transform(X_test)
"""

In [None]:
### Train SVM ###
# CustomScoring: show confusion matrix and return accuracy
def CustomScoring(estimator, X, y):
    """Scorer callable: print the confusion matrix and return the accuracy.

    Args:
        estimator: Fitted model exposing `predict`.
        X: Samples to predict.
        y: True labels for `X`.

    Returns:
        The accuracy of the estimator's predictions on `X` measured against `y`.
    """
    predictions = estimator.predict(X)
    score = accuracy_score(y, predictions)

    # Emit the matrix as a side effect so it is shown on every scoring call
    print("Confusion_Matrix:\n{}\n".format(confusion_matrix(y, predictions)))

    return score


# Define SVM model
model = SVC(C=svc_c)
# Estimate generalisation with k-fold cross-validation on the training set.
# CustomScoring prints each fold's confusion matrix and returns its accuracy.
# cross_validate fits clones of `model`, so `model` itself stays unfitted here.
result = cross_validate(model, X_train, y_train, cv=k, scoring=CustomScoring)
# Show the per-fold accuracies
print(result['test_score'])


# Fit the final model on the whole training set.
# Every fold uses the same hyperparameters, so picking the "best" fold estimator
# and then refitting it (as was done before) discarded that estimator's state
# anyway; fitting `model` directly gives the same final model without keeping
# k fitted copies in memory.
model.fit(X_train, y_train)

In [None]:
### Test ###
# Predict labels for the held-out test set with the trained model
y_pred = model.predict(X_test)

# Report the accuracy, then the confusion matrix, of those predictions
final_acc = accuracy_score(y_test, y_pred)
print("Final Accuracy: {:.4f}".format(final_acc))

final_cnf = confusion_matrix(y_test, y_pred)
print("\nFinal Confusion_Matrix:\n{}".format(final_cnf))

In [None]:
### Show results ###
import matplotlib.pyplot as plt

# Show up to ten test images together with their predicted and true labels.
# NOTE: this assumes X_test still holds the raw flattened pixel rows; it cannot
# be reshaped into an image if the optional PCA cell has been enabled.
result_dir = os.path.join(os.getcwd(), 'gdrive/MyDrive/AI_Project1/result/SVM')
# savefig does not create missing folders, so make sure the target exists
os.makedirs(result_dir, exist_ok=True)

# cv2.resize takes its size as (width, height) but returns an array shaped
# (height, width, channels), so the two entries of re_size must be swapped
# here. This only matters for non-square sizes.
img_shape = (re_size[1], re_size[0], 3)

# Do not run past the end of a test set with fewer than ten samples
for i in range(min(10, len(X_test))):
  image = X_test[i].reshape(img_shape)

  # One explicit figure per image, closed at the end of the iteration, so no
  # empty figure is left over after the loop.
  fig, ax = plt.subplots()
  ax.imshow(image)
  ax.text(0, -0.1, "Pred : {}\nLabel: {}".format(int2str[y_pred[i]], int2str[y_test[i]]), transform=ax.transAxes)
  ax.axis('off')

  fig.savefig(os.path.join(result_dir, f'prediction_{i+1}.jpg'))
  plt.show()

  plt.close(fig)