In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python

import io # Input/Output Module
import os # OS interfaces
import cv2 # OpenCV package
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import statistics
import warnings

from urllib import request # module for opening HTTP requests
from matplotlib import pyplot as plt # Plotting library

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import seaborn as sns
import statistics
from sklearn.manifold import TSNE

import sklearn
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from scipy.spatial.distance import cdist
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam,RMSprop,SGD,Adamax
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau


<div style="width:100%; height:140px">
    <img src="https://www.kuleuven.be/internationaal/thinktank/fotos-en-logos/ku-leuven-logo.png/image_preview" width="300px" height="auto" align="left">
</div>


KUL H02A5a Computer Vision: Group Assignment 1
---------------------------------------------------------------
Student numbers: <span style="color:red">r1, r0927846, r3, r4, r5</span>.

The goal of this assignment is to explore more advanced techniques for constructing features that better describe objects of interest and to perform face recognition using these features. This assignment will be delivered in groups of 5 (either composed by you or randomly assigned by your TA's).

In this assignment you are a group of computer vision experts that have been invited to ECCV 2021 to do a tutorial about  "Feature representations, then and now". To prepare the tutorial you are asked to participate in a kaggle competition and to release a notebook that can be easily studied by the tutorial participants. Your target audience is: (master) students who want to get a first hands-on introduction to the techniques that you apply.

---------------------------------------------------------------
This notebook is structured as follows:
0. Data loading & Preprocessing
1. Feature Representations
2. Evaluation Metrics 
3. Classifiers
4. Experiments
5. Publishing best results
6. Discussion

Make sure that your notebook is **self-contained** and **fully documented**. Walk us through all steps of your code. Treat your notebook as a tutorial for students who need to get a first hands-on introduction to the techniques that you apply. Provide strong arguments for the design choices that you made and what insights you got from your experiments. Make use of the *Group assignment* forum/discussion board on Toledo if you have any questions.

Fill in your student numbers above and get to it! Good luck! 


<div class="alert alert-block alert-info">
<b>NOTE:</b> This notebook is just an example/template, feel free to adjust it in any way you please! Just keep things organised and document accordingly!
</div>

<div class="alert alert-block alert-info">
<b>NOTE:</b> Clearly indicate the improvements that you make!!! You can for instance use titles like: <i>3.1. Improvement: Non-linear SVM with RBF Kernel.</i>
</div>
    
---------------------------------------------------------------
# 0. Data loading & Preprocessing

## 0.1. Loading data
The training set is many times smaller than the test set and this might strike you as odd, however, this is close to a real world scenario where your system might be put through daily use! In this session we will try to do the best we can with the data that we've got! 

In [None]:
# Input data files are available in the read-only "../input/" directory.
# The competition folder is named once so the four paths below cannot drift apart.
DATA_DIR = '/kaggle/input/kul-h02a5a-computer-vision-ga1-2023'

train = pd.read_csv(os.path.join(DATA_DIR, 'train_set.csv'), index_col = 0)
train.index = train.index.rename('id')

test = pd.read_csv(os.path.join(DATA_DIR, 'test_set.csv'), index_col = 0)
test.index = test.index.rename('id')

# Read the images as numpy arrays and store them in the "img" column.
# Only the row index is needed to build each file name, so iterate over the
# index directly instead of materialising every row with iterrows().
# Each array is passed through cv2.COLOR_BGR2RGB, i.e. its first and last
# channel are swapped.
train['img'] = [
    cv2.cvtColor(
        np.load(os.path.join(DATA_DIR, 'train', 'train_{}.npy'.format(index)), allow_pickle=False),
        cv2.COLOR_BGR2RGB)
    for index in train.index]

test['img'] = [
    cv2.cvtColor(
        np.load(os.path.join(DATA_DIR, 'test', 'test_{}.npy'.format(index)), allow_pickle=False),
        cv2.COLOR_BGR2RGB)
    for index in test.index]


train_size, test_size = len(train),len(test)

# Last expression of the cell: rendered as the cell output.
"The training set contains {} examples, the test set contains {} examples.".format(train_size, test_size)

*Note: this dataset is a subset of the* [*VGG face dataset*](https://www.robots.ox.ac.uk/~vgg/data/vgg_face/).

## 0.2. A first look
Let's have a look at the data columns and class distribution.

In [None]:
# Show the first training row. The training set contains an identifier (the
# 'id' index), name, image information ('img') and class label ('class').
train.head(1)

In [None]:
# Show the first test row. The test set only contains an identifier (the 'id'
# index) and the corresponding image information ('img').

test.head(1)

In [None]:
# The class distribution in the training set: one row per 'name', with the
# number of images for that name ('img' -> count) and the largest class label
# that occurs for it ('class' -> max).
train.groupby('name').agg({'img':'count', 'class': 'max'})

Note that **Jesse is assigned the classification label 1**, and **Mila is assigned the classification label 2**. The dataset also contains 20 images of **look alikes (assigned classification label 0)** and the raw images. 

## 0.3. Preprocess data
### 0.3.1 Example: HAAR face detector
In this example we use the [HAAR feature based cascade classifiers](https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_objdetect/py_face_detection/py_face_detection.html) to detect faces, then the faces are resized so that they all have the same shape. If there are multiple faces in an image, we only take the first one. 

<div class="alert alert-block alert-info"> <b>NOTE:</b> You can write temporary files to <code>/kaggle/temp/</code> or <code>../../tmp</code>, but they won't be saved outside of the current session
</div>


In [None]:
class HAARPreprocessor():
    """Preprocessing pipeline built around HAAR feature based cascade classifiers.

    For every row of a DataFrame the first detected face is cropped and resized
    to ``face_size``. Rows without a detected face yield an all-zero image in
    the batch returned by ``__call__``.

    Parameters
    ----------
    path : str
        Directory in which the cascade XML file is stored; the directory is
        created and the file downloaded when missing.
    face_size : tuple of int
        Size passed to ``cv2.resize`` for each face crop; also the first two
        dimensions of the placeholder returned when no face is found.
    """
    
    def __init__(self, path, face_size):
        self.face_size = face_size
        file_path = os.path.join(path, "haarcascade_frontalface_default.xml")
        if not os.path.exists(file_path): 
            # makedirs also creates missing parent directories and does not
            # fail when the directory already exists (os.mkdir did neither).
            os.makedirs(path, exist_ok=True)
            self.download_model(file_path)
        
        self.classifier = cv2.CascadeClassifier(file_path)
  
    def download_model(self, path):
        """Download the frontal-face cascade XML from the OpenCV repository to ``path``."""
        url = "https://raw.githubusercontent.com/opencv/opencv/master/data/"\
            "haarcascades/haarcascade_frontalface_default.xml"
        
        with request.urlopen(url) as r, open(path, 'wb') as f:
            f.write(r.read())
            
    def detect_faces(self, img):
        """Detect all faces in an image.

        Returns whatever ``detectMultiScale`` returns; ``extract_faces``
        unpacks each entry as ``(x, y, w, h)``.
        """
        
        # NOTE(review): the loading cell converts images with COLOR_BGR2RGB, so
        # the input here is presumably RGB while this conversion assumes BGR —
        # confirm whether COLOR_RGB2GRAY is intended.
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return self.classifier.detectMultiScale(
            img_gray,
            scaleFactor=1.2,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )
        
    def extract_faces(self, img):
        """Returns all faces (cropped) in an image."""
        
        faces = self.detect_faces(img)

        return [img[y:y+h, x:x+w] for (x, y, w, h) in faces]
    
    def preprocess(self, data_row):
        """Return the first face of ``data_row['img']`` resized to ``face_size``.

        When no face is detected, an array of shape ``face_size + (3,)`` filled
        with NaN is returned as a "missing" marker.
        """
        faces = self.extract_faces(data_row['img'])
        
        # if no faces were found, return a NaN-filled placeholder image
        if len(faces) == 0:
            nan_img = np.empty(self.face_size + (3,))
            nan_img[:] = np.nan
            return nan_img
        
        # only return the first face
        return cv2.resize(faces[0], self.face_size, interpolation = cv2.INTER_AREA)
            
    def __call__(self, data):
        """Preprocess every row of ``data`` and stack the results into one integer array."""
        faces = np.stack([self.preprocess(row) for _, row in data.iterrows()])
        # Casting NaN to int is undefined (it produces platform-dependent
        # garbage values), so the NaN placeholders are mapped to 0 before the
        # integer cast: images without a detected face become all-black.
        return np.nan_to_num(faces).astype(int)

**Visualise**

Let's plot a few examples.

In [None]:
# Size every detected face crop is resized to (passed to HAARPreprocessor as
# face_size below); a parameter to play with.
FACE_SIZE = (150, 150)

def plot_image_sequence(data, n, imgs_per_row=7):
    """Show the first ``n`` images of ``data`` in a grid.

    Parameters
    ----------
    data : sequence of images
        Indexed as ``data[i]`` for ``i`` in ``range(n)``; each item is passed
        to ``imshow``.
    n : int
        Number of images to show. Nothing is drawn when ``n`` is 0 or negative.
    imgs_per_row : int
        Maximum number of images per grid row.
    """
    if n <= 0:
        return

    n_cols = min(imgs_per_row, n)
    # Ceiling division: the number of rows needed to hold n images.
    # The previous formula 1 + int(n/(imgs_per_row+1)) came up one row short
    # for some sizes (e.g. n=21 with 10 per row) and raised an IndexError.
    n_rows = (n + n_cols - 1) // n_cols

    # squeeze=False always returns a 2-D array of axes, so one indexing scheme
    # covers the single-image, single-row and single-column cases.
    f, ax = plt.subplots(n_rows, n_cols, figsize=(10*n_cols,10*n_rows), squeeze=False)
    for i in range(n):
        ax[i // n_cols, i % n_cols].imshow(data[i])

    # Hide the unused cells of the last row instead of showing empty axes.
    for i in range(n, n_rows * n_cols):
        ax[i // n_cols, i % n_cols].axis('off')
    plt.show()

    
# Preprocessed data: build the HAAR preprocessor (cascade file kept under
# '../../tmp') and turn every image into a face crop of FACE_SIZE.
preprocessor = HAARPreprocessor(path = '../../tmp', face_size=FACE_SIZE)

# train_X / test_X: stacked face crops; train_y: the 'class' column values.
train_X, train_y = preprocessor(train), train['class'].values
test_X = preprocessor(test)



In [None]:
# Relabel crops that are not usable faces as class 0.
# The positions below are counted *within* each class (the k-th image whose
# label is 1, respectively 2), in training-set order.
NON_FACE_POSITIONS = {
    1: [6, 12, 15, 18, 27],
    2: [5, 7, 15, 23, 24],
}

# Always start from the labels stored in the DataFrame and work on a copy.
# This makes the cell idempotent (re-running it no longer shifts the positions
# and relabels further images) and avoids writing into the array returned by
# train['class'].values.
original_labels = train['class'].to_numpy()
train_y = original_labels.copy()
for class_label, positions in NON_FACE_POSITIONS.items():
    class_rows = np.flatnonzero(original_labels == class_label)
    train_y[class_rows[positions]] = 0

In [None]:
# Plot every crop currently labelled 0: the look-alikes (Michael and Sarah)
# plus any crop relabelled to 0 by the filtering cell above.
plot_image_sequence(train_X[train_y == 0], n=len(train_X[train_y == 0]), imgs_per_row=10)

In [None]:
# Plot every crop currently labelled 1 (Jesse).
# Authors' note of non-face positions within this class: 6, 12?, 15, 18, 27
# (the filtering cell relabels positions [6, 12, 15, 18, 27] of class 1 to 0).
plot_image_sequence(train_X[train_y == 1], n=len(train_X[train_y == 1]), imgs_per_row=10)

In [None]:
# Plot every crop currently labelled 2 (Mila).
# Authors' note of non-face positions within this class: 5?, 7?, 15, 24
# NOTE(review): the filtering cell relabels [5, 7, 15, 23, 24] for class 2;
# position 23 is missing from this note — confirm which list is intended.
plot_image_sequence(train_X[train_y == 2], n=len(train_X[train_y == 2]), imgs_per_row=10)

Now we are ready to rock!

In [None]:
# Small validation batch: the last 100 preprocessed test images.
val_X = test_X[-100:]
# Labels for those 100 images (0, 1 or 2), ten per line.
# NOTE(review): presumably assigned by hand, since the test set ships without
# labels — confirm.
val_Y = [0, 0, 1, 0, 0, 1, 0, 0, 2, 0,
         0, 0, 0, 0, 1, 0, 0, 1, 0, 2,
         2, 0, 2, 0, 0, 1, 1, 0, 0, 1,
         0, 0, 0, 1, 0, 2, 2, 0, 0, 2,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
         0, 1, 0, 0, 0, 1, 0, 1, 1, 2,
         0, 0, 0, 0, 1, 0, 2, 0, 2, 1,
         0, 0, 2, 0, 0, 0, 2, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
         1, 1, 0, 0, 2, 0, 0, 1, 0, 0]
# Mind the capitalisation: val_Y stays the plain list of integer labels, while
# val_y is first the same labels as an array and then their one-hot encoding
# over 3 classes.
val_y = np.asarray(val_Y)
val_y = keras.utils.to_categorical(val_y, num_classes = 3)

# 1. Feature Representations
## 1.0. Example: Identity feature extractor
Our example feature extractor doesn't actually do anything... It just returns the input:
$$
\forall x : f(x) = x.
$$

It does make for a good placeholder and baseclass ;).

In [None]:
class IdentityFeatureExtractor:
    """Pass-through feature extractor: the features are the input itself.

    Serves as placeholder and as base class for the real extractors, which
    override ``transform``.
    """

    def transform(self, X):
        """Return ``X`` unchanged."""
        return X

    def __call__(self, X):
        """Make the extractor callable; delegates to ``transform``."""
        features = self.transform(X)
        return features

## 1.1. Baseline 1: HOG feature extractor/Scale Invariant Feature Transform
...

In [None]:
class HOGFeatureExtractor(IdentityFeatureExtractor):
    """TODO: this feature extractor is under construction.

    Only the constructor works: it stores its keyword arguments in
    ``self.params``. ``transform`` is not implemented yet.
    """
    
    def __init__(self, **params):
        # `self` was missing from the signature, so the class could not be
        # instantiated at all.
        self.params = params
        
    def transform(self, X):
        """Not implemented; always raises ``NotImplementedError``."""
        raise NotImplementedError("HOGFeatureExtractor.transform is not implemented yet")

In [None]:
warnings.simplefilter(action='ignore', category=FutureWarning)
# NumPy 2.0 removed VisibleDeprecationWarning from the top-level namespace
# (it now lives in numpy.exceptions), so look it up in a way that works on
# both old and new releases instead of failing with AttributeError.
ragged_array_warning = getattr(np, 'VisibleDeprecationWarning', None)
if ragged_array_warning is None:
    ragged_array_warning = np.exceptions.VisibleDeprecationWarning
warnings.filterwarnings("ignore", category=ragged_array_warning) 
        
class SIFTFeatureExtractor(IdentityFeatureExtractor):
    """SIFT descriptor extractor built on ``cv2.SIFT_create``.

    The keyword arguments ``nfeatures``, ``nOctaveLayers``,
    ``contrastThreshold``, ``edgeThreshold`` and ``sigma`` are forwarded to
    ``cv2.SIFT_create``; a keyword that is not given is forwarded as ``None``
    (presumably OpenCV then keeps its own default — confirm).
    """
    
    def __init__(self, **params):
        self.params = params
        self.sift = cv2.SIFT_create(
                            nfeatures =  self.params.get('nfeatures'),
                            nOctaveLayers = self.params.get('nOctaveLayers'),
                            contrastThreshold = self.params.get('contrastThreshold'),
                            edgeThreshold = self.params.get('edgeThreshold'),
                            sigma = self.params.get('sigma') )
        
    def transform(self, X):
        """Compute the SIFT descriptors of every image in ``X``.

        Returns
        -------
        numpy.ndarray
            1-D object array with one entry per image: whatever
            ``detectAndCompute`` returned as descriptors for that image
            (``None`` entries are handled by ``flatten`` and the other
            consumers in this notebook).
        """
        per_image = []
        for img in X:
            gray = cv2.cvtColor(img.astype(dtype=np.uint8), cv2.COLOR_BGR2GRAY)
            _, descriptors = self.sift.detectAndCompute(gray, None)
            per_image.append(descriptors)

        # The descriptor arrays differ in length and may be None. Letting
        # np.asarray guess the layout of such a ragged list is deprecated and
        # raises on recent NumPy releases, so the object array is filled
        # element by element. The result is always 1-D, also when all images
        # happen to yield equally shaped descriptors.
        images_descriptors = np.empty(len(per_image), dtype=object)
        for position, descriptors in enumerate(per_image):
            images_descriptors[position] = descriptors
        return images_descriptors
    
    def flatten(self,image_descriptors):
        """Return all descriptors of all images as one flat list, skipping ``None`` entries."""
        all_descriptors = []
        for descriptors_per_img in image_descriptors:
            if descriptors_per_img is not None:
                all_descriptors.extend(descriptors_per_img)
        return all_descriptors
    
    def compare_two_img(self, img1, img2):
        """Plot the cross-checked SIFT matches between two images.

        Prints a message and returns ``None`` without plotting when one of the
        images yields no descriptors.
        """
        img1 = img1.astype(dtype=np.uint8)
        img2 = img2.astype(dtype=np.uint8)
        gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
        bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck = True)
        
        keypoints1, descriptors1 = self.sift.detectAndCompute(gray1, None)
        keypoints2, descriptors2 = self.sift.detectAndCompute(gray2, None) 
        
        if descriptors1 is None: 
            print('Cannot proceed. Img1 has descriptors: None')
            return None
        if descriptors2 is None:
            print('Cannot proceed. Img2 has descriptors: None')
            return None

        # Best (smallest distance) matches first.
        matches = sorted(bf.match(descriptors1,descriptors2), key = lambda x:x.distance)

        img_matches = cv2.drawMatches(img1, keypoints1, img2, keypoints2, matches, outImg = None)
        plt.imshow(img_matches)
    
    
    def __call__(self, X):
        """Shorthand for ``transform(X)``."""
        return self.transform(X)
    

### Comparing Matches between Img(s)

In [None]:
# Create a SIFT extractor with chosen hyperparameters
# (alternative with no explicit hyperparameters: sift_extractor = SIFTFeatureExtractor())
sift_extractor = SIFTFeatureExtractor(
                            nfeatures= 70,
                            nOctaveLayers = 8,
                            contrastThreshold = 0.01,
                            edgeThreshold = 6,
                            sigma = 0.6)

# Plot the matches between two face crops (training images 12 and 17)
sift_extractor.compare_two_img(train_X[12], train_X[17])

### 1.1.1. t-SNE Plots
...

In [None]:
# Defining TSNE with a fixed seed for reproducibility; this single `tsne`
# object is reused by all the t-SNE cells below.
seed = 42
tsne = TSNE(random_state = seed, perplexity=15) 

# Colours for the scatterplots: one per class label (3 classes)
palette = sns.color_palette("bright", 3)

# Function for creating a matrix with similarity-based distances between each image
def get_distance_matrix(images_descriptors, missing_distance=999):
    """Build the pairwise image distance matrix from per-image descriptors.

    The distance between two different images is the mean distance of the
    cross-checked brute-force (L2) matches between their descriptors.

    Parameters
    ----------
    images_descriptors : sequence
        One descriptor array (or ``None``) per image.
    missing_distance : number, optional
        Distance used when no comparison is possible: one of the two images
        has no descriptors, or the matcher returns no match. Defaults to the
        previously hard-coded 999.

    Returns
    -------
    list of list
        ``features[i][j]`` is the distance between image ``i`` and image
        ``j``; the diagonal is 0.
    """
    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck = True)
    features = []
    for i, descriptors1 in enumerate(images_descriptors):
        row = []
        for j, descriptors2 in enumerate(images_descriptors):
            if i == j:
                distance = 0
            elif descriptors1 is None or descriptors2 is None:
                distance = missing_distance
            else:
                matches = bf.match(descriptors1, descriptors2)
                if matches:
                    distance = statistics.mean([match.distance for match in matches])
                else:
                    # statistics.mean raises on an empty sequence.
                    distance = missing_distance

            row.append(distance)
        features.append(row)
    return features

In [None]:
# NOTE(review): this cell is an exact duplicate of the previous cell; it
# re-creates `seed`, `tsne` and `palette` and redefines get_distance_matrix
# with identical code. It can be removed without changing the notebook.

# Defining TSNE with same seed for reproducibility
seed = 42
tsne = TSNE(random_state = seed, perplexity=15) 

# Colours for scatterplot 
palette = sns.color_palette("bright", 3)

# Function for creating a matrix with similarity-based distances between each image
def get_distance_matrix(images_descriptors):
    """Return a nested list with the pairwise distance between all images.

    Entry [i][j] is 0 on the diagonal, 999 when either image has no
    descriptors (None), and otherwise the mean distance of the cross-checked
    brute-force L2 matches between the two descriptor sets.
    """
    bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck = True)
    features = []
    for (i, descriptors1) in enumerate(images_descriptors):
        # start the row for image i
        features.append([])
        for (j, descriptors2) in enumerate(images_descriptors):
            if i == j:
                distance = 0
            elif descriptors1 is None or descriptors2 is None:
                distance = 999
            else:
                matches = bf.match(descriptors1, descriptors2)
                distance = statistics.mean([match.distance for match in matches])

            features[i].append(distance)
    return features

In [None]:
# Defines SIFT extractor with specific hyperparameters
sift_extractor = SIFTFeatureExtractor(
                            nfeatures= None,
                            nOctaveLayers = 4,
                            contrastThreshold = 0.03,
                            edgeThreshold = 20,
                            sigma = 1.8)

# Extracting the features: per-image descriptors first, then the pairwise
# distance matrix between all training images
all_features = sift_extractor.transform(train_X)
all_features = get_distance_matrix(all_features)

# Embed every image (one row of the distance matrix) in two dimensions.
# NOTE(review): `tsne` is created without a `metric` argument, so the rows are
# presumably treated as ordinary feature vectors rather than as precomputed
# distances — confirm this is intended.
features_low_dimension = tsne.fit_transform(all_features)
df = pd.DataFrame(features_low_dimension, columns=["x","y"])

# Create a Scatter plot. The frame is passed as `data=` like in every other
# scatterplot call of this notebook; passing it positionally fails on seaborn
# releases whose scatterplot arguments are keyword-only.
sns.scatterplot(data=df, x="x", y="y", hue=train_y, legend='full', palette=palette)

### 1.1.2. Discussion
...

In [None]:
# Effect of the SIFT hyperparameter nOctaveLayers (3 is the default)
parameter = [3, 2, 4, 5] 
acc_TSNE = []

# One t-SNE embedding per tested value; all other hyperparameters stay None.
for n_octave_layers in parameter:
    sift_extractor = SIFTFeatureExtractor(
                            nfeatures = None,
                            nOctaveLayers = n_octave_layers,
                            contrastThreshold = None,
                            edgeThreshold = None,
                            sigma = None)
    
    all_features = get_distance_matrix(sift_extractor(train_X))
    features_low_dimension = tsne.fit_transform(all_features)
    df = pd.DataFrame(features_low_dimension, columns=["x","y"])
    acc_TSNE.append(df)

# Plotting: one panel per tested value
f, ax = plt.subplots(1, len(parameter), figsize=(20,4))
f.suptitle('Effect of varying SIFT-Hyperparameter: nOctaveLayers')

for panel, (value, df) in enumerate(zip(parameter, acc_TSNE)):
    sns.scatterplot(ax=ax[panel], data=df, x="x", y="y", hue=train_y, legend='full', palette=palette)
    ax[panel].set_title(f"Parameter: {value}")

In [None]:
# Looking at the effect of SIFT( contrastThreshold = 0.04)
#
# contrastThreshold: the contrast threshold used to filter out weak features
# in semi-uniform (low-contrast) regions. The larger the threshold, the less
# features are produced by the detector.
acc_TSNE = []
parameter = [None, 0.04, 0.08, 0.05] 

# One t-SNE embedding per tested value; all other hyperparameters stay None.
for contrast_threshold in parameter:
    sift_extractor = SIFTFeatureExtractor(
                            nfeatures = None,
                            nOctaveLayers = None,
                            contrastThreshold = contrast_threshold,
                            edgeThreshold = None,
                            sigma = None)
    
    all_features = sift_extractor(train_X)
    all_features = get_distance_matrix(all_features)
    features_low_dimension = tsne.fit_transform(all_features)
    df = pd.DataFrame(features_low_dimension, columns=["x","y"])
    acc_TSNE.append(df)

# Plotting: one panel per tested value. The title names the swept
# hyperparameter, like the nOctaveLayers figure does.
f, ax = plt.subplots(1, len(parameter), figsize=(20,4))
f.suptitle('Effect of varying SIFT-Hyperparameter: contrastThreshold')

for panel, df in enumerate(acc_TSNE):
    sns.scatterplot(ax=ax[panel], data=df, x="x", y="y", hue=train_y, legend='full', palette=palette)
    ax[panel].set_title(f"Parameter: {parameter[panel]}")

In [None]:
# Looking at the effect of SIFT( edgeThreshold = 10)
#
# edgeThreshold: the threshold used to filter out edge-like features.
# The larger the edgeThreshold, the less features are filtered out (more
# features are retained).
acc_TSNE = []
parameter = [10, 15, 20, 8] 

# One t-SNE embedding per tested value; all other hyperparameters stay None.
for edge_threshold in parameter:
    sift_extractor = SIFTFeatureExtractor(
                            nfeatures = None,
                            nOctaveLayers = None,
                            contrastThreshold = None,
                            edgeThreshold = edge_threshold,
                            sigma = None)
    
    all_features = sift_extractor(train_X)
    all_features = get_distance_matrix(all_features)
    features_low_dimension = tsne.fit_transform(all_features)
    df = pd.DataFrame(features_low_dimension, columns=["x","y"])
    acc_TSNE.append(df)

# Plotting: one panel per tested value. The title names the swept
# hyperparameter, like the nOctaveLayers figure does.
f, ax = plt.subplots(1, len(parameter), figsize=(20,4))
f.suptitle('Effect of varying SIFT-Hyperparameter: edgeThreshold')

for panel, df in enumerate(acc_TSNE):
    sns.scatterplot(ax=ax[panel], data=df, x="x", y="y", hue=train_y, legend='full', palette=palette)
    ax[panel].set_title(f"Parameter: {parameter[panel]}")

In [None]:
# Looking at the effect of SIFT( sigma = 1.6)
#
# sigma: the sigma of the Gaussian applied to the input image at the octave
# #0. If your image is captured with a weak camera with soft lenses, you
# might want to reduce the number.
acc_TSNE = []
parameter = [None, 1.6, 1.3, 1.8] 

# One t-SNE embedding per tested value; all other hyperparameters stay None.
# `sift_extractor` keeps the extractor of the last tested value after the loop.
for sigma_value in parameter:
    sift_extractor = SIFTFeatureExtractor(
                            nfeatures = None,
                            nOctaveLayers = None,
                            contrastThreshold = None,
                            edgeThreshold = None,
                            sigma = sigma_value)
    
    all_features = sift_extractor(train_X)
    all_features = get_distance_matrix(all_features)
    features_low_dimension = tsne.fit_transform(all_features)
    df = pd.DataFrame(features_low_dimension, columns=["x","y"])
    acc_TSNE.append(df)

# Plotting: one panel per tested value. The title names the swept
# hyperparameter, like the nOctaveLayers figure does.
f, ax = plt.subplots(1, len(parameter), figsize=(20,4))
f.suptitle('Effect of varying SIFT-Hyperparameter: sigma')

for panel, df in enumerate(acc_TSNE):
    sns.scatterplot(ax=ax[panel], data=df, x="x", y="y", hue=train_y, legend='full', palette=palette)
    ax[panel].set_title(f"Parameter: {parameter[panel]}")


### Training SVM model

We defined two functions:

- 'kmean_bow' is used to cluster all the features into small groups. For instance, an image will have certain features that will be grouped into similar groups. [[1]](#1)

<img width="700" align="center" src="https://raw.githubusercontent.com/danilotpnta/ComputerVission/main/img/2.png" >

- 'create_feature_bow' computes the features that are used to train our model. For every descriptor of an image it finds the nearest of the previously created clusters (minimum Euclidean distance) and counts how many descriptors fall into each cluster. The resulting histogram of cluster counts describes the image.

![Features of an image](https://raw.githubusercontent.com/danilotpnta/ComputerVission/main/img/4.png)

It will essentially look like the image above, where the x-axis lists the visual words of the BoW and the frequencies form the histogram that describes one image. [[2]](#2).

#### References
<a id="1">[1]</a> 
C 7.1 | Bag Of Visual Words | CNN | Object Detection | Machine learning | EvODN. Available at: https://youtu.be/1_5uuqWXuIA

<a id="2">[2]</a> 
Bag of Visual Words Model for Image Classification and Recognition. Available at: https://kushalvyas.github.io/BOV.html


In [None]:
# Creating a Bag of Words from the descriptors
def kmean_bow(all_descriptors, num_cluster, random_state=None):
    """Cluster the descriptors with KMeans and return the cluster centres.

    Parameters
    ----------
    all_descriptors : array-like
        The descriptors to cluster, as passed to ``KMeans.fit``.
    num_cluster : int
        Number of clusters (visual words).
    random_state : int or None, optional
        Forwarded to ``KMeans`` so the vocabulary can be made reproducible.
        The default ``None`` keeps the previous, unseeded behaviour.

    Returns
    -------
    The ``cluster_centers_`` of the fitted KMeans model.
    """
    kmeans = KMeans(n_clusters = num_cluster, random_state = random_state)
    kmeans.fit(all_descriptors)
    return kmeans.cluster_centers_

def create_kmean(all_descriptors, num_cluster):
    """Fit a KMeans model with ``num_cluster`` clusters on the descriptors and return it."""
    # fit() returns the fitted estimator itself, so it can be returned directly.
    return KMeans(n_clusters = num_cluster).fit(all_descriptors)
    

# Creates features from the BoW
def create_feature_bow(image_descriptors, BoW, num_cluster):
    """Turn per-image descriptors into bag-of-words histograms.

    Parameters
    ----------
    image_descriptors : sequence
        One 2-D descriptor array (or ``None``) per image.
    BoW : array-like
        The cluster centres (visual words), one per row.
    num_cluster : int
        Length of every histogram.

    Returns
    -------
    list of numpy.ndarray
        One integer histogram per image: entry ``k`` counts the descriptors
        whose nearest cluster centre (Euclidean distance) is ``k``. Images
        without descriptors get an all-zero histogram.
    """
    X_features = []

    for descriptors in image_descriptors:
        features = np.zeros(num_cluster, dtype=int)

        if descriptors is not None:
            # Distance from every descriptor of this image to every visual
            # word, e.g. (168, 128) descriptors against a (30, 128) BoW give
            # a (168, 30) distance table.
            distance = cdist(descriptors, BoW, metric='euclidean')

            # Index of the nearest visual word for each descriptor
            nearest = np.argmin(distance, axis = 1)

            # Count the hits per visual word; np.add.at accumulates repeated
            # indices exactly like the explicit `features[j] += 1` loop did.
            np.add.at(features, nearest, 1)
        X_features.append(features)

    return X_features
        

In [None]:
# Per-image SIFT descriptors of the training crops; entries may be None when
# an image yields no descriptors.
# NOTE(review): `sift_extractor` is whatever an earlier cell left behind
# (apparently the last extractor of the sigma experiment) — consider creating
# the intended extractor explicitly here.
image_descriptors = sift_extractor(train_X)

# Filter Nones and flattens array to kx128 dimension
all_descriptors = sift_extractor.flatten(image_descriptors)

num_cluster = 70      
# Fit KMeans once and take the vocabulary from that same model. Fitting two
# separate, unseeded models made `BoW` and `kmeans` describe different
# clusterings and doubled the run time.
kmeans = create_kmean(all_descriptors, num_cluster = num_cluster)
BoW = kmeans.cluster_centers_

In [None]:
# One bag-of-words histogram per training image; the printed shape is
# (number of images, num_cluster).
X_features = create_feature_bow(image_descriptors, BoW, num_cluster)
print(np.array(X_features).shape)

In [None]:
# Hold out 10% of the training histograms as a local test split; the fixed
# random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X_features, train_y, test_size = 0.1, random_state = 1)
print("X_train shape: ", np.array(X_train).shape)
print("y_train shape: ", np.array(Y_train).shape)
print("-------------------------------")
print("X_test shape: ", np.array(X_test).shape)
print("y_test shape: ", np.array(Y_test).shape)

In [None]:
# Baseline MLP. Both MLPs are seeded like the random forest and the SVM below,
# so the reported scores are reproducible from run to run.
mlp = MLPClassifier(verbose=False, max_iter=6000, random_state=1)
mlp.fit(X_train, Y_train)

# MLP with hand-set hyperparameters. A grid search was tried earlier and is
# kept here for reference only (it refers to names that are not defined in
# this notebook):
# clf = GridSearchCV(mlp2, parameter_space, n_jobs=-1, cv=3)
# clf.fit(X_train, Y_train)
mlp_better = MLPClassifier(verbose=False, max_iter=2000, activation='tanh', 
                           alpha=0.0001, hidden_layer_sizes=(50, 50, 50), 
                           learning_rate= 'constant', solver= 'adam',
                           random_state=1)
mlp_better.fit(X_train, Y_train)


kNN = KNeighborsClassifier(n_neighbors = 4, p = 1)
kNN.fit(X_train,Y_train)

rndForest = RandomForestClassifier(n_estimators=75, random_state=1)
rndForest.fit(X_train,Y_train)

model_svm = SVC(random_state = 1, max_iter = 120)
model_svm.fit(X_train, Y_train)

# Hard-voting ensemble of the tuned MLP, kNN and the SVM (the random forest is
# not part of it).
ensemble = VotingClassifier(estimators=[
    ('mlp', mlp_better), 
    ('kNN', kNN), 
    ('model_svm', model_svm)], voting='hard')
ensemble.fit(X_train, Y_train)

In [None]:
# Report the score on the training split and on the held-out split for every
# fitted model. Each model is listed once (the SVM used to be reported twice).
fitted_models = {
    'mlp': mlp,
    'mlp_better': mlp_better,
    'model_svm': model_svm,
    'kNN': kNN,
    'rndForest': rndForest,
    'ensemble': ensemble,
}

for position, (model_name, fitted_model) in enumerate(fitted_models.items()):
    # Blank line between reports, but not before the first one.
    separator = '' if position == 0 else '\n'
    print(f'{separator}"{model_name}" model: ')
    print("score on training set params: ", fitted_model.score(X_train, Y_train))
    print("score on testing set params: ", fitted_model.score(X_test, Y_test))


### Predicting on miniBatch Val_X

In [None]:
# Per-image SIFT descriptors of the validation batch val_X (Nones not filtered)
image_descriptors_test = sift_extractor(val_X)

# Filter Nones and flattens array to kx128 dimension
# NOTE(review): all_descriptors_test is not used by the cells visible here.
all_descriptors_test = sift_extractor.flatten(image_descriptors_test)

# Histograms over the vocabulary (BoW) that was learned on the training images
X_features_Test = create_feature_bow(image_descriptors_test, BoW, num_cluster)
print(np.array(X_features_Test).shape)

In [None]:
# Score of every classifier on the validation batch, one line per model.
_val_reports = [
    ("score on testing set 'mlp_better' model: ", mlp_better),
    ("score on testing set 'model_svm'  model: ", model_svm),
    ("score on testing set 'kNN'        model: ", kNN),
    ("score on testing set 'rndForest'  model: ", rndForest),
    ("score on testing set 'ensemble'   model: ", ensemble),
]
for _caption, _classifier in _val_reports:
    print(_caption, _classifier.score(X_features_Test, val_Y))

## 1.2. Baseline 2: PCA feature extractor
...

In [None]:
class PCAFeatureExtractor(IdentityFeatureExtractor):
    """Eigenface (PCA) feature extractor computed with an SVD.

    The extractor is fitted in the constructor: ``data`` is converted to
    flattened grayscale images, their mean is stored, and the first
    ``n_components`` right-singular vectors of the centred data become the
    eigenfaces.

    Parameters
    ----------
    n_components : int
        Number of eigenfaces kept.
    data : numpy.ndarray
        Images to fit on, indexed as ``data[i]``; the last axis is dropped by
        the grayscale conversion.

    Attributes
    ----------
    mean : per-pixel mean of the flattened grayscale training images
    singular : all singular values of the centred training data
    eigenfaces : the first ``n_components`` rows of ``Vt``
    """
    
    def __init__(self, n_components, data):
        self.n_components = n_components
        self.data = self.preprocess_data(data)
        self.mean = np.mean(self.data, axis=0)
        self.singular, self.eigenfaces = self.compute_vectors()
        
    # This function is copied from scikit-learn.
    def svd_flip(self, u, v, u_based_decision=True):  
        """Flip the signs of ``u`` and ``v`` in place so the SVD output is deterministic.

        With ``u_based_decision`` the largest-magnitude entry of every column
        of ``u`` is made positive; otherwise the largest-magnitude entry of
        every row of ``v``.
        """
        if u_based_decision:
            # columns of u, rows of v
            max_abs_cols = np.argmax(np.abs(u), axis=0)
            signs = np.sign(u[max_abs_cols, range(u.shape[1])])
            u *= signs
            v *= signs[:, np.newaxis]
        else:
            # rows of v, columns of u
            max_abs_rows = np.argmax(np.abs(v), axis=1)
            signs = np.sign(v[range(v.shape[0]), max_abs_rows])
            u *= signs
            v *= signs[:, np.newaxis]
        return u, v
    
    def compute_vectors(self):
        """Return all singular values and the first ``n_components`` eigenvectors."""
        data = self.data - self.mean
        U, S, Vt = np.linalg.svd(data, full_matrices=False)
        U, Vt = self.svd_flip(U, Vt)
        eig_vecs = Vt[:self.n_components]
        return S, eig_vecs
    
    def transform(self, X):
        """Project the images ``X`` onto the eigenfaces: one feature row per image."""
        X_gray = self.preprocess_data(X)
        new_X = X_gray - self.mean
        return np.dot(new_X, self.eigenfaces.T)
    
    def inverse_transform(self, X):
        """Map feature rows back to flattened grayscale images."""
        return np.dot(X, self.eigenfaces) + self.mean
    
    
    def preprocess_data(self, X):
        """Convert ``X`` to grayscale and flatten every image into one row."""
        X_gray = np.zeros(X.shape[:-1])
        for i in range(X.shape[0]): 
            X_gray[i] = cv2.cvtColor(X[i].astype(np.uint8), cv2.COLOR_BGR2GRAY) 

        # One row per image; the row length is the product of the remaining
        # image dimensions. A single reshape replaces the per-image
        # flatten-and-restack loop.
        pixels_per_image = int(np.prod(X.shape[1:-1]))
        return X_gray.reshape(X.shape[0], pixels_per_image)

In [None]:
# Fit a PCA extractor with 5 components on the training crops
pca1 = PCAFeatureExtractor(5, train_X)
# This is a test for the PCA: project the training crops onto the eigenfaces
weights = pca1.transform(train_X)
# Test on out-of-sample image of existing class
# (expand_dims adds the leading batch axis that transform expects)
query = np.expand_dims(test_X[11], axis=0)

print(query.shape)
query_weight = pca1.transform(query)
# Nearest training image in eigenface space
euclidean_distance = np.linalg.norm(weights - query_weight, axis=1)
best_match = np.argmin(euclidean_distance)
print("Best match %s with Euclidean distance %f" % (train_y[best_match], euclidean_distance[best_match]))
# Visualize the query next to its best match
# NOTE(review): both images have three channels, so cmap="gray" presumably has
# no effect here — confirm.
fig, axes = plt.subplots(1,2,sharex=True,sharey=True,figsize=(8,6))
axes[0].imshow(query[0], cmap="gray")
axes[0].set_title("Query")
axes[1].imshow(train_X[best_match], cmap="gray")
axes[1].set_title("Best match")
plt.show()

## choosing the right number of components

This can be done either by testing different values and choosing the one that leads to the highest accuracy, or by analyzing the singular values. Each singular value indicates how much information the corresponding eigenvector contains. In the following plot we notice that after the 5th vector the amount of information starts to decrease less rapidly, so a good number of components would probably be around 5. This could be a good trade-off between the number of features and accuracy.

In [None]:
# Plot the first 15 singular values of the PCA fitted above (x: component
# index 0..14, y: singular value)
plt.plot(range(15), pca1.singular[:15])

### 1.2.1. Eigenface Plots
...

In [None]:
# fig, axes = plt.subplots(1,4,sharex=True,sharey=True,figsize=(8,10))
# for i in range(4):
#     axes[i].imshow(pca1.eigenfaces[i].real.reshape((100,100)), cmap="gray")
# plt.show()

In [None]:
# # WORK IN PROGRESS

# values = [2, 5, 15, 30, 80]
# plotPCA = [PCAFeatureExtractor(i, train_X) for i in values]
# query = test_X[8]
# fig, axes = plt.subplots(2,3,sharex=True,sharey=True,figsize=(9,6))
# axes[0][0].imshow(query), cmap="gray")
# axes[0][0].set_title("Original face")
# features = [plotPCA[i].transform(query) for i in range(5)]
# inverse = [plotPCA[i].inverse_transform(features[i]) for i in range(5)]
# axes[0][1].imshow(inverse[0].reshape((100,100)), cmap="gray")
# axes[0][1].set_title("using 2 eigenfaces")
# axes[0][2].imshow(inverse[1].reshape((100,100)), cmap="gray")
# axes[0][2].set_title("using 5 eigenfaces")
# axes[1][0].imshow(inverse[2].reshape((100,100)), cmap="gray")
# axes[1][0].set_title("using 15 eigenfaces")
# axes[1][1].imshow(inverse[3].reshape((100,100)), cmap="gray")
# axes[1][1].set_title("using 30 eigenfaces")
# axes[1][2].imshow(inverse[4].reshape((100,100)), cmap="gray")
# axes[1][2].set_title("using 80 eigenfaces")

### 1.2.2. Feature Space Plots
...

### 1.2.3. Discussion
...

# 2. Evaluation Metrics
## 2.0. Example: Accuracy
As example metric we take the accuracy. Informally, accuracy is the proportion of correct predictions over the total amount of predictions. It is used a lot in classification but it certainly has its disadvantages...

In [None]:
from sklearn.metrics import accuracy_score

# 3. Classifiers
## 3.0. Example: The *'not so smart'* classifier
This random classifier is not very complicated. It makes predictions at random, based on the distribution observed in the training set. **It thus assumes** that the class labels of the test set will be distributed similarly to the training set.

In [None]:
class RandomClassificationModel:
    """Random classifier, draws a random sample based on class distribution observed 
    during training.

    Attributes set by ``fit``
    -------------------------
    classes : array
        Sorted unique labels found in the training labels.
    class_ratio : array
        Relative frequency of each entry of ``classes`` (sums to 1).
    """

    def fit(self, X, y):
        """Adjusts the class ratio instance variable to the one observed in y. 

        Parameters
        ----------
        X : tensor
            Training set (not used; accepted for API symmetry with other classifiers)
        y : array
            Training set labels

        Returns
        -------
        self : RandomClassificationModel
        """

        self.classes, class_counts = np.unique(y, return_counts=True)
        self.class_ratio = class_counts / class_counts.sum()
        return self

    def predict(self, X):
        """Samples labels for the input data. 

        The draw is deterministic: every call uses a fresh generator seeded with 0,
        so two calls with inputs of the same length return the same labels.

        Parameters
        ----------
        X : tensor
            dataset; only ``X.shape[0]`` (the number of samples) is used

        Returns
        -------
        y_star : array
            'Predicted' labels
        """

        # Use a private generator instead of np.random.seed(0): re-seeding the global
        # NumPy RNG here would silently reset the random state of every other piece of
        # code running in the same kernel. RandomState(0) yields the same sequence.
        rng = np.random.RandomState(0)
        return rng.choice(self.classes, size=X.shape[0], p=self.class_ratio)

    def __call__(self, X):
        """Alias for ``predict`` so the model can be used as a plain callable."""
        return self.predict(X)
    

## 3.1. Baseline 1: My favorite classifier
...

In [None]:
# TRANSFER LEARNING - Xception, VGG16


def create_model_C(num_classes, input_shape, base_model=0):
    """Build a transfer-learning classifier on top of a frozen ImageNet backbone.

    Parameters
    ----------
    num_classes : int
        Number of target classes. With exactly 2 classes the head is a single
        sigmoid unit, otherwise a softmax layer with ``num_classes`` units.
    input_shape : tuple
        Shape handed to ``keras.Input`` and to the backbone constructor.
    base_model : int, default 0
        0 selects Xception; any other value selects VGG16.

    Returns
    -------
    dict
        ``{"model": keras.Model, "num_classes": ..., "input_shape": ...}``
    """
    inputs = keras.Input(shape=input_shape)

    # Pick the backbone together with its matching input preprocessing.
    if base_model == 0:
        backbone = keras.applications.Xception(
            weights='imagenet',
            input_shape=input_shape,
            include_top=False,
        )
        preprocessed = keras.applications.xception.preprocess_input(inputs)
    else:
        backbone = keras.applications.vgg16.VGG16(
            weights='imagenet',
            input_shape=input_shape,
            include_top=False,
        )
        preprocessed = keras.applications.vgg16.preprocess_input(inputs)

    # Freeze the pretrained weights; only the small head below is trainable.
    backbone.trainable = False

    # training=False keeps the backbone in inference mode when it is called.
    features = backbone(preprocessed, training=False)
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dense(16, activation="relu")(features)
    features = layers.Dropout(0.2)(features)

    if num_classes == 2:
        head = layers.Dense(1, activation="sigmoid")
    else:
        head = layers.Dense(num_classes, activation="softmax")
    outputs = head(features)

    return dict(
        model=keras.Model(inputs, outputs),
        num_classes=num_classes,
        input_shape=input_shape,
    )

In [None]:
def create_model_D(num_classes, input_shape):
    """Build a small fully connected classifier.

    The network is LayerNormalization followed by three Dense(relu) + Dropout
    stages (64 / 0.1, 64 / 0.1, 128 / 0.5) and a classification head.

    Parameters
    ----------
    num_classes : int
        Number of target classes. With exactly 2 classes the head is a single
        sigmoid unit, otherwise a softmax layer with ``num_classes`` units.
    input_shape : tuple
        Shape handed to ``keras.Input``.

    Returns
    -------
    dict
        ``{"model": keras.Model, "num_classes": ..., "input_shape": ...}``
    """
    inputs = keras.Input(shape=input_shape)
    hidden = layers.LayerNormalization(axis=-1)(inputs)

    # (units, dropout rate) of each hidden stage, in order.
    for width, drop_rate in ((64, 0.1), (64, 0.1), (128, 0.5)):
        hidden = layers.Dense(width, activation="relu")(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)

    if num_classes == 2:
        head = layers.Dense(1, activation="sigmoid")
    else:
        head = layers.Dense(num_classes, activation="softmax")
    outputs = head(hidden)

    return dict(
        model=keras.Model(inputs, outputs),
        num_classes=num_classes,
        input_shape=input_shape,
    )

In [None]:
class DeepLearningModel:
    """Fit/predict wrapper around a Keras model that works with integer class labels.

    Parameters
    ----------
    model : keras.Model
        Network to train and query.
    num_classes : int
        Number of target classes; used to one-hot encode the labels in ``fit``.
    input_shape : tuple
        Stored on the instance; not used by the methods of this class.
    """

    def __init__(self, model, num_classes, input_shape):

        self.model = model
        self.num_classes = num_classes
        self.input_shape = input_shape

    def _has_single_unit_output(self):
        """Return True when the wrapped model's output has a single unit (binary sigmoid head)."""
        output_shape = getattr(self.model, "output_shape", None)
        return (
            isinstance(output_shape, tuple)
            and len(output_shape) > 0
            and output_shape[-1] == 1
        )

    def fit(self, X, y, epoch=10, lr=1e-3, batch_size=4, augment_data=False, val_ds=None):
        """Compile the wrapped model with AdamW and train it.

        Parameters
        ----------
        X : array
            Training inputs, fed through ``ImageDataGenerator.flow``.
        y : array
            Integer class labels.
        epoch : int
            Number of training epochs.
        lr : float
            Learning rate of the AdamW optimizer.
        batch_size : int
            Batch size used by the data generator.
        augment_data : bool
            When True, apply rescaling plus random rotation / zoom / horizontal flip /
            brightness changes; otherwise the data is passed through unchanged.
        val_ds : optional
            Forwarded as ``validation_data`` to ``model.fit``. Its labels must use the
            same encoding as the training targets (one-hot for a softmax head,
            0/1 for a single-unit sigmoid head).

        Returns
        -------
        The value returned by ``self.model.fit`` (the Keras training history).
        """

        # A single-unit sigmoid head needs 0/1 targets and a binary loss; one-hot targets
        # with categorical cross-entropy do not match its (N, 1) output.
        if self._has_single_unit_output():
            targets = np.asarray(y, dtype="float32").reshape(-1, 1)
            loss = "binary_crossentropy"
        else:
            targets = keras.utils.to_categorical(y, num_classes=self.num_classes)
            loss = "categorical_crossentropy"

        if augment_data:
            # NOTE(review): rescale=1/255 is only applied in this branch, so the input
            # range differs between augmented and non-augmented training - confirm intended.
            datagen = keras.preprocessing.image.ImageDataGenerator(
                rescale=1./255,
                rotation_range=40,
                zoom_range=0.1,
                horizontal_flip=True,
                brightness_range=[0.95, 1.05],
                fill_mode='nearest')
        else:
            datagen = keras.preprocessing.image.ImageDataGenerator()

        # NOTE(review): shuffle=False keeps the sample order fixed in every epoch -
        # confirm this is intended.
        train_data = datagen.flow(x=X, y=targets, batch_size=batch_size, shuffle=False)

        self.model.compile(
            # `learning_rate` is the documented keyword; `lr` is a deprecated alias.
            optimizer=keras.optimizers.experimental.AdamW(learning_rate=lr),
            loss=loss,
            metrics=["accuracy"],
        )

        return self.model.fit(
            x=train_data,
            epochs=epoch,
            validation_data=val_ds,
        )

    def predict(self, X):
        """Return the predicted integer class label for every sample in ``X``.

        For a multi-unit output the label is the argmax over the last axis. For a
        single-unit (sigmoid) output the probability is thresholded at 0.5, since an
        argmax over one unit would always return 0.
        """
        pred = np.asarray(self.model.predict(X))
        if pred.ndim >= 2 and pred.shape[-1] == 1:
            return (pred[..., 0] > 0.5).astype(int)
        return np.argmax(pred, axis=-1)

    def __call__(self, X):
        """Alias for ``predict`` so the model can be used as a plain callable."""
        return self.predict(X)

## 3.2 SVM

In [None]:
class SVM:
    """Support-vector classifier wrapper exposing fit / predict / call.

    Parameters
    ----------
    ensemble : bool, default False
        When falsy, ``self.model`` is a default ``svm.SVC``. When truthy,
        ``self.model`` is a ``GridSearchCV`` over ``svm.SVC`` using the grid
        stored in ``self.params``.
    """

    def __init__(self, ensemble=False):
        if ensemble:
            # Hyper-parameter grid explored by the grid search.
            self.params = {
                'C': [1, 10],
                'gamma': [1, 10],
                'kernel': ['rbf', 'linear'],
                'decision_function_shape': ['ovr'],
            }
            self.model = sklearn.model_selection.GridSearchCV(svm.SVC(), self.params)
        else:
            self.model = svm.SVC()

    def fit(self, X, y):
        """Fit the wrapped estimator on (X, y) and return whatever its ``fit`` returns."""
        estimator = self.model
        return estimator.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for X."""
        estimator = self.model
        return estimator.predict(X)

    def __call__(self, X):
        """Alias for ``predict``."""
        return self.predict(X)

In [None]:
class RandomForest:
    """Random-forest classifier wrapper exposing fit / predict / call.

    Parameters
    ----------
    n_estimators : int, default 100
        Forwarded to ``RandomForestClassifier``.
    max_depth : int, default 2
        Forwarded to ``RandomForestClassifier``.

    The forest is always created with ``random_state=0``.
    """

    def __init__(self, n_estimators=100, max_depth=2):
        forest_options = dict(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=0,
        )
        self.model = RandomForestClassifier(**forest_options)

    def fit(self, X, y):
        """Fit the wrapped forest on (X, y) and return whatever its ``fit`` returns."""
        forest = self.model
        return forest.fit(X, y)

    def predict(self, X):
        """Return the wrapped forest's predictions for X."""
        forest = self.model
        return forest.predict(X)

    def __call__(self, X):
        """Alias for ``predict``."""
        return self.predict(X)

In [None]:
class EnsembleModel:
    """Hard-voting ensemble wrapper exposing fit / predict / call.

    The ensemble currently holds a single member: an RBF-kernel ``svm.SVC`` with
    ``decision_function_shape='ovr'``.

    Parameters
    ----------
    n_estimators : int, default 100
        Accepted but not used by this class.
    max_depth : int, default 2
        Accepted but not used by this class.
    """

    def __init__(self, n_estimators=100, max_depth=2):
        # (name, estimator) pairs that take part in the vote.
        members = [
            ('svm0', svm.SVC(kernel="rbf", decision_function_shape='ovr')),
        ]
        self.model = sklearn.ensemble.VotingClassifier(estimators=members, voting='hard')

    def fit(self, X, y):
        """Fit the voting classifier on (X, y) and return whatever its ``fit`` returns."""
        voter = self.model
        return voter.fit(X, y)

    def predict(self, X):
        """Return the voting classifier's predictions for X."""
        voter = self.model
        return voter.predict(X)

    def __call__(self, X):
        """Alias for ``predict``."""
        return self.predict(X)

# 4. Experiments
<div class="alert alert-block alert-info"> <b>NOTE:</b> Do <i>NOT</i> use this section to keep track of every little change you make in your code! Instead, highlight the most important findings and the major (best) pipelines that you've discovered.  
</div>
<br>

## 4.0. Example: basic pipeline
The basic pipeline takes any input and samples a label based on the class label distribution of the training set. As expected the performance is very poor, predicting approximately 1/4 correctly on the training set. There is a lot of room for improvement but this is left to you ;). 

In [None]:
### USING SVM w/ PCA

# First argument of PCAFeatureExtractor (presumably the number of components kept - confirm).
k = 25

# The ImageDataGenerator-based augmentation that was prototyped in this cell is disabled;
# the pipeline is fitted on the original training images and labels.
aX = train_X
ay = train_y

feature_extractor = PCAFeatureExtractor(k, aX)
classifier = SVM(ensemble=False)

# Train the classifier on the extracted features of the training images.
classifier.fit(feature_extractor(aX), ay)


def model(X):
    """Model/final pipeline: extract features from X, then classify them."""
    return classifier(feature_extractor(X))

In [None]:
# ### USING Random Forest w/ PCA

# k = 10


# feature_extractor = PCAFeatureExtractor(k, train_X)
# classifier = RandomForest(n_estimators=200, max_depth=2)

# val_ds = (feature_extractor(val_X), np.argmax(val_y, axis=-1))


# classifier.fit(feature_extractor(train_X), train_y)

# # model/final pipeline
# model = lambda X: classifier(feature_extractor(X))

In [None]:
# Run the final pipeline on the training images and show the raw predictions.
train_y_star = model(train_X)
print(train_y_star)

# Accuracy on the data the pipeline was fitted on; reported as a sanity check only.
train_accuracy = accuracy_score(train_y, train_y_star)
print(f"The performance on the training set is {train_accuracy:.2f}. This however, does not tell us much about the actual performance (generalisability).")

In [None]:
# print(f"The performance on the training set is {accuracy_score(val_ds[-1], model(val_X)):.2f}. This however, does not tell us much about the actual performance (generalisability).")

In [None]:
# Predict the labels for the test set with the final pipeline (`model`).
# The result, test_y_star, becomes the 'class' column of the submission table below.
test_y_star = model(test_X)

# 5. Publishing best results

In [None]:
# Build the submission table: the test frame without its 'img' column, plus the
# predicted labels in a 'class' column. `test` itself is left untouched.
submission = test.drop(columns='img').assign(**{'class': test_y_star})

submission

In [None]:
# Write the submission table to submission.csv in the working directory.
# With no index argument, pandas' default applies and the index is written as the first column.
submission.to_csv('submission.csv')

# 6. Discussion
...

In summary we contributed the following: 
* 
