In [None]:
import torch
import torch.nn as nn
import cv2
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.manifold import LocallyLinearEmbedding as LLE
from data_processing import normalize, split_data, generate_pairplot, find_hyper, KNN_classifier
%load_ext autoreload
%autoreload 2

In [None]:
# Names of the facial attributes used as class labels in this notebook.
Classes = [
    "Eyeglasses",
    "Wearing_Hat",
    "Wavy_Hair",
    "Goatee",
]

# Visualize the transformation applied on the data

In [None]:
# Load four sample images and show, for each one, three versions side by side:
# the original RGB image, its grayscale conversion, and the grayscale image
# after 2x2 average pooling.
path = ["../Data/selected_images/000720.jpg",
        "../Data/selected_images/201763.jpg",
        "../Data/selected_images/189506.jpg",
        "../Data/selected_images/120827.jpg"]

# 2x2 average pooling with stride 2 (halves each spatial dimension).
operator = nn.AvgPool2d(2, stride=2)
img_tensor = torch.empty(4,1,218,178)        # grayscale images, one channel each
img_color_tensor = torch.empty(4,218,178,3)  # RGB images, channels last

for i,path_im in enumerate(path):

    # Decode each file only once. cv2.imread signals failure by returning None
    # instead of raising, so fail early with a readable message.
    bgr = cv2.imread(path_im)
    if bgr is None:
        raise FileNotFoundError("Could not read image: " + path_im)

    image = cv2.cvtColor(bgr,cv2.COLOR_BGR2RGB).astype(float)/255 # RGB image
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY).astype(float)/255 # gray scale image
    assert(gray.shape == (218,178))

    img_color_tensor[i] = torch.from_numpy(image)
    img_tensor[i,0,:,:] = torch.from_numpy(gray)

reduc_img_tensor = operator(img_tensor)

figure = plt.figure(figsize=[25,8])

# Iterate over indices only: naming the loop variable `path` would rebind the
# list above to a string and break any re-run of this cell.
for i in range(len(path)):
    # Three consecutive panels per image in a 2x6 grid.
    ax1 = figure.add_subplot(2,6,3*i+1)
    ax2 = figure.add_subplot(2,6,3*i+2)
    ax3 = figure.add_subplot(2,6,3*i+3)
    ax1.imshow(img_color_tensor[i])
    ax2.imshow(img_tensor[i,0],cmap='Greys_r')
    ax3.imshow(reduc_img_tensor[i,0],cmap='Greys_r')
    ax1.set_title("Original image Class " + str(i))
    ax2.set_title("Gray scale image Class " + str(i))
    ax3.set_title("Averaged image Class " + str(i))
plt.show()

# Import the dataset (data_frame.csv)

In [None]:
# Load the preprocessed dataset and preview its first rows to inspect
# the structure of the table.
data = pd.read_csv("../Data/data_frame.csv")
data.head(10)

In [None]:
# Numeric data matrix: drop the first CSV column, cast everything to float,
# then discard every row whose column 0 (used as the label downstream) equals 2.
# NOTE(review): the reason for excluding label 2 is not visible here - confirm.
all_rows = data.iloc[:,1:].to_numpy().astype(float)
keep_mask = all_rows[:,0] != 2
data_np = all_rows[keep_mask]

# Sanity check of the preprocessing: display row 5 of the table (a fixed row,
# not a random one) as a 109x89 grayscale image. The first two columns are
# skipped so that only the pixel values are reshaped.
sample_row = data.iloc[5].to_numpy()
img = sample_row[2:].astype(float).reshape(109,89)
plt.imshow(img, cmap='Greys_r')

# Dimensionality Reduction

We will test and compare two dimensionality reduction methods: Locally Linear Embedding (LLE) and Modified Locally Linear Embedding (MLLE). To do so, we will check whether these algorithms are able to separate the 4 classes into four distinct clusters (i.e., whether they make the classification task easier).

In [None]:
# Split the filtered data into train / validation / test subsets.
# NOTE(review): the exact meaning of each entry of `ratio` is defined by
# split_data (data_processing module) - confirm there.
ratio = [0.2, 0]
(train_set,
 validation_set,
 test_set) = split_data(data_np, ratio)

In [None]:
# Report the shape of the train and test subsets.
# The validation subset is intentionally not reported here.
for caption, subset in (("train set shape :", train_set),
                        ("test set shape :", test_set)):
    print(caption, subset.shape)

In [None]:
# Baseline without dimensionality reduction: re-split the training set,
# fit the KNN classifier on the raw features (columns 1 onwards) with
# column 0 as the label, and display its score on the held-out part.
train_set_bis, _, test_set_bis = split_data(train_set, [0.7, 0])

fit_features, fit_labels = train_set_bis[:,1:], train_set_bis[:,0]
eval_features, eval_labels = test_set_bis[:,1:], test_set_bis[:,0]

classifier = KNN_classifier(fit_features, fit_labels)
classifier.score(eval_features, eval_labels)

In [None]:
# Hyperparameter grid explored by find_hyper on the training set.
range_components = np.arange(4, 50, 4)   # 4, 8, ..., 48
range_neighbors = np.arange(10, 50, 5)   # 10, 15, ..., 45

# find_hyper returns the plotting grid (x, y) and two score surfaces that are
# drawn in the next cell.
(x, y,
 metric, KNN_metric) = find_hyper(train_set, range_components, range_neighbors)

In [None]:
# Side-by-side filled contour plots of the two surfaces returned by find_hyper,
# both drawn over the same (x, y) grid.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[14,5])

cs1 = ax1.contourf(x, y, metric)
cs2 = ax2.contourf(x, y, KNN_metric, cmap="viridis")

# Both panels share the same axis labels.
for panel in (ax1, ax2):
    panel.set_xlabel("Number of components")
    panel.set_ylabel("Number of neighbors")

ax1.set_title("Reconstruction error")
ax2.set_title("KNN accuracy")

# One colorbar per panel, attached to its own axes.
fig.colorbar(cs1, ax=ax1)
fig.colorbar(cs2, ax=ax2)
plt.show()

In [None]:
# Fit a Modified LLE embedding (10 components, 25 neighbors) on the feature
# columns of the whole filtered dataset.
embedding = LLE(
    n_components=10,
    n_neighbors=25,
    method="modified",
)
X_trans = embedding.fit_transform(data_np[:,1:])

# Put the label column (kept 2-D) back in front of the embedded coordinates.
label_column = data_np[:, :1]
X_reduc = np.concatenate((label_column, X_trans), axis=1)

In [None]:
# Pair plot of the first 4 embedded components, grouped by class.
# NOTE(review): whether generate_pairplot draws on this figure or creates its
# own is not visible here - confirm in data_processing.
fig = plt.figure(figsize=(10, 10))
generate_pairplot(X_reduc, Classes, components_to_show=4)
plt.show()