In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from Feature_Extraction import *
from utils import *

import os
from os import listdir
from os.path import isfile, join

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Project root: the current working directory, with a trailing slash so that
# sub-paths can be appended by plain string concatenation.
path = ''.join((os.getcwd(), '/'))
# Root folder of the FASSEG frontal03 image set.
path_images = ''.join((path, 'data/FASSEG-frontal03/'))
# Sub-folder holding the original images: 'Original/' or 'Train_RGB/'.
sub_dir_original = 'Original/'
# Sub-folder holding the label maps: 'Labeled/' or 'Train_Labels/'.
sub_dir_labeled = 'Labeled/'

  from numpy.core.umath_tests import inner1d


# Loading dataset & preprocessing

## Import raw images

Import the FASSEG dataset images (the original images and their labeled counterparts). Images whose dimensions do not match those of their label map are removed from the dataset.

In [2]:
dataset_size = 10 # Number of images to take from the dataset
# Load the original images, their raw label maps and the image names from
# `path_images`. Per the section text, images whose dimensions do not match
# their label map are dropped; the call prints how many were removed.
images, raw_labels, names = import_dataset(path_images,sub_dir_original,sub_dir_labeled,dataset_size)

Removed 0 images from dataset, 10 images remaining.


## Extract label

The labeled color maps are converted from RGB to a 1D encoding.

In [None]:
# Convert the raw RGB label maps into the 1D label encoding described above.
# The helper prints its progress ("i / N") while it runs.
labels = extract_label(raw_labels)

Extracting labels :

0 / 10
1 / 10
2 / 10


## Dataset visualisation

In [None]:
# Visual sanity check of the loaded data.
# NOTE(review): presumably shows each image next to its label map, titled with
# its name -- confirm against the plot_dataset helper.
plot_dataset(images,labels,names)

# Tests

In [None]:
import skimage.feature as feat
# `scipy.ndimage.filters` is a deprecated namespace (SciPy >= 1.8): the filter
# functions live directly in `scipy.ndimage`. Binding the package to the name
# `filters` keeps existing `filters.<function>` calls working unchanged.
from scipy import ndimage as filters

# Image used for the quick filter experiment below.
image = images[3]
# Single-channel (grayscale) version of the RGB image.
# NOTE(review): `cv2` is not imported explicitly in this notebook; it is
# presumably provided by one of the star imports -- confirm.
image_bw = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

In [None]:
# Minimum filter over a 50-pixel window along each axis: every output pixel is
# the smallest grayscale value found in its neighbourhood.
test = filters.minimum_filter(image_bw,size=50)

In [None]:
# Side-by-side comparison of the RGB original and the min-filtered image.
fig, (ax_original, ax_filtered) = plt.subplots(1, 2, figsize=(12, 6))

ax_original.imshow(image)
ax_original.set_title('Original')
ax_original.axis('off')

# `test` is derived from the grayscale image, i.e. it is single-channel.
# Without an explicit colormap matplotlib would render it in false colors
# (default 'viridis'), which is misleading next to the original picture.
ax_filtered.imshow(test, cmap='gray')
ax_filtered.set_title('Filtered')
ax_filtered.axis('off')

plt.show()

# Feature extraction

We compute features for each pixel of each image. They can be grouped into the following categories:

**No transformation** :

   - Identity : The original image.
   - Dimensions X and Y : Coordinates of each pixel.
   
**Segmentation** :

   - SLIC : Segmentation using K-Means.
   - RandomWalker : Segmentation using a set of markers.

**Thresholding** :

   - Otsu : Computing threshold value based on Otsu method.
   - Isodata : Computing threshold value based on Isodata method.
   - Li : Computing threshold value by Li’s iterative Minimum Cross Entropy method.
   - Triangle : Computing threshold value based on the triangle algorithm.
   - Yen : Computing threshold value based on Yen's method.

**Edge detection** :

   - CannyEdge : Edge detection, with canny method.
   - Frangi : Frangi filter, to detect continuous edges.
   - Hessian : Hybrid hessian filter, to detect continuous edges.
   - Laplace : Edge detection, using Laplace operator.
   - Prewitt : Finding edge magnitude with Prewitt transform.
   - Roberts : Finding edge magnitude using Roberts cross operator.
   - Scharr : Finding edge magnitude using the Scharr transform.
   - Sobel : Edge detection using the Sobel filter.

## Calculation

In [None]:
# Feature functions to extract, grouped by family.

# Original image and pixel coordinates.
no_transformation_functions = [
    Identity,
    DimensionX,
    DimensionY,
]

# SLIC variants, RandomWalker and the parameterised FW_* variants.
segmentation_functions = [
    SLIC2,
    SLIC4,
    SLIC20,
    SLIC40,
    SLIC60,
    RandomWalker,
    FW_200_5,
    FW_150_5,
    FW_100_5,
    FW_200_10,
    FW_150_10,
    FW_100_10,
    FW_200_20,
    FW_150_20,
    FW_100_20,
    FW_200_5_UP,
]

# Threshold-based features.
thresholding_functions = [
    Otsu,
    Isodata,
    Li,
    Triangle,
    Yen,
]

# Max / min pooling variants.
pooling_functions = [
    Max_10,
    Max_20,
    Max_30,
    Min_10,
    Min_20,
    Min_30,
    Min_50,
]

# Edge-detection features that are actually used.
edges_functions = [
    Frangi,
    Hessian,
    Laplace,
    Prewitt,
    Roberts,
    Scharr,
    Sobel,
]

# Edge-detection features kept for reference only: per the list's name they
# are currently not working, and they are NOT part of `feature_functions`.
edge_functions_notworking = [
    CannyEdge,
    PrewittH,
    PrewittV,
    RobertsNegDiag,
    RobertsPosDiag,
    ScharrH,
    ScharrV,
    SobelH,
    SobelV,
]

# Final list handed to the extraction step. The concatenation order is kept
# exactly as before (presumably it fixes the feature order downstream).
feature_functions = (
    no_transformation_functions
    + segmentation_functions
    + thresholding_functions
    + edges_functions
    + pooling_functions
)

In [None]:
# Compute every feature in `feature_functions` for the loaded images.
# NOTE(review): X is presumably a per-pixel feature table (one row per pixel,
# one column per feature) -- confirm against the feature_extraction helper.
X = feature_extraction(images,feature_functions)

## Visualisation

In [None]:
index_viz = 3 # Index of the image whose features will be plotted
# Plot the features listed in `feature_functions` for the selected image.
# NOTE(review): N_cols presumably sets the number of columns of the plot grid
# -- confirm against the plot_features helper.
plot_features(images, index_viz, feature_functions, N_cols=8)

# Target definition

Each pixel is assigned one of the following labels:

   - Background
   - Eyes
   - Mouth
   - Skin
   - Nose
   - Hair

In [None]:
# Attach the target (pixel labels) to the feature data.
# NOTE(review): X is both the input and the output of this call, so re-running
# the cell feeds the already-processed X back in. This may not be idempotent;
# re-run the feature-extraction cell first if this cell has to be re-executed.
X = target_definition(X,images,labels)

# Prediction

## Train / Test split

We use 80% of images for training and 20% for testing.

In [None]:
# Image-level split with an 80 % training ratio (see the text above).
# NOTE(review): `train_test_split` takes a `ratio` keyword, so it is presumably
# the project helper from the star imports, not scikit-learn's function.
(
    X_train, X_test,
    y_train, y_test,
    y_train_label, y_test_label,
    train_ids, test_ids,
    y_train_imageids, y_test_imageids,
    features,
) = train_test_split(X, ratio=0.8)

## Scaler

## Training

In [None]:
# Fixed seed: a random forest draws bootstrap samples and feature subsets at
# random, so without it every run yields a different model and different scores.
RANDOM_STATE = 42

model = RandomForestClassifier(
    n_jobs=-1,              # use all available cores
    max_depth=10,
    n_estimators=20,
    criterion='gini',
    min_samples_leaf=0.01,  # float => fraction: each leaf holds >= 1 % of the training samples
    random_state=RANDOM_STATE,
)

# Whether `train` should search over `params` instead of fitting `model` as is.
# NOTE(review): presumably a grid search over the values below -- confirm
# against the train helper.
grid_search = False
params = {'n_estimators' : [10,20],
          'max_depth' : [5,10,20],
          'criterion' : ['gini','entropy'],
          'min_samples_split' : [2]}

# Fit on the training pixels, using the label names as target.
model = train(X_train, y_train_label, model, grid_search, params)

## Prediction

In [None]:
# Predict the label of every pixel, for the test split and for the train split.
y_pred_label = model.predict(X_test)
y_pred_label_train = model.predict(X_train)

# Pixel-level accuracy on each split; the image counts come from the id lists.
train_accuracy = accuracy_score(y_train_label, y_pred_label_train)
test_accuracy = accuracy_score(y_test_label, y_pred_label)

print('Accuracy on train set ({} images) : {:.3f}'.format(len(train_ids), train_accuracy))
print('Accuracy on test set ({} images) : {:.3f}'.format(len(test_ids), test_accuracy))

In [None]:
# Confusion matrix of the test-set predictions, with normalisation enabled.
# NOTE(review): the (model, y_true, y_pred, normalize) signature suggests this
# is the project helper from the star imports, not scikit-learn's function of
# the same name -- confirm.
plot_confusion_matrix(model, y_true=y_test_label, y_pred=y_pred_label, normalize=True)

## Visualisation of predictions

The previously trained model is then used to predict labels for each image in the dataset.

In [None]:
# Show the predictions for the images of both splits, passing the train/test
# data, labels, predictions and id bookkeeping produced by the cells above.
plot_predictions(
    images,
    X_train,
    X_test,
    y_train,
    y_test,
    y_train_label,
    y_test_label,
    y_pred_label_train,
    y_pred_label,
    train_ids,
    test_ids,
    y_train_imageids,
    y_test_imageids,
)

## Feature importance

In [None]:
# Plot the importance of the features for the trained model.
# NOTE(review): max_features=None presumably means "show every feature" --
# confirm against the plot_feature_importance helper.
plot_feature_importance(model, features, max_features=None)