In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [3]:
data = np.load('data/data_pca_50_target.npz')
data.files

['arr_0', 'arr_1']

In [4]:
data.allow_pickle = True

In [7]:
X = data['arr_0'] # pca data with 50 components
y = data['arr_1'] # target variable

In [6]:
X

array([[ 1.69404373, -1.1224871 , -0.96057256, ...,  0.28586684,
         0.3710693 , -1.74289245],
       [-0.75328086,  0.47206748, -0.16733758, ..., -0.43480325,
        -0.61056158, -0.48588501],
       [ 1.64893197, -0.35038188, -0.60638437, ..., -0.27134089,
        -0.3902875 , -0.65252902],
       ...,
       [-0.1085147 ,  1.69383411, -1.09101215, ...,  0.54952253,
         0.2414279 , -1.16102374],
       [ 1.05235122,  1.12616373,  1.93402255, ..., -0.02690595,
        -0.09858388,  0.63382462],
       [-0.01341455,  0.63578105,  0.91438289, ...,  0.10418222,
        -1.99422122, -0.87923439]], shape=(5608, 50))

In [8]:
y

array(['female', 'female', 'female', ..., 'male', 'male', 'male'],
      shape=(5608,), dtype=object)

## Split the data into train and test

In [9]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(4486, 50) (1122, 50) (4486,) (1122,)


## Train ML model

In [11]:
model_svc = SVC(probability=True)

param_grid = {
    'C': [0.5,1, 20,30,50],
    'kernel': ['rbf', 'poly'],
    'gamma': [0.1, 0.05, 0.01, 0.001, 0.002, 0.005],
    'coef0': [0, 1]
}

In [12]:
model_grid = GridSearchCV(model_svc, 
                                                param_grid=param_grid,
                                                scoring='accuracy', cv=3, verbose=2)

In [13]:
model_grid.fit(x_train, y_train)

Fitting 3 folds for each of 120 candidates, totalling 360 fits
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.2s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.2s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.2s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   1.5s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   1.5s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   1.5s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   2.0s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   1.9s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   2.0s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   1.4s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   1.5s
[CV] END ............C=0.5, coef0=0, gamma=0.0

In [14]:
model_final = model_grid.best_estimator_
model_final

In [15]:
model_final.get_params()

{'C': 1,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 0.05,
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

## Model Evaluation

In [16]:
y_pred = model_final.predict(x_test)

In [17]:
y_pred

array(['female', 'female', 'male', ..., 'female', 'male', 'male'],
      shape=(1122,), dtype=object)

In [18]:
## Classification report
cr = metrics.classification_report(y_test, y_pred, output_dict=True)
pd.DataFrame(cr).T

Unnamed: 0,precision,recall,f1-score,support
female,0.815534,0.837209,0.82623,602.0
male,0.805556,0.780769,0.792969,520.0
accuracy,0.811052,0.811052,0.811052,0.811052
macro avg,0.810545,0.808989,0.809599,1122.0
weighted avg,0.810909,0.811052,0.810815,1122.0


In [19]:
## Kappa score
kp = metrics.cohen_kappa_score(y_test, y_pred)
kp

np.float64(0.6192762753534112)

In [21]:
## Area Under Curve
auc = metrics.roc_auc_score(np.where(y_test=="male", 1, 0), 
                                                np.where(y_pred=="male", 1, 0))
auc

np.float64(0.8089892665474059)

### Save face recognition model

In [22]:
import pickle

In [23]:
pickle.dump(model_final, open('model/model_svm.pickle', mode = 'wb'))