In [132]:
from sklearn.svm import LinearSVC, SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib
import scipy.io as sio
import os
import numpy as np

# Name of the feature file to load from the selected/ directory; the same
# name is embedded in every saved model path further down.
featureMat = 'pca200_sfs100.mat'
features = sio.loadmat('selected/{}'.format(featureMat))

# Training split. Every label row is reduced to its first element
# (presumably the labels are stored as an N x 1 column -- TODO confirm).
x_train = features['trainFeatures']
y_train = [row[0] for row in features['trainLabels']]

# Test split, unpacked the same way.
x_test = features['testFeatures']
y_test = [row[0] for row in features['testLabels']]

def img_per_class(directory):
    """Count the files in ``directory`` per group and print a summary.

    The group of a file is the integer found in the second "_"-separated
    field of its name; it must be one of 1..7 (presumably the age-range
    classes -- TODO confirm against the dataset naming scheme).

    Returns:
        dict mapping each group id 1..7 to the number of files found.

    Raises:
        IndexError / ValueError / KeyError: for a file name that has no
        second field, a non-integer group, or a group outside 1..7.
    """
    per_group = dict.fromkeys(range(1, 8), 0)
    for name in os.listdir(directory):
        # Group id is the second "_"-separated token of the file name.
        per_group[int(name.split("_")[1])] += 1

    print("Total sum: ", sum(per_group.values()))
    print("Per class: ", per_group)

    return per_group

# Per-class image counts; sampling() takes them as its class_counter
# argument. Disabled here, so the name is undefined unless this line is
# re-enabled.
# ageRangeCounter = img_per_class('datasets/train_folder')
# Target number of rows per class for sampling().
n_samples = 1500
# Switch for the optional per-class subsampling of the training data.
do_sampling = False
def sampling(x_train, n_samples, class_counter):
    """Keep at most ``n_samples`` leading rows of each of the 7 classes.

    The rows of ``x_train`` are assumed to be grouped by class in ascending
    label order (all class-1 rows first, then class 2, ...), with
    ``class_counter[label]`` rows per class -- the start offset of every
    class is derived from those counts.

    Args:
        x_train: 2-D array-like of feature rows, grouped by class.
        n_samples: maximum number of rows to keep per class.
        class_counter: mapping ``label -> number of rows`` for labels 1..7.

    Returns:
        (x_sampled, y_sampled): the stacked rows as an ndarray and a list
        holding the matching label for every returned row.
    """
    x_train = np.asarray(x_train)
    chunks = []
    labels = []
    start = 0
    for label in range(1, 8):
        class_size = class_counter[label]
        # Never read past the end of this class: a class with fewer than
        # n_samples rows contributes only the rows it actually has.
        take = min(n_samples, class_size)
        chunk = x_train[start:start + take]
        chunks.append(chunk)
        # Label by the real chunk length so features and labels stay aligned.
        labels.extend([label] * len(chunk))
        start += class_size

    return np.concatenate(chunks, axis=0), labels

if do_sampling:
    # sampling() needs the per-class image counts to locate each class in
    # x_train. Compute them inside this branch so that enabling do_sampling
    # does not fail on an undefined ageRangeCounter.
    ageRangeCounter = img_per_class('datasets/train_folder')
    # Use the configured n_samples instead of repeating the literal value.
    x_train, y_train = sampling(x_train, n_samples, ageRangeCounter)

print(len(x_train))

20960


## SVM

In [125]:
# Kernel SVM (SVC with its default kernel): fit on the training split, then
# persist the fitted model under a path derived from the feature file name.
svm = SVC(C=1, tol=1e-4, verbose=True).fit(x_train, y_train)
joblib.dump(svm, 'models/svm_{}.pkl'.format(featureMat))

[LibSVM]

['models/svm_pca200_sfs70_20k.mat.pkl']

In [None]:
# Reload the persisted SVM and report its score on the test split.
# NOTE: joblib.load unpickles the file -- only load model files you trust.
svm = joblib.load('models/svm_{}.pkl'.format(featureMat))
svm.score(x_test, y_test)

## Linear SVM

In [119]:
# Linear SVM solved in the primal (dual=False): fit on the training split,
# then persist the fitted model next to the other classifiers.
linearsvm = LinearSVC(dual=False, C=1, verbose=True).fit(x_train, y_train)
joblib.dump(linearsvm, 'models/linearsvm_{}.pkl'.format(featureMat))

[LibLinear]

['models/linearsvm_pca200_sfs70_20k.mat.pkl']

In [120]:
# Reload the persisted linear SVM and report its score on the test split.
# NOTE: joblib.load unpickles the file -- only load model files you trust.
linearsvm = joblib.load('models/linearsvm_{}.pkl'.format(featureMat))
linearsvm.score(x_test, y_test)

0.32500000000000001

## LDA

In [110]:
# Linear discriminant analysis with default settings: fit on the training
# split, then persist the fitted model.
lda = LinearDiscriminantAnalysis().fit(x_train, y_train)
joblib.dump(lda, 'models/lda_{}.pkl'.format(featureMat))

['models/lda_pca200_sfs70_20k.mat.pkl']

In [111]:
# Reload the persisted LDA model and report its score on the test split.
# NOTE: joblib.load unpickles the file -- only load model files you trust.
lda = joblib.load('models/lda_{}.pkl'.format(featureMat))
lda.score(x_test, y_test)

0.31785714285714284

## Neural network (MLP)

In [135]:
# Multi-layer perceptron with three hidden layers (55, 25, 10 units).
# random_state is fixed so that weight initialisation and batch shuffling --
# and therefore the saved model and its score -- are reproducible on re-run.
nn = MLPClassifier(hidden_layer_sizes=(55, 25, 10), activation='relu',
                   solver='adam', batch_size=512, verbose=True, max_iter=1000, tol=1e-4,
                   random_state=42)
nn.fit(x_train, y_train)
joblib.dump(nn, 'models/nn_{}.pkl'.format(featureMat))

Iteration 1, loss = 2.04292366
Iteration 2, loss = 1.66437152
Iteration 3, loss = 1.37925131
Iteration 4, loss = 1.16960556
Iteration 5, loss = 1.05772447
Iteration 6, loss = 0.99471169
Iteration 7, loss = 0.95477586
Iteration 8, loss = 0.92493180
Iteration 9, loss = 0.90150246
Iteration 10, loss = 0.88185792
Iteration 11, loss = 0.86513983
Iteration 12, loss = 0.85097337
Iteration 13, loss = 0.83668688
Iteration 14, loss = 0.82605136
Iteration 15, loss = 0.81631131
Iteration 16, loss = 0.80474637
Iteration 17, loss = 0.79620061
Iteration 18, loss = 0.78577586
Iteration 19, loss = 0.77834267
Iteration 20, loss = 0.76968943
Iteration 21, loss = 0.76298387
Iteration 22, loss = 0.75588787
Iteration 23, loss = 0.74934411
Iteration 24, loss = 0.74298041
Iteration 25, loss = 0.73557676
Iteration 26, loss = 0.73040725
Iteration 27, loss = 0.72437337
Iteration 28, loss = 0.71812799
Iteration 29, loss = 0.71329762
Iteration 30, loss = 0.70916916
Iteration 31, loss = 0.70223304
Iteration 32, los

['models/nn_pca200_sfs100.mat.pkl']

In [134]:
# Reload the persisted MLP and report its score on the test split.
# NOTE: joblib.load unpickles the file -- only load model files you trust.
nn = joblib.load('models/nn_{}.pkl'.format(featureMat))
nn.score(x_test, y_test)


0.36071428571428571

In [128]:
def one_off_prediction(x_test, y_test, model):
    """Return the fraction of samples predicted within one class of the truth.

    A sample counts as a hit when ``abs(prediction - int(label)) <= 1``.

    Args:
        x_test: sequence of feature rows accepted by ``model.predict``.
        y_test: sequence of true labels, indexable in step with ``x_test``.
        model: object exposing ``predict(rows)`` that returns one prediction
            per row.

    Returns:
        Number of hits divided by ``len(y_test)``.

    Raises:
        ZeroDivisionError: if ``y_test`` is empty.
    """
    # Predict the whole batch in a single call instead of once per sample;
    # skip the call entirely when there is nothing to predict.
    predictions = model.predict(x_test) if len(x_test) else []
    hits = sum(
        1
        for index, predicted in enumerate(predictions)
        if abs(predicted - int(y_test[index])) <= 1
    )
    return hits / len(y_test)

def total_error(x_test, y_test, model):
    """Return the summed absolute difference between predictions and labels.

    Args:
        x_test: sequence of feature rows accepted by ``model.predict``.
        y_test: sequence of true labels, indexable in step with ``x_test``;
            each label is converted with ``int`` before the subtraction.
        model: object exposing ``predict(rows)`` that returns one prediction
            per row.

    Returns:
        Sum of ``abs(prediction - int(label))`` over all samples (0 when
        ``x_test`` is empty).
    """
    # Predict the whole batch in a single call instead of once per sample;
    # skip the call entirely when there is nothing to predict.
    predictions = model.predict(x_test) if len(x_test) else []
    return sum(
        abs(predicted - int(y_test[index]))
        for index, predicted in enumerate(predictions)
    )

# Evaluate the MLP on the test split: share of predictions within one class
# of the true label, then the summed absolute class distance.
print(one_off_prediction(x_test, y_test, nn))
print(total_error(x_test, y_test, nn))

0.8
242
