In [None]:
import os
import numpy as np

from sklearn.pipeline import Pipeline 
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score, log_loss

from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV


import datasets

In [None]:
# Load the augmented sample images and their labels.
# NOTE(review): n_samples=-1 presumably means "load every sample" and dsize is
# the per-image resize target -- confirm against datasets.load_X_y_augmented.
X, y = datasets.load_X_y_augmented('SampleImages_F_augmented', n_samples=-1, dsize=(18, 22))

# X is indexed with four axes below: axis 0 is used as the sample axis and
# axis 3 as the channel axis.
n_samples = X.shape[0]
n_channels = X.shape[3]

# Index of the single image channel used for training. It is named (rather than
# hard-coded in the slice) because the training cell reads both `c` and
# `X_channel_c`; without these definitions a fresh-kernel run fails with NameError.
c = 2
X_channel_c = X[:, :, :, c]

# Keep `X` bound to the selected channel, exactly as before, for any code that
# still refers to it.
X = X_channel_c

In [None]:
# Two-stage pipeline: PCA for dimensionality reduction followed by an SVM.
# probability=True enables predict_proba, which the evaluation needs for log-loss.
# The probability calibration inside SVC shuffles data internally, so it is
# seeded to keep results reproducible between runs.
steps = [
    ('reduce_dim', PCA()),
    ('classification', SVC(probability=True, random_state=42)),
]
pipe = Pipeline(steps=steps)

# Number of features per flattened single-channel image. Must stay in sync with
# the dsize=(18, 22) passed to the data loader.
n_pixels = 18 * 22

# Candidate values sampled by the randomized search:
#   - PCA components: 10% .. 90% of the pixel count, in 10% steps.
#     NOTE(review): PCA requires n_components <= min(n_samples, n_features) of
#     each CV training fold -- confirm the dataset is large enough for the 90% value.
#   - SVM regularisation C: 0.2 .. 5.8 in steps of 0.2.
hyper_params = {
    'reduce_dim__n_components': [int(n_pixels*(i/10)) for i in range(1, 10)],
    'classification__C': [0.2*i for i in range(1, 30)],
}
# random_state fixes which parameter combinations are drawn; without it every
# run explores a different subset and reports a different "best" result.
searcher = RandomizedSearchCV(pipe, hyper_params, scoring='accuracy', random_state=42)

# Single stratified 75/25 train/test split with a fixed seed.
groupsplit = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=42)

In [None]:
# Hold out a stratified test split, tune the PCA+SVC pipeline on the training
# part, then report accuracy and log-loss on both parts.
# NOTE: this cell expects `X_channel_c`, `c`, `n_channels`, `y`, `searcher` and
# `groupsplit` to have been defined by earlier cells.
splitRes = groupsplit.split(X_channel_c, y)
for train_index, test_index in splitRes:
    X_train, y_train = X_channel_c[train_index], y[train_index]
    X_test,  y_test  = X_channel_c[test_index],  y[test_index]

    # Flatten every image into a 1-D feature vector, as required by PCA/SVC.
    X_train = X_train.reshape((X_train.shape[0], -1))
    X_test  = X_test.reshape((X_test.shape[0], -1))

    # Randomized hyper-parameter search with cross-validation on the training
    # data only; best_score_ is the mean CV accuracy of the best candidate.
    searcher.fit(X_train, y_train)
    print('The best parameters is: ')
    print(searcher.best_params_)
    print('The best score is: {:>.2f}'.format(searcher.best_score_))

    # Predictions come from the best pipeline found by the search.
    y_train_pred_proba = searcher.predict_proba(X_train)
    y_test_pred_proba = searcher.predict_proba(X_test)
    y_train_pred = searcher.predict(X_train)
    y_test_pred = searcher.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred) in that order.
    acc_train = accuracy_score(y_train, y_train_pred)
    acc_test  = accuracy_score(y_test,  y_test_pred)

    # log_loss already returns the mean loss per sample (normalize=True by
    # default), so it must not be divided by the sample count again; doing so
    # shrinks the value by a factor of N and makes train/test incomparable.
    loss_train = log_loss(y_train, y_train_pred_proba)
    loss_test  = log_loss(y_test,  y_test_pred_proba)

    print_log = 'channel: {:>2}/{:>2} | acc_train: {:>.2%}, loss_train:{:>.2f} | acc_test: {:>.2%}, loss_test:{:>.2f}'.\
                    format(c, n_channels, acc_train, loss_train, acc_test, loss_test)
    print(print_log)