In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier


# Performance criterion (a penalty: lower is better)
def compute_pred_score(y_true, y_pred):
    """Return the mean penalty of ``y_pred`` measured against ``y_true``.

    Each sample is judged by the product ``y_true * y_pred``:
    a product of -1 costs 10, a product of 0 costs 1, and any other
    product costs nothing. The summed cost is divided by the number of
    samples.

    Parameters
    ----------
    y_true : array-like
        Reference labels. They are not validated here.
        NOTE(review): presumably restricted to {-1, 1} -- confirm with caller.
    y_pred : array-like
        Predictions; only the values -1, 0 and 1 are accepted.

    Returns
    -------
    float
        Total cost divided by the number of samples.

    Raises
    ------
    ValueError
        If ``y_pred`` contains any value other than -1, 0 or 1.
    """
    # Coerce to arrays so that plain Python lists work as well as ndarrays
    # (list * list would otherwise raise a TypeError).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Reject any prediction outside the allowed label set.
    if set(np.unique(y_pred).tolist()) - {-1, 0, 1}:
        raise ValueError('The predictions can contain only -1, 1, or 0!')

    y_comp = y_true * y_pred
    n_minus_one = np.sum(y_comp == -1)   # products equal to -1, cost 10 each
    n_zero = np.sum(y_comp == 0)         # products equal to 0, cost 1 each
    return float(10 * n_minus_one + n_zero) / y_comp.shape[0]

# Input files: comma-separated feature matrices without a header row, plus a
# plain-text file of integer training labels.
X_train_fname = 'training_templates.csv'
y_train_fname = 'training_labels.txt'
X_test_fname = 'testing_templates.csv'
X_train = pd.read_csv(X_train_fname, sep=',', header=None).values
X_test = pd.read_csv(X_test_fname,  sep=',', header=None).values
# Use the builtin ``int``: the ``np.int`` alias was deprecated in NumPy 1.20
# and removed in 1.24, while ``int`` yields the same dtype on every version.
y_train = np.loadtxt(y_train_fname, dtype=int)


def uncerAjust(y_pred, y_pred_pro, threshold=0.9):
    """Replace low-confidence predictions with 0.

    A prediction is reset to 0 when both of the first two probability
    columns for that sample are strictly below ``threshold``.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels, one per sample. It is not modified.
    y_pred_pro : array-like, 2-D
        Per-sample probabilities; only columns 0 and 1 are inspected.
    threshold : float, default 0.9
        Minimum probability one of the two columns must reach for the
        prediction to be kept.

    Returns
    -------
    numpy.ndarray
        A new array holding the adjusted predictions.
    """
    # Work on a copy so the caller's array is never mutated.
    adjusted = np.array(y_pred)
    if adjusted.shape[0] == 0:
        # Nothing to adjust; also avoids 2-D indexing on empty probabilities.
        return adjusted

    proba = np.asarray(y_pred_pro)
    # True where neither of the two inspected columns reaches the threshold.
    uncertain = (proba[:, 0] < threshold) & (proba[:, 1] < threshold)
    adjusted[uncertain] = 0
    return adjusted


def scorer(estimator, X, y):
    """Scoring callable with the ``(estimator, X, y)`` signature.

    Predicts on ``X`` with ``estimator`` and returns one minus the penalty
    computed by ``compute_pred_score``, so a smaller penalty gives a larger
    score.
    """
    penalty = compute_pred_score(y, estimator.predict(X))
    return 1 - penalty


# Work on a subset of the training data: 20% is kept for fitting and 5% is
# set aside as a hold-out set; the split is seeded.
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(
    X_train, y_train, train_size=0.2, test_size=0.05, random_state=42)

# Project the subset onto 128 whitened principal components. The randomized
# SVD solver is stochastic, so it is seeded to keep the projection
# reproducible across runs.
pca = PCA(svd_solver='randomized', n_components=128, whiten=True,
          random_state=42)
X_train_pca = pca.fit_transform(X_train_1)


class MyEstimator(BaggingClassifier):
    """Bagging classifier whose ``predict`` is post-processed by ``uncerAjust``.

    All constructor arguments except ``threshold`` are forwarded unchanged to
    ``BaggingClassifier``. ``threshold`` is stored on the instance and passed
    to ``uncerAjust`` at prediction time.

    NOTE(review): if the two inspected probability columns always sum to 1,
    a threshold of 0.5 or less can never trigger an adjustment -- confirm the
    default of 0.5 is intended.
    """

    def __init__(self,
                 base_estimator=None,
                 n_estimators=10,
                 max_samples=1.0,
                 max_features=1.0,
                 bootstrap=True,
                 bootstrap_features=False,
                 oob_score=False,
                 warm_start=False,
                 n_jobs=1,
                 random_state=None,
                 verbose=0, threshold=0.5):

        super(MyEstimator, self).__init__(
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
            bootstrap=bootstrap,
            bootstrap_features=bootstrap_features,
            oob_score=oob_score,
            warm_start=warm_start,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose)
        # Confidence cut-off handed to uncerAjust in predict().
        self.threshold = threshold

    def predict(self, X):
        """Predict labels for ``X`` and pass them through ``uncerAjust``.

        The ensemble probabilities are computed once and reused both to pick
        the most probable class and for the adjustment. Calling
        ``BaggingClassifier.predict`` as well would evaluate every base
        estimator a second time, since it derives labels from
        ``predict_proba`` internally.
        """
        y_pred_pro = BaggingClassifier.predict_proba(self, X)
        # Same label rule BaggingClassifier.predict applies: the class with
        # the highest averaged probability.
        y_pred = self.classes_.take(np.argmax(y_pred_pro, axis=1), axis=0)
        return uncerAjust(y_pred, y_pred_pro, self.threshold)

    
# Base learner for the bagging ensemble.
# NOTE(review): scikit-learn documents `learning_rate` as only used with
# solver='sgd'; with the default solver it has no effect -- confirm intent.
base = MLPClassifier(hidden_layer_sizes=(10,), learning_rate='adaptive')
# Candidate ensemble sizes: 5, 10, ..., 30. `range` instead of `xrange`
# so the cell runs on both Python 2 and Python 3.
parameters = {'n_estimators': list(range(5, 31, 5))}

myEst = MyEstimator(base_estimator=base, n_jobs=-1, max_features=0.7, max_samples=0.5)
clf = GridSearchCV(myEst, parameters, scoring=scorer)
# X_train_pca was built from the X_train_1 subset, so it must be paired with
# the matching y_train_1 labels; the full y_train has a different length.
clf.fit(X_train_pca, y_train_1)


GridSearchCV(cv=None, error_score='raise',
       estimator=MyEstimator(base_estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(260,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
      ...10, n_jobs=-1, oob_score=False,
      random_state=None, threshold=0.5, verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'n_estimators': xrange(5, 35, 5)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=<function scorer at 0x1194307d0>, verbose=0)

In [4]:
# Estimator refit by the grid search with the best-scoring parameters.
ccc = clf.best_estimator_
# Function-call form of print: valid on both Python 2 and Python 3.
print(ccc)

MyEstimator(base_estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(260,), learning_rate='adaptive',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False),
      bootstrap=True, bootstrap_features=False, max_features=1.0,
      max_samples=1.0, n_estimators=15, n_jobs=-1, oob_score=False,
      random_state=None, threshold=0.5, verbose=0, warm_start=False)
