In [None]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

In [None]:
# Performance criterion
def compute_pred_score(y_true, y_pred):
    """Return the average penalty of a set of predictions (lower is better).

    The element-wise product ``y_true * y_pred`` is inspected: every product
    equal to -1 costs 10 and every product equal to 0 costs 1. The total cost
    is divided by the number of samples.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (presumably in {-1, 1} -- not checked here).
    y_pred : array-like
        Predicted labels; each value must be -1, 0 or 1.

    Returns
    -------
    float
        Average penalty per sample.

    Raises
    ------
    ValueError
        If ``y_pred`` contains a value other than -1, 0 or 1.
    ZeroDivisionError
        If the inputs are empty.
    """
    # Coerce to arrays so plain Python lists work too (list * list would
    # otherwise raise a TypeError).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if not np.all(np.isin(y_pred, (-1, 0, 1))):
        raise ValueError('The predictions can contain only -1, 1, or 0!')
    y_comp = y_true * y_pred
    score = float(10 * np.sum(y_comp == -1) + np.sum(y_comp == 0))
    score /= y_comp.shape[0]
    return score

# Input files, resolved relative to the notebook's working directory.
X_train_fname = 'training_templates.csv'
y_train_fname = 'training_labels.txt'
X_test_fname = 'testing_templates.csv'
# header=None: the first CSV row is read as data, not as column names.
X_train = pd.read_csv(X_train_fname, sep=',', header=None).values
X_test = pd.read_csv(X_test_fname,  sep=',', header=None).values
# `np.int` was only an alias of the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use the builtin directly.
y_train = np.loadtxt(y_train_fname, dtype=int)


def uncerAjust(y_pred, y_pred_pro, threshold=0.9):
    """Replace low-confidence predictions by 0 (abstention).

    A prediction is considered uncertain when the probabilities in both
    column 0 and column 1 of ``y_pred_pro`` are strictly below ``threshold``.

    Parameters
    ----------
    y_pred : array-like, shape (n_samples,)
        Predicted labels. The input is NOT modified.
    y_pred_pro : array-like, shape (n_samples, >=2)
        Per-sample class probabilities; only columns 0 and 1 are read.
    threshold : float, default 0.9
        Minimum probability one of the two columns must reach for the
        prediction to be kept.

    Returns
    -------
    numpy.ndarray
        A copy of ``y_pred`` in which uncertain entries are set to 0.
    """
    # Work on a copy: writing into the caller's array would silently corrupt
    # predictions that are reused afterwards (e.g. in a threshold sweep).
    adjusted = np.array(y_pred)
    if adjusted.shape[0] == 0:
        return adjusted
    proba = np.asarray(y_pred_pro)
    uncertain = (proba[:, 0] < threshold) & (proba[:, 1] < threshold)
    adjusted[uncertain] = 0
    return adjusted


def scorer(estimator, X, y):
    """Score a fitted estimator on (X, y); larger values are better.

    The estimator's predictions are passed through ``uncerAjust`` with its
    default threshold, then evaluated with ``compute_pred_score``. The
    returned value is one minus that penalty.

    NOTE(review): the ``(estimator, X, y)`` signature looks intended for use
    as a scikit-learn ``scoring`` callable -- confirm against the caller.
    """
    labels = estimator.predict(X)
    probabilities = estimator.predict_proba(X)
    penalty = compute_pred_score(y, uncerAjust(labels, probabilities))
    return 1 - penalty


# Index the training templates, then look up the 10 nearest training rows
# for every test template.
nbrs = NearestNeighbors(algorithm='ball_tree', n_neighbors=10)
nbrs.fit(X_train)
distances, indices = nbrs.kneighbors(X_test)
# Flatten the (n_test, 10) neighbour table into one list of training indices.
index = indices.ravel()

In [None]:
# print() with a single argument behaves the same on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(index.shape)
# Keep each training row that is a neighbour of at least one test row, once.
index_unique = np.unique(index)
X_knn = X_train[index_unique]
y_knn = y_train[index_unique]
# Keep the 120 features with the highest ANOVA F-score on the kNN subset.
# NOTE(review): the selector is fitted before the train/validation split
# below, so validation rows influence the feature choice -- confirm this is
# intended.
fsl = SelectKBest(f_classif, k=120)
X_new = fsl.fit_transform(X_knn, y_knn)
X_test_fsl = fsl.transform(X_test)
# Explicit formatting keeps the output identical on Python 2 and 3.
print('%s %s' % (X_knn.shape, y_knn.shape))

In [None]:
# Hold out 5% of the selected samples for validation; the split is seeded.
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(
    X_new, y_knn,
    test_size=0.05, train_size=0.95,
    random_state=2,
)

In [None]:
# Seed for the stochastic steps of this cell, so that a fresh
# "Restart & Run All" reproduces the same model. The value matches the
# random_state already used for the train/validation split.
SEED = 2

# Whitened PCA using the randomized SVD solver (stochastic, hence seeded).
pca = PCA(svd_solver='randomized', n_components=120, whiten=True,
          random_state=SEED)
X_train_pca = pca.fit_transform(X_train_1)
# Progress marker; print() form is valid on both Python 2 and 3.
print(1)

# Bag of 30 three-hidden-layer MLPs, each trained on half of the samples and
# 70% of the features. The ensemble's random_state also drives the seed given
# to every base estimator.
clf = BaggingClassifier(MLPClassifier(hidden_layer_sizes=(100, 100, 100)),
                        n_estimators=30, max_features=0.7, max_samples=0.5, n_jobs=-1,
                        random_state=SEED)
clf.fit(X_train_pca, y_train_1)

In [None]:
# --- Tune the abstention threshold on the held-out split -------------------
threshold = 0
X_test_pca = pca.transform(X_test_1)
y_pred = clf.predict(X_test_pca)
y_pred_pro = clf.predict_proba(X_test_pca)

# The penalty ranges from 0 to 10, so start from +inf: starting from 1 would
# report a made-up score of 1 whenever every candidate is worse than that.
score = np.inf
for i in np.arange(0, 1, 0.01):
    # Pass a copy so each candidate threshold is evaluated on the untouched
    # predictions, regardless of whether uncerAjust writes into its input.
    y_pred_1 = uncerAjust(y_pred.copy(), y_pred_pro, i)
    # Argument order is (y_true, y_pred): the -1/0/1 validation must run on
    # the predictions, not on the ground truth.
    temp = compute_pred_score(y_test_1, y_pred_1)
    if temp < score:
        score = temp
        threshold = i
print('thre:%f' % threshold)
print('score:%f' % score)

# --- Predict the real test set and write the submission file ---------------
X_test_pca1 = pca.transform(fsl.transform(X_test))
y_pred = clf.predict(X_test_pca1)
y_pred_pro = clf.predict_proba(X_test_pca1)
# NOTE(review): 0.697 is not on the 0.01 grid searched above, so it is not the
# `threshold` found by the loop -- presumably hand-picked from earlier runs.
# TODO confirm, or switch to `threshold`.
y_pred = uncerAjust(y_pred, y_pred_pro, 0.697)
np.savetxt('y_pred_bagging.txt', y_pred, fmt='%d')

In [None]:
# Number of test samples on which the final prediction is 0 (abstention).
l = list(y_pred)
# print() form is valid on both Python 2 and 3.
print(l.count(0))