In [1]:
from sklearn.model_selection import StratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.svm import SVC

In [1]:
%run ./report.ipynb

In [1]:
# Baseline hyperparameters; used as the default ``params`` argument of
# get_model(). Note that no "model" key is defined here.
default_params = dict(
    activation="relu",
    alpha=0.0001,
    beta_1=0.9,
    beta_2=0.999,
    hidden_layer_sizes=(100,),
    max_iter=200,
    n_iter_no_change=10,
    validation_fraction=0.1,
    warm_start=False,
)

from scipy.stats import mode
import numpy as np


def soft_voting(predictions_list, weights=None):
    """Combine per-model class scores by (weighted) averaging.

    Args:
        predictions_list: Sequence with one entry per model. The entries are
            averaged along axis 0 and the arg-max is taken along axis 1 of
            the result, so every entry must have the same 2-D layout.
        weights: Optional per-model weights. When omitted, every model
            gets weight 1.

    Returns:
        Array holding, for each row, the column index with the highest
        averaged score.
    """
    n_models = len(predictions_list)
    vote_weights = [1] * n_models if weights is None else weights
    averaged_scores = np.average(predictions_list, axis=0, weights=vote_weights)
    winners = np.argmax(averaged_scores, axis=1)
    return winners


def majority_voting(predictions_list):
    """Return the most frequent prediction per position across models.

    Args:
        predictions_list: Sequence with one entry per model; the mode is
            computed along axis 0 (i.e. across the entries).

    Returns:
        The ``mode`` field of ``scipy.stats.mode``'s result (the counts are
        discarded).
    """
    vote_result = mode(predictions_list, axis=0)
    return vote_result.mode


def get_model(params=default_params):
    """Build an unfitted scikit-learn classifier from a parameter dict.

    Args:
        params: Mapping of constructor keyword arguments plus an optional
            ``"model"`` entry selecting the estimator: ``"mlp"`` for
            ``MLPClassifier`` or ``"svm"`` for ``SVC``. When ``"model"`` is
            absent (as in ``default_params``) an MLP is built. The mapping
            itself is never modified.

    Returns:
        A new, unfitted ``MLPClassifier`` or ``SVC``.

    Raises:
        ValueError: If ``"model"`` names an unsupported estimator.
    """
    # Work on a copy: popping from the caller's dict (or from the shared
    # ``default_params`` default) would strip "model" and break the next call.
    estimator_params = dict(params)
    model = estimator_params.pop("model", "mlp")

    if model == "mlp":
        return MLPClassifier(**estimator_params)

    if model == "svm":
        return SVC(**estimator_params)

    # Fail loudly instead of returning None and crashing later on ``.fit``.
    raise ValueError(f"Unknown model {model!r}; expected 'mlp' or 'svm'")


def classifier(base, train_size=False, params=None, random_state=None):
    """Cross-validate the estimator described by ``params`` on ``base``.

    Args:
        base: 2-D array whose last column is the label and whose remaining
            columns are the features.
        train_size: ``False`` (default) selects a shuffled 10-fold
            ``StratifiedKFold``; any other value is forwarded as
            ``train_size`` to a 10-split ``StratifiedShuffleSplit``.
        params: Dict passed to ``get_model`` (constructor keyword arguments
            plus the ``"model"`` selector). A copy is handed over, so the
            caller's dict is left untouched. ``None`` means an empty dict.
        random_state: Seed forwarded to the splitter so the folds can be
            reproduced; ``None`` keeps the unseeded behaviour.

    Returns:
        Tuple ``(model, report)``: the estimator fitted on the last fold and
        the ``Report`` that collected every fold's predictions.

    Note:
        Each fold calls ``predict_proba``; an ``SVC`` only provides it when
        constructed with ``probability=True``.
    """
    # Local import keeps this function self-contained within the notebook.
    from sklearn.base import clone

    # Copy so that get_model (which consumes the "model" key) can never
    # mutate the caller's dict.
    model = get_model(dict(params or {}))

    # ``!= False`` is kept on purpose: 0 / 0.0 also select the k-fold branch.
    if train_size != False:  # noqa: E712
        cross_validation = StratifiedShuffleSplit(
            n_splits=10, train_size=train_size, random_state=random_state
        )
    else:
        cross_validation = StratifiedKFold(
            n_splits=10, shuffle=True, random_state=random_state
        )

    # Report is not defined in this section; presumably provided by the
    # ``%run ./report.ipynb`` cell -- TODO confirm.
    report = Report()

    y = base[:, -1]
    X = base[:, 0:-1]

    fitted_model = model
    for train_index, test_index in cross_validation.split(X, y):
        # Fit a fresh clone per fold: with warm_start=True a reused estimator
        # would continue from weights learned on the previous fold, leaking
        # that fold's training data into this fold's evaluation.
        fitted_model = clone(model)
        fitted_model.fit(X[train_index], y[train_index])
        y_pred = fitted_model.predict_proba(X[test_index])
        report.register_report_fold(y=y[test_index], y_pred=y_pred)

    print(report.report_folds[-1])
    report.print_report()
    return fitted_model, report


def _out_of_fold_proba(estimator, X, y, folds, n_classes):
    """Return out-of-fold class probabilities of ``estimator``, one row per sample."""
    # Local import keeps this helper self-contained within the notebook.
    from sklearn.base import clone

    probabilities = np.zeros((len(y), n_classes))
    for train_index, test_index in folds:
        # Fresh clone per fold so nothing learned on one fold carries over to
        # the next (this matters when the estimator uses warm_start=True).
        fold_model = clone(estimator)
        fold_model.fit(X[train_index], y[train_index])
        # Writing by test index keeps rows aligned with the input order.
        probabilities[test_index] = fold_model.predict_proba(X[test_index])
    return probabilities


def classifier_ensembled(base1, base2, random_state=None):
    """Soft-vote an MLP and an SVM using 10-fold out-of-fold probabilities.

    The MLP is trained on ``base1`` and the SVM on ``base2``. Both models are
    evaluated on the *same* folds, so their probabilities refer to the same
    samples and can be averaged by ``soft_voting``.

    NOTE(review): the base1 -> MLP / base2 -> SVM mapping is inferred from
    the parameter and variable names; confirm against the calling cells.

    Args:
        base1: 2-D array (features, then the label in the last column) used
            for the MLP.
        base2: 2-D array with the same layout used for the SVM. It must
            describe the same samples in the same order as ``base1``.
        random_state: Seed for the fold shuffling; ``None`` leaves it
            unseeded.

    Returns:
        Array with one entry per input row: the index of the winning column
        of the averaged ``predict_proba`` outputs (scikit-learn orders those
        columns like the sorted unique labels).

    Raises:
        ValueError: If the label columns of ``base1`` and ``base2`` differ.
    """
    X1, y1 = base1[:, 0:-1], base1[:, -1]
    X2, y2 = base2[:, 0:-1], base2[:, -1]
    if not np.array_equal(y1, y2):
        raise ValueError(
            "base1 and base2 must contain the same samples in the same order "
            "(their label columns differ)"
        )

    cross_validation = StratifiedKFold(
        n_splits=10, shuffle=True, random_state=random_state
    )
    # Materialise the folds once: calling split() twice on a shuffling
    # splitter can yield different partitions, which would misalign the
    # two models' predictions.
    folds = list(cross_validation.split(X1, y1))
    n_classes = len(np.unique(y1))

    mlp_params = {
        "activation": "tanh",
        "alpha": 2.6e-06,
        "beta_1": 0.999,
        "beta_2": 0.9,
        "hidden_layer_sizes": (50, 50),
        "max_iter": 64,
        "n_iter_no_change": 32,
        "validation_fraction": 0.0,
        "warm_start": True,
    }
    y_pred1 = _out_of_fold_proba(
        MLPClassifier(**mlp_params), X1, y1, folds, n_classes
    )

    # The "model" selector key is not an SVC argument and is therefore not
    # included; max_iter must be an integer; probability=True is required for
    # predict_proba, which soft voting needs.
    svm_params = {
        "C": 2.128185297299607,
        "cache_size": 1940.3489583333333,
        "class_weight": "balanced",
        "gamma": 0.0033784208920037287,
        "max_iter": -1,
        "probability": True,
        "random_state": 1,
        "tol": 0.00030144315690791237,
    }
    y_pred2 = _out_of_fold_proba(SVC(**svm_params), X2, y2, folds, n_classes)

    combined_predictions = soft_voting([y_pred1, y_pred2])
    return combined_predictions