In [1]:
import numpy as np

def mean_predictions(probas):
    '''Average several prediction arrays element-wise.

    :param probas: sequence (or stacked array) of per-model predictions;
                   the first axis indexes the models
    :return: the mean taken along the first axis
    '''
    averaged = np.mean(probas, axis=0)
    return averaged
def max_voting(preds):
    '''Pick the largest entry of every row.

    Note that the result holds the winning *values*, not the column
    indices: for each row the position of the maximum is located and the
    value at that position is gathered.

    :param preds: 2-d array, one row per sample and one column per candidate
    :return: 2-d array of shape (n_rows, 1) with the row-wise maxima
    '''
    # column index of the (first) maximum in each row
    best_col = np.argmax(preds, axis=1)
    # turn the 1-d index vector into a column so it lines up with the rows
    gather_idx = np.expand_dims(best_col, axis=1)
    return np.take_along_axis(preds, gather_idx, axis=1)

In [2]:
import scipy.stats as stats
def rank_mean(probas):
    '''Average the per-column ranks of a prediction matrix.

    Every column is replaced by the ranks of its values (as computed by
    ``scipy.stats.rankdata``) and the ranks are then averaged across the
    columns for each row.

    :param probas: 2-d array, one row per sample and one column per model
    :return: 1-d array with the mean rank of every row
    '''
    # rank each column independently of the others
    ranked = [stats.rankdata(probas[:, i]) for i in range(probas.shape[1])]
    # stack only once all columns are ranked; stacking inside the loop
    # would turn the list into an array and break the next append
    ranked = np.column_stack(ranked)
    return np.mean(ranked, axis=1)

In [3]:
import numpy as np
from functools import partial 
from scipy.optimize import fmin 
from sklearn import metrics

class OptimizeAUC:
    '''Find blending weights for a set of predictions by maximising AUC.

    ``fit`` searches for one weight per column of ``X`` with
    ``scipy.optimize.fmin``; ``predict`` applies the stored weights.
    '''

    def __init__(self):
        # placeholder value until fit() stores the optimised weights
        self.coef = 0

    def _auc(self, coef, X, y):
        '''Return the negated AUC of the weighted row sums of X.

        The sign is flipped because fmin minimises its objective.
        '''
        blended = np.sum(X * coef, axis=1)
        return -1.0 * metrics.roc_auc_score(y, blended)

    def fit(self, X, y):
        '''Optimise the weights on X / y and store them in ``self.coef``.'''
        # random starting weights drawn from a flat Dirichlet distribution
        start = np.random.dirichlet(np.ones(X.shape[1]), size=1)
        # X and y are forwarded to the objective through fmin's args
        self.coef = fmin(self._auc, start, args=(X, y), disp=True)

    def predict(self, X):
        '''Return the row-wise sum of X weighted by ``self.coef``.'''
        return np.sum(X * self.coef, axis=1)
    

In [4]:
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn import ensemble
from sklearn import linear_model 
from sklearn import metrics
from sklearn import model_selection

# NOTE: no random_state is passed below, so the generated data, the split
# and the random forest change between runs and the printed AUCs will vary.
X, y = make_classification(n_samples = 10000, n_features = 25)

# split the data into two equally sized, class-stratified folds
xfold1, xfold2, yfold1, yfold2 = model_selection.train_test_split(X, y, test_size=0.5, stratify=y)

# fit models on fold1 and make predictions on fold2
logreg = linear_model.LogisticRegression()
rf = ensemble.RandomForestClassifier()
xgbc = xgb.XGBClassifier()

# fit all models on fold1 data
logreg.fit(xfold1, yfold1)
rf.fit(xfold1, yfold1)
xgbc.fit(xfold1, yfold1)

# predict on fold2 with all models (probability of the positive class)
pred_logreg = logreg.predict_proba(xfold2)[:, 1]
pred_rf = rf.predict_proba(xfold2)[:, 1]
pred_xgbc = xgbc.predict_proba(xfold2)[:, 1]

# create a mean prediction; mean_predictions already averages over the
# models, so the result must not be divided by the model count again
avg_preds = mean_predictions([pred_logreg, pred_rf, pred_xgbc])

# a 2d array of predictions, one column per model
folds2_preds = np.column_stack((pred_logreg, pred_rf, pred_xgbc))

# calculate the AUC score of every model on fold2
aucs_folds2 = []
for i in range(folds2_preds.shape[1]):
    auc = metrics.roc_auc_score(yfold2, folds2_preds[:, i])
    aucs_folds2.append(auc)
print(f"Fold-2: LR AUC = {aucs_folds2[0]}")
print(f"Fold-2: RF AUC = {aucs_folds2[1]}")
print(f"Fold-2: XGB AUC = {aucs_folds2[2]}")






Fold-2: LR AUC = 0.9111297920269005
Fold-2: RF AUC = 0.961219704156374
Fold-2: XGB AUC = 0.9590301010033456


IndexError: list index out of range