In [30]:
import numpy as np

def mean_predictions(probas):
    return np.mean(probas, axis=1)

def max_voting(preds):
    idxs=np.argmax(preds, axis=1)
    return np.take_along_axis(preds, idxs[:, None], axis=1)

probs=np.random.rand(4,5)
preds=np.random.randint(0,4,(4,5))

print(mean_predictions(probs))
print(max_voting(preds))

[0.63846667 0.35799425 0.58598419 0.55709848]
[[3]
 [1]
 [3]
 [3]]


In [34]:
from scipy import stats

def rank_mean(probas):
    ranked=[]
    for i in range(probas.shape[1]):
        rank_data=stats.rankdata(probas[:, i])
        ranked.append(rank_data)
    ranked=np.column_stack(ranked)
    return np.mean(ranked, axis=1)

probs=np.random.rand(4,5)
print(probs)
rank_mean(probs)

[[0.4595349  0.5283434  0.83220901 0.2237004  0.8352632 ]
 [0.85293325 0.18130427 0.3596681  0.50345398 0.70738105]
 [0.22695863 0.26101784 0.85074546 0.27082399 0.46517596]
 [0.23093361 0.80933674 0.64141253 0.8078626  0.85931651]]


array([2.6, 2.2, 2. , 3.2])

In [46]:
rankedd=[]
for i in range(probs.shape[1]):
    rank_data=stats.rankdata(probs[:, i])
    rankedd.append(rank_data)
    print(i, rank_data)
    ranked=np.column_stack(ranked)

print(ranked)
np.mean(ranked, axis=1)

0 [3. 4. 1. 2.]
1 [3. 1. 2. 4.]
2 [3. 1. 4. 2.]
3 [1. 3. 2. 4.]
4 [3. 2. 1. 4.]
[[3. 4. 1. 2.]
 [3. 1. 2. 4.]
 [3. 1. 4. 2.]
 [1. 3. 2. 4.]
 [3. 2. 1. 4.]]


array([2.5, 2.5, 2.5, 2.5, 2.5])

In [57]:
import numpy as np

from functools import partial
from scipy.optimize import fmin
from sklearn import metrics


class OptimizeAUC:
    def __init__(self):
        self.coef_=0
        
    def _auc(self, coef, X, y):
        x_coef=X*coef
        preds=np.sum(x_coef, axis=1)
        auc_score = metrics.roc_auc_score(y, preds)
        return -1.0*auc_score
    
    def fit(self, X, y):
        loss_partial=partial(self._auc, X=X, y=y)
        initial_coef=np.random.dirichlet(np.ones(X.shape[1]), size=1)
        self.coef_=fmin(loss_partial, initial_coef, disp=True)
        
    def predict(self, X):
        x_coef=X*self.coef_
        preds=np.sum(x_coef, axis=1)
        return preds

In [63]:
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn import ensemble, linear_model, metrics, model_selection

X,y=make_classification(n_samples=10000, n_features=25)
xfold1, xfold2, yfold1, yfold2=model_selection.train_test_split(X, y, test_size=0.5, stratify=y)


# fit on fold1, pred on fold2
logreg=linear_model.LogisticRegression()
rf=ensemble.RandomForestClassifier()
xgb_clf=xgb.XGBClassifier(use_label_encoder=False)

logreg.fit(xfold1, yfold1)
rf.fit(xfold1, yfold1)
xgb_clf.fit(xfold1, yfold1)

pred_logreg, pred_rf, pred_xgb=logreg.predict_proba(xfold2)[:, 1], rf.predict_proba(xfold2)[:, 1], xgb_clf.predict_proba(xfold2)[:, 1]
avg_pred=(pred_logreg+pred_rf+pred_xgb)/3

fold2_preds=np.column_stack((pred_logreg, pred_rf, pred_xgb, avg_pred))

aucs_fold2=[]
for i in range(fold2_preds.shape[1]):
    auc=metrics.roc_auc_score(yfold2, fold2_preds[:, i])
    aucs_fold2.append(auc)
    
print(f"Fold-2: LR AUC = {aucs_fold2[0]}")
print(f"Fold-2: RF AUC = {aucs_fold2[1]}")
print(f"Fold-2: XGB AUC = {aucs_fold2[2]}")
print(f"Fold-2: Average Pred AUC = {aucs_fold2[3]}")


# fit on fold2, pred on fold1
logreg=linear_model.LogisticRegression()
rf=ensemble.RandomForestClassifier()
xgb_clf=xgb.XGBClassifier(use_label_encoder=False)

logreg.fit(xfold2, yfold2)
rf.fit(xfold2, yfold2)
xgb_clf.fit(xfold2, yfold2)

pred_logreg, pred_rf, pred_xgb=logreg.predict_proba(xfold1)[:, 1], rf.predict_proba(xfold1)[:, 1], xgb_clf.predict_proba(xfold1)[:, 1]
avg_pred=(pred_logreg+pred_rf+pred_xgb)/3

fold1_preds=np.column_stack((pred_logreg, pred_rf, pred_xgb, avg_pred))

aucs_fold1=[]
for i in range(fold1_preds.shape[1]):
    auc=metrics.roc_auc_score(yfold1, fold1_preds[:, i])
    aucs_fold1.append(auc)
    
print(f"Fold-1: LR AUC = {aucs_fold1[0]}")
print(f"Fold-1: RF AUC = {aucs_fold1[1]}")
print(f"Fold-1: XGB AUC = {aucs_fold1[2]}")
print(f"Fold-1: Average Pred AUC = {aucs_fold1[3]}\n")



opt=OptimizeAUC()
opt.fit(fold1_preds[:, :-1], yfold2)
opt_preds_fold2=opt.predict(fold2_preds[:, :-1])
auc=metrics.roc_auc_score(yfold2, opt_preds_fold2)
print(f"Optimized AUC, Fold 2 = {auc}")
print(f"Coefficients = {opt.coef_}\n")

opt=OptimizeAUC()
opt.fit(fold2_preds[:, :-1], yfold2)
opt_preds_fold1=opt.predict(fold1_preds[:, :-1])
auc=metrics.roc_auc_score(yfold1, opt_preds_fold1)
print(f"Optimized AUC, Fold 1 = {auc}")
print(f"Coefficients = {opt.coef_}")

Fold-2: LR AUC = 0.9643058971557743
Fold-2: RF AUC = 0.98520839053337
Fold-2: XGB AUC = 0.9856955108451269
Fold-2: Average Pred AUC = 0.9834028693778364
Fold-1: LR AUC = 0.9610986951031649
Fold-1: RF AUC = 0.9858444709404615
Fold-1: XGB AUC = 0.9878575922288588
Fold-1: Average Pred AUC = 0.9841047898270654

Optimization terminated successfully.
         Current function value: -0.514162
         Iterations: 56
         Function evaluations: 114
Optimized AUC, Fold 2 = 0.9864641513370569
Coefficients = [-0.05289877  0.35647758  0.83209972]

Optimization terminated successfully.
         Current function value: -0.986797
         Iterations: 42
         Function evaluations: 92
Optimized AUC, Fold 1 = 0.9878161522023374
Coefficients = [-0.01362855  0.21467562  1.01087033]
