In [1]:
import utils
from supermodel import SVNSupermodel
from supermodel import test_supermodel
import numpy as np
import utils
import classifiers
from scipy.stats import bernoulli

In [2]:
SHAPE = (1000, 10)

def get_randomized_classifiers(ndim, C=100, nsamples = 100):
    clfs = utils.get_all_classifiers(C=C)
    X = np.random.normal(0, 1, (nsamples, ndim))
    y = np.random.randint(0, 2, nsamples)
    utils.train_clfs(clfs, X, y)
    return clfs

def generate_synthetic_test(shape=SHAPE, w_scale = 0.1):
    clfs = get_randomized_classifiers(shape[1])
    X = np.random.normal(0, 1, shape)
    margins = utils.get_margins(clfs, X)
    w = w_scale * np.random.random(len(clfs))
    p = utils.sigmoid(-np.dot(margins, w))
    y = np.zeros(shape[0])
    for idx, prob in enumerate(p):
        y[idx] = bernoulli.rvs(prob)
    return X, y

In [3]:
X, y = generate_synthetic_test()
DATASETS = [(X, y)]
N_FOLDS = 5
TIMES = 20

## l2

In [4]:
test_supermodel(args={"algo":"l2", "auc_window":None, "refit_after":False}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5082, std = 0.0417
Mean = 0.5311, std = 0.0400
Mean = 0.5072, std = 0.0367
Mean = 0.5031, std = 0.0362
Mean = 0.5239, std = 0.0383
Mean = 0.5196, std = 0.0375
Mean = 0.5080, std = 0.0401
Mean = 0.5102, std = 0.0399
Mean = 0.5257, std = 0.0397
Mean = 0.5189, std = 0.0369
Mean = 0.4846, std = 0.0378
Max classifier mean:
Mean = 0.5311, std = 0.0400
Superclassifier score
Mean = 0.5202, std = 0.0422


## l1

In [5]:
test_supermodel(args={"algo":"l1", "auc_window":None, "refit_after":False}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5046, std = 0.0410
Mean = 0.5298, std = 0.0387
Mean = 0.4970, std = 0.0430
Mean = 0.5070, std = 0.0376
Mean = 0.5178, std = 0.0460
Mean = 0.5112, std = 0.0446
Mean = 0.5056, std = 0.0399
Mean = 0.5049, std = 0.0401
Mean = 0.5177, std = 0.0388
Mean = 0.5174, std = 0.0419
Mean = 0.4868, std = 0.0424
Max classifier mean:
Mean = 0.5298, std = 0.0387
Superclassifier score
Mean = 0.5212, std = 0.0413


## l1 + refit

In [6]:
test_supermodel(args={"algo":"l1", "auc_window":None, "refit_after":True}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5143, std = 0.0396
Mean = 0.5311, std = 0.0357
Mean = 0.5195, std = 0.0348
Mean = 0.4944, std = 0.0383
Mean = 0.5208, std = 0.0332
Mean = 0.5183, std = 0.0336
Mean = 0.5142, std = 0.0396
Mean = 0.5146, std = 0.0389
Mean = 0.5303, std = 0.0343
Mean = 0.5159, std = 0.0364
Mean = 0.4757, std = 0.0352
Max classifier mean:
Mean = 0.5311, std = 0.0357
Superclassifier score
Mean = 0.5162, std = 0.0394


## l1 + auc_window=0.05

In [7]:
test_supermodel(args={"algo":"l1", "auc_window":0.05, "refit_after":False}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5075, std = 0.0375
Mean = 0.5236, std = 0.0365
Mean = 0.5146, std = 0.0418
Mean = 0.4995, std = 0.0340
Mean = 0.5172, std = 0.0365
Mean = 0.5118, std = 0.0371
Mean = 0.5099, std = 0.0361
Mean = 0.5085, std = 0.0365
Mean = 0.5206, std = 0.0357
Mean = 0.5181, std = 0.0374
Mean = 0.4860, std = 0.0385
Max classifier mean:
Mean = 0.5236, std = 0.0365
Superclassifier score
Mean = 0.5165, std = 0.0385


## l1 + auc_window=0.05 + refit

In [8]:
test_supermodel(args={"algo":"l1", "auc_window":0.05, "refit_after":True}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5122, std = 0.0364
Mean = 0.5378, std = 0.0341
Mean = 0.5137, std = 0.0369
Mean = 0.4930, std = 0.0371
Mean = 0.5248, std = 0.0331
Mean = 0.5218, std = 0.0330
Mean = 0.5107, std = 0.0364
Mean = 0.5128, std = 0.0365
Mean = 0.5317, std = 0.0354
Mean = 0.5146, std = 0.0364
Mean = 0.4722, std = 0.0344
Max classifier mean:
Mean = 0.5378, std = 0.0341
Superclassifier score
Mean = 0.5261, std = 0.0383


## l1 + positive

In [9]:
test_supermodel(args={"algo":"l1+", "auc_window":None, "refit_after":False}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5119, std = 0.0343
Mean = 0.5260, std = 0.0370
Mean = 0.5120, std = 0.0350
Mean = 0.4987, std = 0.0367
Mean = 0.5240, std = 0.0389
Mean = 0.5211, std = 0.0401
Mean = 0.5130, std = 0.0349
Mean = 0.5115, std = 0.0350
Mean = 0.5241, std = 0.0356
Mean = 0.5228, std = 0.0420
Mean = 0.4848, std = 0.0385
Max classifier mean:
Mean = 0.5260, std = 0.0370
Superclassifier score
Mean = 0.5075, std = 0.0409


## l1 + positive + refit

In [10]:
test_supermodel(args={"algo":"l1+", "auc_window":None, "refit_after":True}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5112, std = 0.0361
Mean = 0.5319, std = 0.0335
Mean = 0.5183, std = 0.0360
Mean = 0.4945, std = 0.0349
Mean = 0.5214, std = 0.0352
Mean = 0.5184, std = 0.0346
Mean = 0.5107, std = 0.0353
Mean = 0.5119, std = 0.0361
Mean = 0.5283, std = 0.0332
Mean = 0.5177, std = 0.0399
Mean = 0.4794, std = 0.0363
Max classifier mean:
Mean = 0.5319, std = 0.0335
Superclassifier score
Mean = 0.5060, std = 0.0410


## l1 + positive + auc_window=0.05

In [11]:
test_supermodel(args={"algo":"l1+", "auc_window":0.05, "refit_after":False}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5062, std = 0.0345
Mean = 0.5282, std = 0.0380
Mean = 0.5004, std = 0.0370
Mean = 0.5005, std = 0.0395
Mean = 0.5229, std = 0.0378
Mean = 0.5182, std = 0.0387
Mean = 0.5074, std = 0.0348
Mean = 0.5084, std = 0.0347
Mean = 0.5225, std = 0.0366
Mean = 0.5156, std = 0.0381
Mean = 0.4828, std = 0.0392
Max classifier mean:
Mean = 0.5282, std = 0.0380
Superclassifier score
Mean = 0.5113, std = 0.0392


## l1 + positive + auc_window=0.05 + refit

In [12]:
test_supermodel(args={"algo":"l1", "auc_window":0.05, "refit_after":True}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5153, std = 0.0364
Mean = 0.5361, std = 0.0327
Mean = 0.5191, std = 0.0351
Mean = 0.4947, std = 0.0301
Mean = 0.5224, std = 0.0330
Mean = 0.5192, std = 0.0344
Mean = 0.5141, std = 0.0356
Mean = 0.5158, std = 0.0350
Mean = 0.5320, std = 0.0358
Mean = 0.5144, std = 0.0357
Mean = 0.4772, std = 0.0365
Max classifier mean:
Mean = 0.5361, std = 0.0327
Superclassifier score
Mean = 0.5232, std = 0.0391


# Using Robust Logistic Regression

## robust + l2

In [13]:
test_supermodel(args={"algo":"l2", "auc_window":None, "refit_after":False, "superclassifier":"robust"}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5053, std = 0.0414
Mean = 0.5276, std = 0.0344
Mean = 0.5082, std = 0.0335
Mean = 0.5023, std = 0.0381
Mean = 0.5214, std = 0.0341
Mean = 0.5171, std = 0.0313
Mean = 0.5054, std = 0.0399
Mean = 0.5061, std = 0.0391
Mean = 0.5211, std = 0.0330
Mean = 0.5209, std = 0.0354
Mean = 0.4828, std = 0.0395
Max classifier mean:
Mean = 0.5276, std = 0.0344
Superclassifier score
Mean = 0.5108, std = 0.0335


## robust + l1

In [14]:
test_supermodel(args={"algo":"l1", "auc_window":None, "refit_after":False, "superclassifier":"robust"}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5061, std = 0.0394
Mean = 0.5285, std = 0.0419
Mean = 0.5033, std = 0.0376
Mean = 0.4967, std = 0.0442
Mean = 0.5202, std = 0.0418
Mean = 0.5143, std = 0.0394
Mean = 0.5075, std = 0.0372
Mean = 0.5076, std = 0.0392
Mean = 0.5211, std = 0.0376
Mean = 0.5147, std = 0.0354
Mean = 0.4830, std = 0.0404
Max classifier mean:
Mean = 0.5285, std = 0.0419
Superclassifier score
Mean = 0.5084, std = 0.0368


## robust + l1 + auc_window=0.05

In [15]:
test_supermodel(args={"algo":"l1", "auc_window":0.05, "refit_after":False, "superclassifier":"robust"}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5093, std = 0.0364
Mean = 0.5260, std = 0.0393
Mean = 0.5092, std = 0.0385
Mean = 0.4997, std = 0.0409
Mean = 0.5206, std = 0.0339
Mean = 0.5153, std = 0.0352
Mean = 0.5117, std = 0.0358
Mean = 0.5110, std = 0.0370
Mean = 0.5207, std = 0.0368
Mean = 0.5203, std = 0.0387
Mean = 0.4861, std = 0.0412
Max classifier mean:
Mean = 0.5260, std = 0.0393
Superclassifier score
Mean = 0.5165, std = 0.0364


## robust + l1 + refit

In [16]:
test_supermodel(args={"algo":"l1", "auc_window":None, "refit_after":True, "superclassifier":"robust"}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5142, std = 0.0331
Mean = 0.5378, std = 0.0321
Mean = 0.5217, std = 0.0385
Mean = 0.4930, std = 0.0300
Mean = 0.5287, std = 0.0360
Mean = 0.5260, std = 0.0355
Mean = 0.5134, std = 0.0328
Mean = 0.5152, std = 0.0331
Mean = 0.5359, std = 0.0331
Mean = 0.5185, std = 0.0417
Mean = 0.4737, std = 0.0338
Max classifier mean:
Mean = 0.5378, std = 0.0321
Superclassifier score
Mean = 0.5185, std = 0.0372


## robust + l1 + auc_window=0.05 + refit

In [17]:
test_supermodel(args={"algo":"l1", "auc_window":0.05, "refit_after":True, "superclassifier":"robust"}, datasets=DATASETS, n_folds=N_FOLDS, times=TIMES)

<class 'tuple'>
Dataset shape: (1000, 10), (1000,)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 Result samples = 100
Classifiers results:
Mean = 0.5154, std = 0.0333
Mean = 0.5383, std = 0.0310
Mean = 0.5235, std = 0.0333
Mean = 0.4967, std = 0.0353
Mean = 0.5228, std = 0.0318
Mean = 0.5206, std = 0.0332
Mean = 0.5142, std = 0.0330
Mean = 0.5163, std = 0.0333
Mean = 0.5366, std = 0.0324
Mean = 0.5169, std = 0.0355
Mean = 0.4712, std = 0.0327
Max classifier mean:
Mean = 0.5383, std = 0.0310
Superclassifier score
Mean = 0.5043, std = 0.0310
