In [28]:
import xgboost as xgb
import pandas as pd
import numpy as np
import pickle
import os.path
import scipy
import random
from loadfft import getData
from sklearn import cross_validation
from sklearn.ensemble.base import BaseEnsemble
from sklearn.calibration import CalibratedClassifierCV
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from blend_nn import NeuralNet

# Load the saved hyper-parameter search results and keep the runs to blend:
# either best_score below 0.07, or best_score below 0.09 combined with bst:eta above 0.3.
# NOTE(review): unpickling executes arbitrary code -- only load result files you trust.
results = pd.io.pickle.read_pickle('results/results_df_last_fft.pkl')
best_score = results['best_score']
is_strong = best_score < 0.07
is_decent_with_high_eta = (best_score < 0.09) & (results['bst:eta'] > 0.3)
# Rows are ordered by best_score so that the first model is the best-scoring one.
models_to_use = results[is_strong | is_decent_with_high_eta].sort(['best_score'])
models_to_use

Unnamed: 0,best_ntrees,best_score,bst:colsample_bytree,bst:eta,bst:max_depth,bst:subsample,early_stopping,n_round,ntrees,score
6,134,0.059785,0.943942,0.115512,18,0.647034,100,800,234,0.061653
0,254,0.066324,0.858151,0.203779,17,0.585147,100,800,354,0.066791
46,147,0.066324,0.79923,0.153808,13,0.536295,100,800,247,0.071929
113,152,0.066324,0.780146,0.097607,16,0.755971,100,800,252,0.069594
89,88,0.066791,0.760885,0.204191,18,0.575165,100,800,188,0.070528
132,103,0.067258,0.820301,0.138812,28,0.656281,100,800,203,0.071462
62,73,0.067725,0.743723,0.126824,25,0.510473,100,800,173,0.072863
87,50,0.067725,0.732633,0.17233,13,0.760958,100,800,150,0.069594
18,199,0.068192,0.773489,0.140193,12,0.575883,100,800,299,0.071929
131,59,0.068192,0.968865,0.164062,16,0.510009,100,800,159,0.071462


In [2]:
# Load the train/test arrays, then hold out the first rows of the training set
# as a validation split.
trX, trY, teX, teY = getData(oh=0)
l = len(trX)

# Fraction of the training rows moved to the validation split.
# With 0 the validation arrays are empty; the cells that fit the blenders on
# (vaX, vaY) need a non-zero fraction.
VALIDATION_FRACTION = 0
# int() keeps the slice index an integer for fractional values as well.
cut = int(l * VALIDATION_FRACTION)

vaX = trX[:cut, :]
vaY = trY[:cut]
trX = trX[cut:, :]
trY = trY[cut:]

2224
3106
536
759


In [4]:
class ModifiedXGBClassifier(xgb.XGBClassifier):
    """XGBClassifier subclass that also exposes column and row subsampling.

    The constructor stores every hyper-parameter as an attribute and creates an
    empty ``xgb.Booster``; the parent constructor is not called.

    Parameters
    ----------
    max_depth : number
        Tree depth, forwarded to the booster as ``max_depth`` (cast to int).
    learning_rate : float
        Forwarded to the booster as ``eta``.
    n_estimators : int
        Stored and reported by ``get_params``; not part of ``get_xgb_params``.
    silent : bool
        Forwarded to the booster as ``silent`` (1 or 0).
    objective : str
        Forwarded to the booster as ``objective``.
    max_features : float
        Forwarded to the booster as ``bst:colsample_bytree``.
    subsample : float
        Forwarded to the booster as ``bst:subsample``.
    """

    def __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100,
                 silent=True, objective="reg:linear", max_features=1, subsample = 1):
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.silent = silent
        self.n_estimators = n_estimators
        self.objective = objective
        self.max_features = max_features
        self.subsample = subsample
        self._Booster = xgb.Booster()

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is accepted but unused)."""
        return {'max_depth': self.max_depth,
                'learning_rate': self.learning_rate,
                'n_estimators': self.n_estimators,
                'silent': self.silent,
                'objective': self.objective,
                'max_features' : self.max_features,
                'subsample' : self.subsample
                }

    def get_xgb_params(self):
        """Return the parameter dict handed to the xgboost booster."""
        return {'eta': self.learning_rate,
                # Hyper-parameters read from a DataFrame row arrive as floats
                # (e.g. 18.0); a tree depth has to be an integer.
                'max_depth': int(self.max_depth),
                'silent': 1 if self.silent else 0,
                'objective': self.objective,
                'bst:subsample': self.subsample,
                'bst:colsample_bytree': self.max_features
                }

In [4]:
class BlendedModel(BaseEnsemble):
    """Blend the class probabilities predicted by several classifiers.

    Three kinds of blending are available:

    * ``predict_proba`` combines the member predictions directly, either by
      averaging them (``blending='average'``) or by keeping, for each sample,
      the prediction of the lowest-entropy member(s)
      (``blending='most_confident'``).
    * ``fitLog`` / ``predict_Logproba`` stack the member probabilities
      (``mod=0``) or their log-odds (``mod=1``) and feed them to a
      ``LogisticRegression``.
    * ``fitNN`` / ``predict_NNproba`` feed the stacked member probabilities to
      a ``NeuralNet``.

    Parameters
    ----------
    models : list, optional
        Classifiers providing ``fit``, ``get_params`` and ``predict_proba``.
        Defaults to an empty list (a fresh one per instance).
    blending : {'average', 'most_confident'}
        Strategy used by ``predict_proba``.

    Raises
    ------
    ValueError
        If ``blending`` is not one of the two supported strategies.
    """

    def __init__(self, models=None, blending='average'):
        if blending not in ('average', 'most_confident'):
            raise ValueError('Wrong blending method')
        # A None default avoids sharing one mutable list between instances.
        self.models = [] if models is None else models
        self.blending = blending
        self.logR = LogisticRegression()   # blender on raw probabilities (mod=0)
        self.logRT = LogisticRegression()  # blender on log-odds (mod=1)
        # NOTE(review): the blender input width assumes 4 classes per model --
        # confirm against NeuralNet and the data set.
        self.nn = NeuralNet(len(self.models) * 4)

    def fit(self, X, y):
        """Fit every member model on (X, y), printing its parameters first."""
        for model in self.models:
            print('Training model :')
            print(model.get_params())
            model.fit(X, y)
        return self

    def _member_probas(self, X):
        """Return each member's ``predict_proba(X)`` stacked on a new first axis."""
        if not self.models:
            raise ValueError('BlendedModel has no models to predict with')
        return np.array([model.predict_proba(X) for model in self.models])

    def _stacked_features(self, X, mod=0):
        """Build one feature row per sample from the member predictions.

        Each row is the concatenation, in model order, of that sample's
        probabilities (``mod=0``) or log-odds ``log(p / (1 - p))`` (``mod=1``).
        """
        if mod not in (0, 1):
            raise ValueError('mod must be 0 or 1')
        preds = self._member_probas(X)
        if mod == 1:
            # The ratio keeps the members' dtype; the log is taken in float64.
            # A probability of exactly 0 yields -inf.
            preds = np.log((preds / (1 - preds)).astype(np.float64))
        n_models, n_samples = preds.shape[0], preds.shape[1]
        # Explicit width so that an empty X still reshapes cleanly.
        width = n_models * int(np.prod(preds.shape[2:]))
        # (model, sample, class) -> (sample, model, class) -> (sample, model * class)
        return np.swapaxes(preds, 0, 1).reshape(n_samples, width)

    def fitLog(self, X, y, mod=0):
        """Fit a logistic-regression blender on the member predictions for X.

        ``mod=0`` fits ``self.logR`` on probabilities, ``mod=1`` fits
        ``self.logRT`` on log-odds.  Raises ValueError for any other ``mod``.
        """
        features = self._stacked_features(X, mod)
        blender = self.logR if mod == 0 else self.logRT
        blender.fit(features, y)
        return self

    def fitNN(self, X, y):
        """Fit the neural-network blender on the member probabilities for X."""
        self.nn.fit(self._stacked_features(X), y)
        return self

    def predict_NNproba(self, X):
        """Return the neural-network blender's ``predict_proba`` for X."""
        return self.nn.predict_proba(self._stacked_features(X))

    def predict_proba(self, X):
        """Blend the member probabilities for X according to ``self.blending``.

        Raises ValueError if ``self.blending`` was changed to an unsupported value.
        """
        preds = self._member_probas(X)
        if self.blending == 'average':
            return np.mean(preds, axis=0)
        if self.blending == 'most_confident':
            # Shannon entropy of each member's prediction for each sample,
            # after clipping to [1e-5, 1 - 1e-5] and renormalising.
            clipped = np.clip(preds.astype(np.float64), 1e-5, 1 - 1e-5)
            normed = clipped / clipped.sum(axis=-1, keepdims=True)
            entropies = -(normed * np.log(normed)).sum(axis=-1)
            # Equal weight on the lowest-entropy member(s) of each sample, 0 elsewhere.
            is_min = (entropies == entropies.min(axis=0)).astype(float)
            weights = is_min / is_min.sum(axis=0)
            return (weights[..., np.newaxis] * preds).sum(axis=0)
        raise ValueError('Wrong blending method')

    def predict_Logproba(self, X, mod=0):
        """Return the logistic-regression blender's ``predict_proba`` for X.

        ``mod`` selects the blender exactly as in ``fitLog``.
        """
        features = self._stacked_features(X, mod)
        blender = self.logR if mod == 0 else self.logRT
        return blender.predict_proba(features)

In [29]:
# Build one (unfitted) ModifiedXGBClassifier per selected hyper-parameter row.
models = []
for j, (row_label, params_row) in enumerate(models_to_use.iterrows()):
    print(j)
    hyperparams = dict(params_row)
    models.append(ModifiedXGBClassifier(
                            # Values read from a DataFrame row are floats; tree depth
                            # and tree count have to be integers.
                            max_depth=int(hyperparams['bst:max_depth']),
                            learning_rate=hyperparams['bst:eta'],
                            n_estimators=int(hyperparams['best_ntrees']),
                            max_features=hyperparams['bst:colsample_bytree'],
                            subsample=hyperparams['bst:subsample'],
                            silent=True,
                            objective='multi:softprob')
                  )
models


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


[ModifiedXGBClassifier(learning_rate=0.11551197421096039, max_depth=18.0,
            max_features=0.94394155601070184, n_estimators=134,
            objective='multi:softprob', silent=True,
            subsample=0.64703405479748977),
 ModifiedXGBClassifier(learning_rate=0.20377858764434154, max_depth=17.0,
            max_features=0.85815076222488917, n_estimators=254,
            objective='multi:softprob', silent=True,
            subsample=0.58514739013739292),
 ModifiedXGBClassifier(learning_rate=0.15380842512268544, max_depth=13.0,
            max_features=0.79923049310131822, n_estimators=147,
            objective='multi:softprob', silent=True,
            subsample=0.53629487294143696),
 ModifiedXGBClassifier(learning_rate=0.09760719357142815, max_depth=16.0,
            max_features=0.78014637555483268, n_estimators=152,
            objective='multi:softprob', silent=True,
            subsample=0.75597110990741156),
 ModifiedXGBClassifier(learning_rate=0.20419134525467328, ma

In [6]:
# Wrap the selected models in a BlendedModel (default blending) and fit each of
# them on the training split. fit() returns the BlendedModel, whose repr is shown.
bmdl = BlendedModel(models)
bmdl.fit(trX,trY)

Training model :
{'silent': True, 'subsample': 0.64703405479748977, 'learning_rate': 0.11551197421096039, 'n_estimators': 20, 'max_features': 0.94394155601070184, 'objective': 'multi:softprob', 'max_depth': 18.0}
Training model :
{'silent': True, 'subsample': 0.58514739013739292, 'learning_rate': 0.20377858764434154, 'n_estimators': 20, 'max_features': 0.85815076222488917, 'objective': 'multi:softprob', 'max_depth': 17.0}


  data = np.array(mat.reshape(mat.size), dtype=np.float32)


BlendedModel(blending='average',
       models=[ModifiedXGBClassifier(learning_rate=0.11551197421096039, max_depth=18.0,
           max_features=0.94394155601070184, n_estimators=20,
           objective='multi:softprob', silent=True,
           subsample=0.64703405479748977), ModifiedXGBClassifier(learning_rate=0.20377858764434154, max_depth=17.0,
           max_features=0.85815076222488917, n_estimators=20,
           objective='multi:softprob', silent=True,
           subsample=0.58514739013739292)])

In [31]:
print("score des differents modeles")
# Test-set accuracy of each model taken on its own.
for model in models:
    predicted_labels = np.argmax(model.predict_proba(teX), axis=1)
    print(np.mean(predicted_labels == teY))
    

score des differents modeles
0.85987856142
0.862680990191


In [33]:
# Test-set accuracy of the probabilities blended by bmdl.predict_proba.
predictTe = bmdl.predict_proba(teX)
blended_labels = np.argmax(predictTe, axis=1)
scoreTe = np.mean(blended_labels == teY)
print("score sur la moyenne des differents modeles")
print(scoreTe)


score sur la moyenne des differents modeles
0.865016347501


In [27]:
print("blend logistic regression")
# Fit the logistic-regression blender (mod=0) on the validation split.
bmdl.fitLog(vaX, vaY)

# Accuracy on the test set.
predictTeL = bmdl.predict_Logproba(teX)
test_labels = np.argmax(predictTeL, axis=1)
scoreTeL = np.mean(test_labels == teY)
print("score blend logistic regression")
print(scoreTeL)

# Accuracy on the validation split, i.e. on the rows the blender was fitted on.
predictVaL = bmdl.predict_Logproba(vaX)
validation_labels = np.argmax(predictVaL, axis=1)
scoreVaL = np.mean(validation_labels == vaY)
print("score blend logistic regression val test")
print(scoreVaL)

blend logistic regression
score blend logistic regression
0.870154133582
score blend logistic regression val test
0.865732632808


In [11]:
import math  # NOTE(review): mid-notebook import; imports belong in the first cell
print("blend logistic regression with log(x/(1-x)")
# Fit the mod=1 logistic-regression blender on the validation split,
# then measure its accuracy on the test set.
bmdl.fitLog(vaX, vaY, mod=1)
predictTeL = bmdl.predict_Logproba(teX, mod=1)
test_labels = np.argmax(predictTeL, axis=1)
scoreTeL = np.mean(test_labels == teY)
print("score blend logistic regression with log(x/(1-x)")
print(scoreTeL)

blend logistic regression with log(x/(1-x)
score blend logistic regression with log(x/(1-x)
0.875758991126


In [24]:
import math  # NOTE(review): mid-notebook import; imports belong in the first cell
print("blend logistic regression with log(x/(1-x)")
# Accuracy of the mod=1 logistic-regression blender on the validation split.
predictVaL = bmdl.predict_Logproba(vaX, mod=1)
validation_labels = np.argmax(predictVaL, axis=1)
scoreVaL = np.mean(validation_labels == vaY)
print("score blend logistic regression with log(x/(1-x) val test")
print(scoreVaL)

blend logistic regression with log(x/(1-x)
score blend logistic regression with log(x/(1-x) val test
0.86660828955


In [21]:
# Fit the neural-network blender on the validation split.
# fitNN() returns the BlendedModel, whose repr is shown as the cell result.
print("blend nn")
bmdl.fitNN(vaX,vaY)

blend nn
1000 3 100 0.0 0.0
3426
50 3 0.92119089317 0.332678195701
100 3 0.923234092236 0.331736259725
150 3 0.907764156451 0.383622712641
200 3 0.913018096906 0.453299786582
250 3 0.906304728546 0.497605131131
300 3 0.926152948044 0.35361344257
350 3 0.906012842966 0.486000995051
400 3 0.927904261529 0.294094355944
450 3 0.923817863398 0.35618066646
500 3 0.934033858727 0.285077570489
550 3 0.933158201985 0.355116227444
600 3 0.925277291302 0.388622822674
650 3 0.92819614711 0.514564685446
700 3 0.922358435493 0.418172264738
750 3 0.936952714536 0.310220887796
800 3 0.921774664332 0.485793205803
850 3 0.9176882662 0.395951754026
900 3 0.934617629889 0.314578548014
950 3 0.92469352014 0.396100629105
1000 3 0.917980151781 0.424617771886


BlendedModel(blending='average',
       models=[ModifiedXGBClassifier(learning_rate=0.11551197421096039, max_depth=18.0,
           max_features=0.94394155601070184, n_estimators=20,
           objective='multi:softprob', silent=True,
           subsample=0.64703405479748977), ModifiedXGBClassifier(learning_rate=0.20377858764434154, max_depth=17.0,
           max_features=0.85815076222488917, n_estimators=20,
           objective='multi:softprob', silent=True,
           subsample=0.58514739013739292)])

In [22]:
# Test-set accuracy of the neural-network blender.
predictTeL = bmdl.predict_NNproba(teX)
test_labels = np.argmax(predictTeL, axis=1)
scoreTeL = np.mean(test_labels == teY)
print("score blend nn")
print(scoreTeL)

score blend nn
0.846333489024


In [23]:
# Accuracy of the neural-network blender on the validation split.
predictVaL = bmdl.predict_NNproba(vaX)
validation_labels = np.argmax(predictVaL, axis=1)
scoreVaL = np.mean(validation_labels == vaY)
print("score blend nn val set")
print(scoreVaL)



score blend nn val set
0.917980151781


In [35]:
# Display the selected hyper-parameter rows.
models_to_use

Unnamed: 0,best_ntrees,best_score,bst:colsample_bytree,bst:eta,bst:max_depth,bst:subsample,early_stopping,n_round,ntrees,score
6,134,0.059785,0.943942,0.115512,18,0.647034,100,800,234,0.061653
0,254,0.066324,0.858151,0.203779,17,0.585147,100,800,354,0.066791


In [30]:
# Pick the first model of the list (built from the first row of models_to_use).
model1=models[0]

In [31]:
# Display the configuration of the picked model.
model1

ModifiedXGBClassifier(learning_rate=0.11551197421096039, max_depth=18.0,
           max_features=0.94394155601070184, n_estimators=134,
           objective='multi:softprob', silent=True,
           subsample=0.64703405479748977)

In [32]:
# Fit the picked model on the training split.
# When the cells are run top to bottom, model1 is the same object as the first
# member of bmdl, so this refits that member in place.
model1.fit(trX,trY)

ModifiedXGBClassifier(learning_rate=0.11551197421096039, max_depth=18.0,
           max_features=0.94394155601070184, n_estimators=134,
           objective='multi:softprob', silent=True,
           subsample=0.64703405479748977)

In [33]:
# Test-set accuracy of model1 alone.
model1_labels = np.argmax(model1.predict_proba(teX), axis=1)
print(np.mean(model1_labels == teY))

0.922466137319


In [38]:
# Hyper-parameters of the first selected row, as a plain dict.
first_row = models_to_use.iloc[0]
hyperparams = dict(first_row)
hyperparams

{'best_ntrees': 134.0,
 'best_score': 0.059784999999999998,
 'bst:colsample_bytree': 0.94394155601070184,
 'bst:eta': 0.11551197421096039,
 'bst:max_depth': 18.0,
 'bst:subsample': 0.64703405479748977,
 'early_stopping': 100.0,
 'n_round': 800.0,
 'ntrees': 234.0,
 'score': 0.061652999999999999}

In [41]:
# Number of boosting rounds and booster-level parameters taken from model1;
# the parameter dict is the cell's displayed value.
n_round = int(model1.get_params()['n_estimators'])
hyperparams = model1.get_xgb_params()
hyperparams


{'bst:colsample_bytree': 0.94394155601070184,
 'bst:subsample': 0.64703405479748977,
 'eta': 0.11551197421096039,
 'max_depth': 18.0,
 'objective': 'multi:softprob',
 'silent': 1}

In [42]:
# Display the constructor-level parameters of model1.
model1.get_params()

{'learning_rate': 0.11551197421096039,
 'max_depth': 18.0,
 'max_features': 0.94394155601070184,
 'n_estimators': 134,
 'objective': 'multi:softprob',
 'silent': True,
 'subsample': 0.64703405479748977}

In [35]:
# Build the DMatrix inputs and the parameter list used to train the same
# configuration through xgb.train.
# `hyperparams` is expected to be the dict returned by model1.get_xgb_params().
dtrain = xgb.DMatrix(trX, label = trY)
dtest = xgb.DMatrix(teX, label = teY)

n_round = int(model1.get_params()['n_estimators'])
plst = [
        # Values read from a DataFrame row are floats (e.g. 18.0); a tree depth
        # has to be an integer.
        ('bst:max_depth', int(hyperparams['max_depth'])),
        ('objective', 'multi:softprob'),
        ('silent', 1),
        ('bst:eta', model1.get_params()['learning_rate']),
        ('bst:subsample', hyperparams['bst:subsample']),
        ('bst:colsample_bytree', hyperparams['bst:colsample_bytree']),
        ('num_class', 4)
]




In [36]:
# Train the booster for n_round rounds; the test DMatrix is passed as the
# evaluation set, so its metric is printed after every round.
evallist = [(dtest, 'test')]
mdl = xgb.train(plst, dtrain, num_boost_round=n_round, evals=evallist)

[0]	test-merror:0.160206
[1]	test-merror:0.125642
[2]	test-merror:0.114900
[3]	test-merror:0.117702
[4]	test-merror:0.113498
[5]	test-merror:0.109762
[6]	test-merror:0.106492
[7]	test-merror:0.106492
[8]	test-merror:0.103690
[9]	test-merror:0.105558
[10]	test-merror:0.104157
[11]	test-merror:0.103690
[12]	test-merror:0.101355
[13]	test-merror:0.100420
[14]	test-merror:0.098552
[15]	test-merror:0.099019
[16]	test-merror:0.097151
[17]	test-merror:0.097151
[18]	test-merror:0.098085
[19]	test-merror:0.096217
[20]	test-merror:0.096684
[21]	test-merror:0.094348
[22]	test-merror:0.094348
[23]	test-merror:0.092480
[24]	test-merror:0.093881
[25]	test-merror:0.090612
[26]	test-merror:0.090145
[27]	test-merror:0.089211
[28]	test-merror:0.086875
[29]	test-merror:0.087342
[30]	test-merror:0.086408
[31]	test-merror:0.086408
[32]	test-merror:0.085474
[33]	test-merror:0.085474
[34]	test-merror:0.085474
[35]	test-merror:0.085474
[36]	test-merror:0.085474
[37]	test-merror:0.085474
[38]	test-merror:0.085

In [37]:
# Test-set accuracy of the booster trained through xgb.train.
preds = mdl.predict(dtest)
booster_labels = np.argmax(preds, axis=1)
print(np.mean(booster_labels == teY))

0.921999065857
