In [1]:
import xgboost as xgb
import pandas as pd
import numpy as np
import pickle
import os.path
import scipy
import random

from sklearn import cross_validation
from sklearn.ensemble.base import BaseEnsemble
from sklearn.calibration import CalibratedClassifierCV
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import StratifiedKFold
import math

from neuralnet import NeuralNet
from load_new import getData



class ModifiedXGBClassifier(xgb.XGBClassifier):
    """XGBoost classifier whose column subsampling is exposed as ``max_features``.

    The constructor does not call the parent constructor; it stores the
    hyper-parameters itself and overrides ``get_params`` /
    ``get_xgb_params`` to report them.

    Parameters
    ----------
    max_depth : maximum tree depth, passed to XGBoost as ``max_depth``.
    learning_rate : passed to XGBoost as ``eta``.
    n_estimators : stored and reported by ``get_params`` only; it is not part
        of the dict returned by ``get_xgb_params``.
    silent : bool, converted to 1/0 for XGBoost.
    objective : XGBoost objective string.
    max_features : passed to XGBoost as ``bst:colsample_bytree``.
    subsample : passed to XGBoost as ``bst:subsample``.
    num_class : number of target classes (default 4).
    """

    def __init__(self, max_depth=20, learning_rate=0.1, n_estimators=300,
                 silent=True, objective='multi:softprob', max_features=0.3,
                 subsample=0.5, num_class=4):
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.silent = silent
        self.n_estimators = n_estimators
        self.objective = objective
        self.max_features = max_features
        self.subsample = subsample
        # num_class is a constructor argument so that every key reported by
        # get_params() can be fed back to __init__ (which is what
        # sklearn.base.clone does with an estimator's parameters).
        self.num_class = num_class
        self._Booster = xgb.Booster()

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict.

        ``deep`` is accepted for scikit-learn compatibility and is not used.
        """
        return {'max_depth': self.max_depth,
                'learning_rate': self.learning_rate,
                'n_estimators': self.n_estimators,
                'silent': self.silent,
                'objective': self.objective,
                'max_features': self.max_features,
                'subsample': self.subsample,
                'num_class': self.num_class
                }

    def get_xgb_params(self):
        """Return the parameters under the key names handed to XGBoost."""
        return {'eta': self.learning_rate,
                'max_depth': self.max_depth,
                'silent': 1 if self.silent else 0,
                'objective': self.objective,
                'bst:subsample': self.subsample,
                'bst:colsample_bytree': self.max_features,
                'num_class': self.num_class
                }
    
class BlendedModel(BaseEnsemble):
    """Combine several base classifiers by blending or stacking their probabilities.

    ``fit`` trains every estimator in ``models``. ``predict_proba`` blends
    their class probabilities directly. The ``fit*`` / ``predict_*proba``
    pairs train and query second-level learners (two logistic regressions, a
    ``NeuralNet`` and a ``ModifiedXGBClassifier``) on features built from the
    base models' predicted probabilities.

    Parameters
    ----------
    models : list or None
        Base estimators providing ``fit``, ``predict_proba`` and
        ``get_params``. ``None`` (the default) means an empty list.
    blending : {'average', 'most_confident'}
        Strategy used by ``predict_proba``.
    nbFeatures : int
        Forwarded to ``NeuralNet``; presumably the width of the network
        input (TODO confirm against neuralnet.NeuralNet).

    Raises
    ------
    Exception
        If ``blending`` is not one of the two supported strategies.
    """

    # Probabilities are clipped to [eps, 1 - eps] before the logit transform
    # so that an exact 0 or 1 never yields an infinite or undefined feature.
    _LOGIT_EPS = 1e-15

    def __init__(self, models=None, blending='average', nbFeatures=4):
        # Build a fresh list per instance instead of sharing one mutable default.
        self.models = [] if models is None else models
        self.blending = blending
        # Stored so that every constructor argument is available as an attribute.
        self.nbFeatures = nbFeatures
        self.logR = LogisticRegression(C=10)   # stacker on raw probabilities
        self.logRT = LogisticRegression(C=10)  # stacker on logit-transformed probabilities
        self.nn = NeuralNet(nbFeatures)
        self.XGB = ModifiedXGBClassifier()
        if self.blending not in ['average', 'most_confident']:
            raise Exception('Wrong blending method')

    # ------------------------------------------------------------------
    # Feature construction shared by all second-level learners
    # ------------------------------------------------------------------
    def _proba_features(self, X):
        """Return the base models' probabilities side by side.

        The result has one row per sample; columns are ordered model by
        model, and class by class within each model.
        """
        per_model = [np.asarray(model.predict_proba(X)) for model in self.models]
        if not per_model:
            # No base model: zero feature columns, one row per sample.
            return np.empty((len(X), 0))
        return np.hstack(per_model)

    def _logit_features(self, X):
        """Return ``log(p / (1 - p))`` of every base-model probability.

        All class columns produced by the base models are used.
        """
        probas = np.clip(self._proba_features(X).astype(np.float64),
                         self._LOGIT_EPS, 1.0 - self._LOGIT_EPS)
        return np.log(probas / (1.0 - probas))

    def _nn_features(self, X):
        """Return the base-model probabilities followed by the raw columns of X."""
        return np.append(self._proba_features(X), X, axis=1)

    # ------------------------------------------------------------------
    # Base models
    # ------------------------------------------------------------------
    def fit(self, X, y):
        """Fit every base model on (X, y), printing its parameters first."""
        for model in self.models:
            print('Training model :')
            print(model.get_params())
            model.fit(X, y)
        return self

    # ------------------------------------------------------------------
    # Second-level learners
    # ------------------------------------------------------------------
    def fitLog(self, X, y, mod=0):
        """Fit a logistic-regression stacker.

        ``mod=0`` trains ``logR`` on raw probabilities, ``mod=1`` trains
        ``logRT`` on logit-transformed probabilities. Any other value does
        nothing. Returns ``self``.
        """
        if mod == 0:
            self.logR.fit(self._proba_features(X), y)
        elif mod == 1:
            self.logRT.fit(self._logit_features(X), y)
        return self

    def fitXGB(self, X, y):
        """Fit the XGBoost stacker on logit-transformed probabilities."""
        self.XGB.fit(self._logit_features(X), y)
        return self

    def predict_XGBproba(self, X):
        """Return the XGBoost stacker's class probabilities for X."""
        return self.XGB.predict_proba(self._logit_features(X))

    def fitNN(self, X, y, lambda1=0.00000001, lambda2=0.00005, new=0, teX=None, teY=None, lr=0.001):
        """Fit the neural-network stacker on probabilities plus raw features.

        ``teX`` / ``teY`` are an optional monitoring set; when ``teX`` is
        non-empty it is transformed the same way as ``X``. ``lambda1``,
        ``lambda2``, ``new`` and ``lr`` are forwarded unchanged to
        ``NeuralNet.fit`` (their meaning is defined there). Returns ``self``.
        """
        # None stands for "no monitoring set"; the network still receives lists.
        teX = [] if teX is None else teX
        teY = [] if teY is None else teY

        features = self._nn_features(X)
        featuresteX = self._nn_features(teX) if len(teX) > 0 else []

        self.nn.fit(features, y, lambda1, lambda2, new, featuresteX, teY, lr)
        return self

    def predict_NNproba(self, X):
        """Return the neural-network stacker's class probabilities for X."""
        return self.nn.predict_proba(self._nn_features(X))

    def predict_Logproba(self, X, mod=0):
        """Return class probabilities from the logistic stacker chosen by ``mod``.

        ``mod=0`` uses ``logR`` (raw probabilities), ``mod=1`` uses ``logRT``
        (logit-transformed probabilities). Any other value returns ``None``.
        """
        if mod == 0:
            return self.logR.predict_proba(self._proba_features(X))
        elif mod == 1:
            return self.logRT.predict_proba(self._logit_features(X))

    # ------------------------------------------------------------------
    # Direct blending
    # ------------------------------------------------------------------
    def predict_proba(self, X):
        """Blend the base models' probabilities according to ``self.blending``.

        'average' returns the mean over models. 'most_confident' returns, for
        every sample, the prediction of the model whose probability vector
        has the lowest Shannon entropy (exact ties are averaged).
        """
        preds = np.array([model.predict_proba(X) for model in self.models])
        if self.blending == 'average':
            return np.mean(preds, axis=0)
        elif self.blending == 'most_confident':
            # Clip away exact 0/1 and renormalise so that log() is finite.
            clipped = np.clip(np.asarray(preds, dtype=np.float64), 1e-5, 1 - 1e-5)
            normed = clipped / clipped.sum(axis=2, keepdims=True)
            # Entropy per (model, sample).
            entropies = -(normed * np.log(normed)).sum(axis=2)
            # Weight 1 for the lowest-entropy model(s) of each sample, 0 otherwise.
            weights = (entropies == entropies.min(axis=0)).astype(float)
            weights /= weights.sum(axis=0)
            return (weights[:, :, np.newaxis] * preds).sum(axis=0)
        # Only reachable when self.blending was changed after construction.
        raise Exception('Wrong blending method')



Couldn't import dot_parser, loading of dot files will not be possible.


In [2]:
if __name__ == '__main__':
    # Load the hyper-parameter search results.
    # NOTE: unpickling executes code from the file; only load trusted results.
    results = pd.io.pickle.read_pickle('results/results_df_last.pkl')

    # Keep runs with a very low best_score, or a slightly higher one when the
    # learning rate (bst:eta) is above 0.3, then retain the 4 best of those.
    low_score = results['best_score'] < 0.07
    decent_score_high_eta = (results['best_score'] < 0.09) & (results['bst:eta'] > 0.3)
    models_to_use = results[low_score | decent_score_high_eta]
    models_to_use = models_to_use.sort(['best_score'])[:4]

    models = []
    for j, row in enumerate(models_to_use.iterrows()):
        print(j)
        # iterrows() yields (index label, row); the row holds the hyper-parameters.
        hyperparams = dict(row[1])
        models.append(ModifiedXGBClassifier(
            # The results frame stores numbers as floats; tree depth and tree
            # count are integral quantities, so cast both explicitly.
            max_depth=int(hyperparams['bst:max_depth']),
            learning_rate=hyperparams['bst:eta'],
            n_estimators=int(hyperparams['best_ntrees']),
            max_features=hyperparams['bst:colsample_bytree'],
            subsample=hyperparams['bst:subsample'],
            silent=True,
            objective='multi:softprob'))

0
1
2
3


In [3]:
# Number of stratified folds used for the blending experiments.
nb_fold = 3

# getData returns four objects, unpacked as dtrX, dtrY, dteX, dteY
# (presumably train/test features and labels; confirm in load_new.getData).
dtrX, dtrY, dteX, dteY = getData(prop=0, oh=0)

# Stratified fold iterator built from the training labels.
skf = StratifiedKFold(dtrY, nb_fold)

2224
3106
536
759
450


  temp[:,j]=np.fft.fft(temp[:,j])


In [4]:
if __name__ == '__main__':

    # Accuracy of the NN blender per fold, on the test half and on the
    # validation half of the held-out data.
    rNNTe = np.zeros(nb_fold)
    rNNVa = np.zeros(nb_fold)

    for j, (train, test) in enumerate(skf):
        # Only the first fold is evaluated; the other slots stay at 0.
        if j != 0:
            continue

        # Split the held-out fold in two: the first half fits the blender
        # (validation), the second half scores it (test). Floor division keeps
        # the split point an integer under both Python 2 and Python 3.
        half = len(test) // 2
        vaX = dtrX[test][:half]
        teX = dtrX[test][half:]
        trX = dtrX[train]
        vaY = dtrY[test][:half]
        teY = dtrY[test][half:]
        trY = dtrY[train]

        # NN input width: 4 class probabilities per base model plus the raw
        # feature columns (matches the features built in BlendedModel.fitNN).
        bmdl = BlendedModel(models, nbFeatures=(len(models) * 4 + len(trX[0])))
        bmdl.fit(trX, trY)

        print("blend nn")
        bmdl.fitNN(vaX, vaY, lambda1=0.0000005, lambda2=0.00001, teX=teX, teY=teY)

        predictTe = bmdl.predict_NNproba(teX)
        scoreTe = np.mean(np.argmax(predictTe, axis=1) == teY)
        print("score blend nn")
        print(scoreTe)
        rNNTe[j] = scoreTe

        predictVa = bmdl.predict_NNproba(vaX)
        scoreVa = np.mean(np.argmax(predictVa, axis=1) == vaY)
        print("score blend nn val set")
        print(scoreVa)
        rNNVa[j] = scoreVa

    print("rNNTe")
    print(rNNTe)
    print("rNNVa")
    print(rNNVa)

Training model :
{'n_estimators': 145, 'max_features': 0.87143152648660338, 'num_class': 4, 'silent': True, 'subsample': 0.5677004279168405, 'objective': 'multi:softprob', 'learning_rate': 0.075351999024363861, 'max_depth': 17.0}
Training model :
{'n_estimators': 77, 'max_features': 0.81208414906970527, 'num_class': 4, 'silent': True, 'subsample': 0.53454467116594329, 'objective': 'multi:softprob', 'learning_rate': 0.19966463026760631, 'max_depth': 28.0}
Training model :
{'n_estimators': 79, 'max_features': 0.97500489683440184, 'num_class': 4, 'silent': True, 'subsample': 0.55386963110903675, 'objective': 'multi:softprob', 'learning_rate': 0.13222069369172015, 'max_depth': 22.0}
Training model :
{'n_estimators': 111, 'max_features': 0.82207855775474548, 'num_class': 4, 'silent': True, 'subsample': 0.74990595153698836, 'objective': 'multi:softprob', 'learning_rate': 0.087168586238034751, 'max_depth': 26.0}
blend nn
200 5 150 0.2 0.4
1071
10 5 0.463118580766 1.38628845143
1.38701456778 0

In [5]:
# Accuracy of each base model on the test half of the held-out fold.
print("score models")
for m, model in enumerate(models):
    predicted_class = np.argmax(model.predict_proba(teX), axis=1)
    print(np.mean(predicted_class == teY))

score models
0.945895522388
0.946828358209
0.944962686567
0.943097014925


In [6]:
def _accuracy(proba, labels):
    """Fraction of rows whose highest-probability class equals the label."""
    return np.mean(np.argmax(proba, axis=1) == labels)


# --- Plain blending of the base models (bmdl.predict_proba) ---
predictTe = bmdl.predict_proba(teX)
scoreTe = _accuracy(predictTe, teY)
print("score average of models")
print(scoreTe)

# --- Logistic regression stacked on the raw probabilities (mod=0) ---
print("blend logistic regression")
bmdl.fitLog(vaX, vaY)

predictTe = bmdl.predict_Logproba(teX)
scoreTe = _accuracy(predictTe, teY)
print("score blend logistic regression")
print(scoreTe)

# Score on the same data the stacker was fitted on.
predictVa = bmdl.predict_Logproba(vaX, mod=0)
scoreVa = _accuracy(predictVa, vaY)
print("score blend logistic regression val test")
print(scoreVa)

# --- Logistic regression stacked on logit-transformed probabilities (mod=1) ---
print("blend logistic regression with log(x/(1-x)")
bmdl.fitLog(vaX, vaY, mod=1)

predictTe = bmdl.predict_Logproba(teX, mod=1)
scoreTe = _accuracy(predictTe, teY)
print("score blend logistic regression with log(x/(1-x)")
print(scoreTe)

# Score on the same data the stacker was fitted on.
predictVa = bmdl.predict_Logproba(vaX, mod=1)
scoreVa = _accuracy(predictVa, vaY)
print("score blend logistic regression with log(x/(1-x) val test")
print(scoreVa)


score average of models
0.948694029851
blend logistic regression
score blend logistic regression
0.945895522388
score blend logistic regression val test
0.936507936508
blend logistic regression with log(x/(1-x)
score blend logistic regression with log(x/(1-x)
0.944029850746
score blend logistic regression with log(x/(1-x) val test
0.940242763772


In [10]:
if __name__ == '__main__':
    # Re-fit the NN blender on the existing bmdl / vaX / teX with both
    # regularisation weights set to zero.
    # NOTE(review): `j` is not assigned in this cell; it is whatever an
    # earlier cell left in the kernel (the probe cell below shows 2), so the
    # scores land in that slot of rNNTe / rNNVa. Confirm this is intended.
    print("blend nn")
    bmdl.fitNN(vaX, vaY, lambda1=0.0, lambda2=0.0, teX=teX, teY=teY)

    # Accuracy on the test half.
    predictTe = bmdl.predict_NNproba(teX)
    test_hits = np.argmax(predictTe, axis=1) == teY
    scoreTe = np.mean(test_hits)
    print("score blend nn")
    print(scoreTe)
    rNNTe[j] = scoreTe

    # Accuracy on the validation half (the data the blender was fitted on).
    predictVa = bmdl.predict_NNproba(vaX)
    val_hits = np.argmax(predictVa, axis=1) == vaY
    scoreVa = np.mean(val_hits)
    print("score blend nn val set")
    print(scoreVa)
    rNNVa[j] = scoreVa

    print("rNNTe")
    print(rNNTe)
    print("rNNVa")
    print(rNNVa)

blend nn
200 5 150 0.2 0.4
1071
10 5 0.988795518207 0.0247885748217
0.012380550439 0.0 0.0 11110.7001978 2409.5518776
scoreTe 0.939365671642
20 5 0.986928104575 0.0295253385682
0.0127036667653 0.0 0.0 11329.167318 2509.24930597
scoreTe 0.9375
30 5 0.990662931839 0.0198829996661
0.00408293510102 0.0 0.0 11508.3268008 2599.15338832
scoreTe 0.940298507463
40 5 0.992530345472 0.0175836409914
0.000170022627524 0.0 0.0 11730.0218639 2702.25989392
scoreTe 0.9375
50 5 0.987861811391 0.0221001606801
0.00186877830533 0.0 0.0 11925.0541316 2800.21353591
scoreTe 0.944029850746
60 5 0.988795518207 0.0263464115172
0.00801669951797 0.0 0.0 12100.1706556 2892.31205918
scoreTe 0.942164179104
70 5 0.992530345472 0.0229496714865
0.0295303606575 0.0 0.0 12292.1676453 2995.78294325
scoreTe 0.944029850746
80 5 0.989729225023 0.0220802476201
0.0012865615144 0.0 0.0 12468.138041 3089.88324609
scoreTe 0.945895522388
90 5 0.993464052288 0.0192367216913
0.0349641011283 0.0 0.0 12617.4636858 3182.80255842
scoreTe

In [9]:
# Debug probe: display the value of `j` currently held in the kernel namespace.
j

2