In [144]:
import pandas as pd
import numpy as np
import xgboost as xgb
from random import randint,uniform
from sklearn.model_selection import train_test_split
from time import time
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [134]:
popSize = 50
eliteSize = 0.1
mutation_rate = 0.2
nGeneration = 100

In [135]:
paramList = ['depth','max_bin','eta','gamma','min_child_weight','colsample_bytree','subsample',\
            'fitness']
population = pd.DataFrame(np.zeros(shape=(popSize,len(paramList))),columns=paramList)
population.depth = [randint(6,15) for p in range(0,popSize)]
population.max_bin = [randint(80,150) for p in range(0,popSize)]
population.eta = [uniform(0.1,1) for p in range(0,popSize)]
population.gamma = [uniform(0.01,0.03) for p in range(0,popSize)]
population.min_child_weight = [randint(1,20) for p in range(0,popSize)]
population.colsample_bytree = [uniform(0.7,0.95) for p in range(0,popSize)]
population.subsample = [uniform(0.7,0.95) for p in range(0,popSize)]
population.fitness = [uniform(0,0) for p in range(0,popSize)]

In [136]:
population.head()

Unnamed: 0,depth,max_bin,eta,gamma,min_child_weight,colsample_bytree,subsample,fitness
0,15,90,0.234493,0.020332,10,0.806451,0.787508,0.0
1,11,148,0.721107,0.014304,13,0.701083,0.837215,0.0
2,15,145,0.834003,0.017085,15,0.828568,0.898657,0.0
3,14,129,0.600723,0.022222,3,0.862359,0.816103,0.0
4,13,101,0.875595,0.029728,3,0.913291,0.768916,0.0


In [137]:
population.shape

(50, 8)

In [138]:
def createNewPopulation(population,eliteSize=eliteSize,mutation_rate=mutation_rate,popSize=popSize):
    population.sort_values(['fitness'],ascending=False,inplace=True)
    population.reset_index(drop=True,inplace=True)
    popSize = population.shape[0]
    nElite = int(round(eliteSize*popSize))
    
    new_population = population.copy(deep=True)
    for i in range(nElite,popSize):
        # create father and mother
        p1 = randint(0,int(popSize/2))
        p2 = randint(0,int(popSize/2))
        
        for attr in list(new_population.columns.values):
            if(uniform(0,1)>0.5):
                new_population.loc[i,attr] = population.loc[p1,attr]
            else:
                new_population.loc[i,attr] = population.loc[p2,attr]
                
            if(uniform(0,1)<mutation_rate):
                attr = list(new_population.columns.values)[randint(0,new_population.shape[1]-2)]
                if(attr=='depth'):
                    new_population.loc[i,attr] = max(3,new_population.loc[i,attr]+randint(-2,2))
                elif(attr=='max_bin'):
                    new_population.loc[i,attr] = max(70,new_population.loc[i,attr]+randint(-20,20))
                elif(attr=='eta'):
                    new_population.loc[i,attr] = min(max(0.1,new_population.loc[i,attr]+uniform(-0.05,0.05)),1)
                elif(attr=='gamma'):
                    new_population.loc[i,attr] = max(0.1,new_population.loc[i,attr]+uniform(-0.005,0.005))
                elif(attr=='min_child_weight'):
                    new_population.loc[i,attr] = max(0,new_population.loc[i,attr]+randint(-2,2))
                elif(attr=='colsample_bytree'):
                    new_population.loc[i,attr] = min(max(0.6,new_population.loc[i,attr]+uniform(-0.05,0.05)),1)
                elif(attr=='subsample'):
                    new_population.loc[i,attr] = min(max(0.6,new_population.loc[i,attr]+uniform(-0.05,0.05)),1)
    return new_population

In [139]:
def testInstance(population,i,dtrain):
    params = {
        'objective':'binary:logitraw',
        'tree_method':'hist',
        'eta':population.eta[i],
        'max_depth':population.depth[i],
        'subsample':population.subsample[i],
        'colsample_bytree':population.colsample_bytree[i],
        'max_bin':population.max_bin[i],
        'gamma':population.gamma[i],
        'min_child_weight':population.min_child_weight[i],
        'silent':1,
        'seed':401,
    }
    history = xgb.cv(
        params,
        dtrain,
        num_boost_round = 30,
        nfold=3,
        verbose_eval = False,
        metrics = 'auc'
        )
    return history['test-auc-mean'].iloc[-1]

In [140]:
def printResult(filename,population,i,generation):
    f1 = open(filename,'a')
    f1.write('Generation %d Best fitness %f\n' % (generation,population.fitness[i]))
    f1.write('"eta":%f\n' % population.eta[i])
    f1.write('"max_depth":%f\n' %population.depth[i])
    f1.write('"subsample":%f\n' %population.subsample[i])
    f1.write('"colsample_bytree":%f\n' %population.colsample_bytree[i])
    f1.write('"min_child_weight":%f\n' %population.min_child_weight[i])
    f1.write('"max_bin":%f\n' %population.max_bin[i])
    f1.close()

In [141]:
def evolve(dtrain,nGeneration=nGeneration,popSize=popSize,eliteSize=eliteSize,population=population):
    for gen in range(nGeneration):
        print('Generation %d\n' %gen)
        population = createNewPopulation(population)
        nElite = int(round(eliteSize*popSize))
        for i in range(nElite,popSize):
            print('testing instance %d' %i)
            population.loc[i,'fitness'] = testInstance(population,i,dtrain)
            print('--Fitness %f \n' %population.fitness[i])
        population.sort_values(['fitness'],ascending=False,inplace=True)
        population.reset_index(drop=True,inplace=True)
        printResult('xgb_result.txt',population,0,gen)
        print('Generation %d Best fitness (5-fold mae cv): %f' %(gen,population.fitness[0]))

In [94]:
data = pd.read_csv('titanic_data.csv')

In [95]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [96]:
for col in data.columns.values:
    print(col,sum(data[col].isnull()))

PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2


In [97]:
np.mean(data.Age)

29.69911764705882

In [98]:
data.Age.fillna(np.mean(data.Age),inplace=True)

In [99]:
data.Cabin.fillna(0,inplace=True)

In [100]:
data.Embarked.fillna(0,inplace=True)

In [101]:
for col in data.columns.values:
    print(col,sum(data[col].isnull()))

PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 0
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 0
Embarked 0


In [102]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,0,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,0,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,0,S


In [104]:
def zero_to_string(data):
    if data == 0:
        return '0'
    else:
        return data

In [105]:
sex_encoder = LabelEncoder()
newSex = sex_encoder.fit_transform(data.Sex)
cabin_encoder = LabelEncoder()
newCabin = cabin_encoder.fit_transform(data.Cabin.apply(zero_to_string))
embarked_encoder = LabelEncoder()
newEmbarked = embarked_encoder.fit_transform(data.Embarked.apply(zero_to_string))

In [106]:
sex = pd.Series(newSex,name='Sex')
cabin = pd.Series(newCabin,name='Cabin')
embarked = pd.Series(newEmbarked,name='Embarked')

In [107]:
full_data = pd.concat([data.drop(['PassengerId','Name','Sex','Ticket','Cabin','Embarked'],axis=1),\
                      sex,cabin,embarked],axis=1)

In [108]:
full_data.head()

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex,Cabin,Embarked
0,0,3,22.0,1,0,7.25,1,0,3
1,1,1,38.0,1,0,71.2833,0,82,1
2,1,3,26.0,0,0,7.925,0,0,3
3,1,1,35.0,1,0,53.1,0,56,3
4,0,3,35.0,0,0,8.05,1,0,3


In [109]:
xtrain,xtest,ytrain,ytest = train_test_split(full_data.drop('Survived',axis=1),full_data.Survived,test_size=0.1,random_state=401)

In [148]:
dtrain = xgb.DMatrix(xtrain,ytrain)
dtest = xgb.DMatrix(xtest,ytest)

In [142]:
evolve(dtrain)

Generation 0

testing instance 5
--Fitness 0.845142 

testing instance 6
--Fitness 0.858937 

testing instance 7
--Fitness 0.840686 

testing instance 8
--Fitness 0.844962 

testing instance 9
--Fitness 0.845631 

testing instance 10
--Fitness 0.855334 

testing instance 11
--Fitness 0.839391 

testing instance 12
--Fitness 0.855199 

testing instance 13
--Fitness 0.847989 

testing instance 14
--Fitness 0.847010 

testing instance 15
--Fitness 0.842017 

testing instance 16
--Fitness 0.863658 

testing instance 17
--Fitness 0.854683 

testing instance 18
--Fitness 0.855369 

testing instance 19
--Fitness 0.846708 

testing instance 20
--Fitness 0.852344 

testing instance 21
--Fitness 0.863649 

testing instance 22
--Fitness 0.856990 

testing instance 23
--Fitness 0.854650 

testing instance 24
--Fitness 0.842955 

testing instance 25
--Fitness 0.847169 

testing instance 26
--Fitness 0.844894 

testing instance 27
--Fitness 0.867087 

testing instance 28
--Fitness 0.853688 

testing


testing instance 21
--Fitness 0.863970 

testing instance 22
--Fitness 0.860035 

testing instance 23
--Fitness 0.854575 

testing instance 24
--Fitness 0.867087 

testing instance 25
--Fitness 0.863190 

testing instance 26
--Fitness 0.865032 

testing instance 27
--Fitness 0.863049 

testing instance 28
--Fitness 0.860480 

testing instance 29
--Fitness 0.865747 

testing instance 30
--Fitness 0.862853 

testing instance 31
--Fitness 0.851196 

testing instance 32
--Fitness 0.863928 

testing instance 33
--Fitness 0.862143 

testing instance 34
--Fitness 0.870082 

testing instance 35
--Fitness 0.851222 

testing instance 36
--Fitness 0.858751 

testing instance 37
--Fitness 0.861509 

testing instance 38
--Fitness 0.857017 

testing instance 39
--Fitness 0.856353 

testing instance 40
--Fitness 0.852727 

testing instance 41
--Fitness 0.860115 

testing instance 42
--Fitness 0.863347 

testing instance 43
--Fitness 0.856584 

testing instance 44
--Fitness 0.859048 

testing instanc

--Fitness 0.859287 

testing instance 40
--Fitness 0.868327 

testing instance 41
--Fitness 0.861548 

testing instance 42
--Fitness 0.870710 

testing instance 43
--Fitness 0.861894 

testing instance 44
--Fitness 0.869268 

testing instance 45
--Fitness 0.862763 

testing instance 46
--Fitness 0.871251 

testing instance 47
--Fitness 0.861609 

testing instance 48
--Fitness 0.864767 

testing instance 49
--Fitness 0.859264 

Generation 8 Best fitness (5-fold mae cv): 0.871251
Generation 9

testing instance 5
--Fitness 0.862881 

testing instance 6
--Fitness 0.864478 

testing instance 7
--Fitness 0.869209 

testing instance 8
--Fitness 0.868695 

testing instance 9
--Fitness 0.862176 

testing instance 10
--Fitness 0.868754 

testing instance 11
--Fitness 0.866221 

testing instance 12
--Fitness 0.863761 

testing instance 13
--Fitness 0.867022 

testing instance 14
--Fitness 0.857864 

testing instance 15
--Fitness 0.860533 

testing instance 16
--Fitness 0.867582 

testing instance

--Fitness 0.863191 

testing instance 8
--Fitness 0.871251 

testing instance 9
--Fitness 0.867674 

testing instance 10
--Fitness 0.862579 

testing instance 11
--Fitness 0.861656 

testing instance 12
--Fitness 0.866100 

testing instance 13
--Fitness 0.862280 

testing instance 14
--Fitness 0.870710 

testing instance 15
--Fitness 0.864926 

testing instance 16
--Fitness 0.863694 

testing instance 17
--Fitness 0.868580 

testing instance 18
--Fitness 0.863642 

testing instance 19
--Fitness 0.867632 

testing instance 20
--Fitness 0.869209 

testing instance 21
--Fitness 0.869209 

testing instance 22
--Fitness 0.866178 

testing instance 23
--Fitness 0.870044 

testing instance 24
--Fitness 0.864529 

testing instance 25
--Fitness 0.865835 

testing instance 26
--Fitness 0.862793 

testing instance 27
--Fitness 0.867793 

testing instance 28
--Fitness 0.866100 

testing instance 29
--Fitness 0.870710 

testing instance 30
--Fitness 0.866115 

testing instance 31
--Fitness 0.867674

--Fitness 0.868168 

testing instance 23
--Fitness 0.871251 

testing instance 24
--Fitness 0.862026 

testing instance 25
--Fitness 0.861279 

testing instance 26
--Fitness 0.867674 

testing instance 27
--Fitness 0.871126 

testing instance 28
--Fitness 0.868568 

testing instance 29
--Fitness 0.869334 

testing instance 30
--Fitness 0.868138 

testing instance 31
--Fitness 0.864917 

testing instance 32
--Fitness 0.862642 

testing instance 33
--Fitness 0.865167 

testing instance 34
--Fitness 0.864590 

testing instance 35
--Fitness 0.863191 

testing instance 36
--Fitness 0.871039 

testing instance 37
--Fitness 0.867757 

testing instance 38
--Fitness 0.861947 

testing instance 39
--Fitness 0.862753 

testing instance 40
--Fitness 0.871251 

testing instance 41
--Fitness 0.868343 

testing instance 42
--Fitness 0.871126 

testing instance 43
--Fitness 0.861610 

testing instance 44
--Fitness 0.871251 

testing instance 45
--Fitness 0.867574 

testing instance 46
--Fitness 0.8712

--Fitness 0.863181 

testing instance 38
--Fitness 0.871126 

testing instance 39
--Fitness 0.861508 

testing instance 40
--Fitness 0.866690 

testing instance 41
--Fitness 0.871251 

testing instance 42
--Fitness 0.871251 

testing instance 43
--Fitness 0.867674 

testing instance 44
--Fitness 0.860022 

testing instance 45
--Fitness 0.864503 

testing instance 46
--Fitness 0.871126 

testing instance 47
--Fitness 0.869830 

testing instance 48
--Fitness 0.865140 

testing instance 49
--Fitness 0.871251 

Generation 21 Best fitness (5-fold mae cv): 0.873108
Generation 22

testing instance 5
--Fitness 0.871126 

testing instance 6
--Fitness 0.871126 

testing instance 7
--Fitness 0.871251 

testing instance 8
--Fitness 0.868140 

testing instance 9
--Fitness 0.864505 

testing instance 10
--Fitness 0.865167 

testing instance 11
--Fitness 0.873108 

testing instance 12
--Fitness 0.873088 

testing instance 13
--Fitness 0.867319 

testing instance 14
--Fitness 0.873108 

testing instan

testing instance 5
--Fitness 0.871126 

testing instance 6
--Fitness 0.866675 

testing instance 7
--Fitness 0.862673 

testing instance 8
--Fitness 0.873108 

testing instance 9
--Fitness 0.873108 

testing instance 10
--Fitness 0.871251 

testing instance 11
--Fitness 0.866928 

testing instance 12
--Fitness 0.873108 

testing instance 13
--Fitness 0.869057 

testing instance 14
--Fitness 0.873108 

testing instance 15
--Fitness 0.865251 

testing instance 16
--Fitness 0.873108 

testing instance 17
--Fitness 0.871126 

testing instance 18
--Fitness 0.871126 

testing instance 19
--Fitness 0.865047 

testing instance 20
--Fitness 0.871039 

testing instance 21
--Fitness 0.871251 

testing instance 22
--Fitness 0.867016 

testing instance 23
--Fitness 0.873088 

testing instance 24
--Fitness 0.866293 

testing instance 25
--Fitness 0.871126 

testing instance 26
--Fitness 0.863122 

testing instance 27
--Fitness 0.867567 

testing instance 28
--Fitness 0.873108 

testing instance 29
-

--Fitness 0.873108 

testing instance 22
--Fitness 0.873108 

testing instance 23
--Fitness 0.865818 

testing instance 24
--Fitness 0.873108 

testing instance 25
--Fitness 0.873108 

testing instance 26
--Fitness 0.865389 

testing instance 27
--Fitness 0.861628 

testing instance 28
--Fitness 0.861508 

testing instance 29
--Fitness 0.863788 

testing instance 30
--Fitness 0.873108 

testing instance 31
--Fitness 0.873108 

testing instance 32
--Fitness 0.873108 

testing instance 33
--Fitness 0.873108 

testing instance 34
--Fitness 0.866379 

testing instance 35
--Fitness 0.857330 

testing instance 36
--Fitness 0.873108 

testing instance 37
--Fitness 0.873108 

testing instance 38
--Fitness 0.861355 

testing instance 39
--Fitness 0.861508 

testing instance 40
--Fitness 0.873108 

testing instance 41
--Fitness 0.873108 

testing instance 42
--Fitness 0.873108 

testing instance 43
--Fitness 0.865107 

testing instance 44
--Fitness 0.873108 

testing instance 45
--Fitness 0.8731

--Fitness 0.873108 

testing instance 39
--Fitness 0.867199 

testing instance 40
--Fitness 0.861462 

testing instance 41
--Fitness 0.862928 

testing instance 42
--Fitness 0.873108 

testing instance 43
--Fitness 0.859434 

testing instance 44
--Fitness 0.873108 

testing instance 45
--Fitness 0.863862 

testing instance 46
--Fitness 0.865702 

testing instance 47
--Fitness 0.870549 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.865831 

Generation 34 Best fitness (5-fold mae cv): 0.873108
Generation 35

testing instance 5
--Fitness 0.873108 

testing instance 6
--Fitness 0.861159 

testing instance 7
--Fitness 0.861159 

testing instance 8
--Fitness 0.873108 

testing instance 9
--Fitness 0.859204 

testing instance 10
--Fitness 0.861488 

testing instance 11
--Fitness 0.863463 

testing instance 12
--Fitness 0.864877 

testing instance 13
--Fitness 0.864263 

testing instance 14
--Fitness 0.861133 

testing instance 15
--Fitness 0.873108 

testing instan

--Fitness 0.873108 

testing instance 10
--Fitness 0.873108 

testing instance 11
--Fitness 0.873108 

testing instance 12
--Fitness 0.873108 

testing instance 13
--Fitness 0.873108 

testing instance 14
--Fitness 0.864587 

testing instance 15
--Fitness 0.870773 

testing instance 16
--Fitness 0.873108 

testing instance 17
--Fitness 0.872845 

testing instance 18
--Fitness 0.873108 

testing instance 19
--Fitness 0.873108 

testing instance 20
--Fitness 0.864641 

testing instance 21
--Fitness 0.858194 

testing instance 22
--Fitness 0.873108 

testing instance 23
--Fitness 0.873108 

testing instance 24
--Fitness 0.873108 

testing instance 25
--Fitness 0.857318 

testing instance 26
--Fitness 0.873108 

testing instance 27
--Fitness 0.861023 

testing instance 28
--Fitness 0.873108 

testing instance 29
--Fitness 0.873108 

testing instance 30
--Fitness 0.873108 

testing instance 31
--Fitness 0.856553 

testing instance 32
--Fitness 0.865195 

testing instance 33
--Fitness 0.8731

--Fitness 0.873108 

testing instance 26
--Fitness 0.873108 

testing instance 27
--Fitness 0.873108 

testing instance 28
--Fitness 0.859290 

testing instance 29
--Fitness 0.862088 

testing instance 30
--Fitness 0.873108 

testing instance 31
--Fitness 0.873108 

testing instance 32
--Fitness 0.864937 

testing instance 33
--Fitness 0.865180 

testing instance 34
--Fitness 0.873108 

testing instance 35
--Fitness 0.873108 

testing instance 36
--Fitness 0.873108 

testing instance 37
--Fitness 0.873108 

testing instance 38
--Fitness 0.873108 

testing instance 39
--Fitness 0.873108 

testing instance 40
--Fitness 0.867606 

testing instance 41
--Fitness 0.869127 

testing instance 42
--Fitness 0.873108 

testing instance 43
--Fitness 0.873108 

testing instance 44
--Fitness 0.865231 

testing instance 45
--Fitness 0.862207 

testing instance 46
--Fitness 0.873108 

testing instance 47
--Fitness 0.873108 

testing instance 48
--Fitness 0.853904 

testing instance 49
--Fitness 0.8620

--Fitness 0.862106 

testing instance 44
--Fitness 0.868703 

testing instance 45
--Fitness 0.873108 

testing instance 46
--Fitness 0.861488 

testing instance 47
--Fitness 0.873108 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.873108 

Generation 47 Best fitness (5-fold mae cv): 0.873108
Generation 48

testing instance 5
--Fitness 0.873108 

testing instance 6
--Fitness 0.873108 

testing instance 7
--Fitness 0.873108 

testing instance 8
--Fitness 0.867055 

testing instance 9
--Fitness 0.873108 

testing instance 10
--Fitness 0.873108 

testing instance 11
--Fitness 0.873108 

testing instance 12
--Fitness 0.865665 

testing instance 13
--Fitness 0.865167 

testing instance 14
--Fitness 0.873108 

testing instance 15
--Fitness 0.862240 

testing instance 16
--Fitness 0.872845 

testing instance 17
--Fitness 0.873108 

testing instance 18
--Fitness 0.867567 

testing instance 19
--Fitness 0.873108 

testing instance 20
--Fitness 0.867523 

testing instan

--Fitness 0.873108 

testing instance 12
--Fitness 0.861598 

testing instance 13
--Fitness 0.865474 

testing instance 14
--Fitness 0.873108 

testing instance 15
--Fitness 0.873108 

testing instance 16
--Fitness 0.873108 

testing instance 17
--Fitness 0.873108 

testing instance 18
--Fitness 0.866820 

testing instance 19
--Fitness 0.863731 

testing instance 20
--Fitness 0.866553 

testing instance 21
--Fitness 0.863463 

testing instance 22
--Fitness 0.867199 

testing instance 23
--Fitness 0.855108 

testing instance 24
--Fitness 0.873108 

testing instance 25
--Fitness 0.873108 

testing instance 26
--Fitness 0.864263 

testing instance 27
--Fitness 0.865905 

testing instance 28
--Fitness 0.866038 

testing instance 29
--Fitness 0.873108 

testing instance 30
--Fitness 0.867082 

testing instance 31
--Fitness 0.873108 

testing instance 32
--Fitness 0.873108 

testing instance 33
--Fitness 0.873108 

testing instance 34
--Fitness 0.873108 

testing instance 35
--Fitness 0.8661

--Fitness 0.865231 

testing instance 26
--Fitness 0.873108 

testing instance 27
--Fitness 0.860907 

testing instance 28
--Fitness 0.873108 

testing instance 29
--Fitness 0.873108 

testing instance 30
--Fitness 0.873108 

testing instance 31
--Fitness 0.862997 

testing instance 32
--Fitness 0.873108 

testing instance 33
--Fitness 0.861181 

testing instance 34
--Fitness 0.870871 

testing instance 35
--Fitness 0.864263 

testing instance 36
--Fitness 0.873108 

testing instance 37
--Fitness 0.873108 

testing instance 38
--Fitness 0.867776 

testing instance 39
--Fitness 0.873108 

testing instance 40
--Fitness 0.865665 

testing instance 41
--Fitness 0.865276 

testing instance 42
--Fitness 0.873108 

testing instance 43
--Fitness 0.860579 

testing instance 44
--Fitness 0.873108 

testing instance 45
--Fitness 0.873108 

testing instance 46
--Fitness 0.867082 

testing instance 47
--Fitness 0.873108 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.8658

--Fitness 0.869112 

testing instance 41
--Fitness 0.873108 

testing instance 42
--Fitness 0.867077 

testing instance 43
--Fitness 0.873108 

testing instance 44
--Fitness 0.873108 

testing instance 45
--Fitness 0.863928 

testing instance 46
--Fitness 0.873108 

testing instance 47
--Fitness 0.870871 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.873108 

Generation 60 Best fitness (5-fold mae cv): 0.873108
Generation 61

testing instance 5
--Fitness 0.873108 

testing instance 6
--Fitness 0.873108 

testing instance 7
--Fitness 0.873108 

testing instance 8
--Fitness 0.865889 

testing instance 9
--Fitness 0.873010 

testing instance 10
--Fitness 0.865180 

testing instance 11
--Fitness 0.873108 

testing instance 12
--Fitness 0.863317 

testing instance 13
--Fitness 0.873108 

testing instance 14
--Fitness 0.867077 

testing instance 15
--Fitness 0.864273 

testing instance 16
--Fitness 0.863221 

testing instance 17
--Fitness 0.873108 

testing instan

--Fitness 0.869635 

testing instance 9
--Fitness 0.873108 

testing instance 10
--Fitness 0.873108 

testing instance 11
--Fitness 0.873108 

testing instance 12
--Fitness 0.867523 

testing instance 13
--Fitness 0.865138 

testing instance 14
--Fitness 0.863463 

testing instance 15
--Fitness 0.873108 

testing instance 16
--Fitness 0.873108 

testing instance 17
--Fitness 0.865185 

testing instance 18
--Fitness 0.873108 

testing instance 19
--Fitness 0.873108 

testing instance 20
--Fitness 0.862339 

testing instance 21
--Fitness 0.870871 

testing instance 22
--Fitness 0.869907 

testing instance 23
--Fitness 0.870871 

testing instance 24
--Fitness 0.873108 

testing instance 25
--Fitness 0.873108 

testing instance 26
--Fitness 0.873108 

testing instance 27
--Fitness 0.873108 

testing instance 28
--Fitness 0.865878 

testing instance 29
--Fitness 0.873108 

testing instance 30
--Fitness 0.867776 

testing instance 31
--Fitness 0.873108 

testing instance 32
--Fitness 0.87310

--Fitness 0.873108 

testing instance 26
--Fitness 0.867776 

testing instance 27
--Fitness 0.861429 

testing instance 28
--Fitness 0.873108 

testing instance 29
--Fitness 0.873108 

testing instance 30
--Fitness 0.866271 

testing instance 31
--Fitness 0.873108 

testing instance 32
--Fitness 0.873108 

testing instance 33
--Fitness 0.859224 

testing instance 34
--Fitness 0.862505 

testing instance 35
--Fitness 0.868168 

testing instance 36
--Fitness 0.868500 

testing instance 37
--Fitness 0.873108 

testing instance 38
--Fitness 0.873108 

testing instance 39
--Fitness 0.873108 

testing instance 40
--Fitness 0.873108 

testing instance 41
--Fitness 0.873108 

testing instance 42
--Fitness 0.870871 

testing instance 43
--Fitness 0.873108 

testing instance 44
--Fitness 0.861620 

testing instance 45
--Fitness 0.873108 

testing instance 46
--Fitness 0.862082 

testing instance 47
--Fitness 0.873108 

testing instance 48
--Fitness 0.861488 

testing instance 49
--Fitness 0.8731


testing instance 43
--Fitness 0.868168 

testing instance 44
--Fitness 0.861586 

testing instance 45
--Fitness 0.873108 

testing instance 46
--Fitness 0.873108 

testing instance 47
--Fitness 0.873108 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.870549 

Generation 73 Best fitness (5-fold mae cv): 0.873108
Generation 74

testing instance 5
--Fitness 0.859774 

testing instance 6
--Fitness 0.873108 

testing instance 7
--Fitness 0.873108 

testing instance 8
--Fitness 0.873108 

testing instance 9
--Fitness 0.873108 

testing instance 10
--Fitness 0.861133 

testing instance 11
--Fitness 0.863483 

testing instance 12
--Fitness 0.859774 

testing instance 13
--Fitness 0.873108 

testing instance 14
--Fitness 0.873108 

testing instance 15
--Fitness 0.873108 

testing instance 16
--Fitness 0.873108 

testing instance 17
--Fitness 0.866407 

testing instance 18
--Fitness 0.873108 

testing instance 19
--Fitness 0.868770 

testing instance 20
--Fitness 0.87

--Fitness 0.873108 

testing instance 13
--Fitness 0.873108 

testing instance 14
--Fitness 0.873108 

testing instance 15
--Fitness 0.861598 

testing instance 16
--Fitness 0.867007 

testing instance 17
--Fitness 0.873108 

testing instance 18
--Fitness 0.873108 

testing instance 19
--Fitness 0.873108 

testing instance 20
--Fitness 0.873108 

testing instance 21
--Fitness 0.873108 

testing instance 22
--Fitness 0.864587 

testing instance 23
--Fitness 0.864100 

testing instance 24
--Fitness 0.865315 

testing instance 25
--Fitness 0.860549 

testing instance 26
--Fitness 0.873108 

testing instance 27
--Fitness 0.873108 

testing instance 28
--Fitness 0.861470 

testing instance 29
--Fitness 0.873108 

testing instance 30
--Fitness 0.872845 

testing instance 31
--Fitness 0.863573 

testing instance 32
--Fitness 0.873108 

testing instance 33
--Fitness 0.863573 

testing instance 34
--Fitness 0.873108 

testing instance 35
--Fitness 0.859774 

testing instance 36
--Fitness 0.8642

--Fitness 0.864794 

testing instance 30
--Fitness 0.863463 

testing instance 31
--Fitness 0.873108 

testing instance 32
--Fitness 0.873108 

testing instance 33
--Fitness 0.858772 

testing instance 34
--Fitness 0.873108 

testing instance 35
--Fitness 0.862701 

testing instance 36
--Fitness 0.868168 

testing instance 37
--Fitness 0.868435 

testing instance 38
--Fitness 0.865178 

testing instance 39
--Fitness 0.873108 

testing instance 40
--Fitness 0.861789 

testing instance 41
--Fitness 0.873108 

testing instance 42
--Fitness 0.870225 

testing instance 43
--Fitness 0.873108 

testing instance 44
--Fitness 0.873108 

testing instance 45
--Fitness 0.873108 

testing instance 46
--Fitness 0.862093 

testing instance 47
--Fitness 0.873108 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.868435 

Generation 82 Best fitness (5-fold mae cv): 0.873108
Generation 83

testing instance 5
--Fitness 0.861508 

testing instance 6
--Fitness 0.873108 

testing ins

--Fitness 0.873108 

testing instance 46
--Fitness 0.863559 

testing instance 47
--Fitness 0.868500 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.864804 

Generation 86 Best fitness (5-fold mae cv): 0.873108
Generation 87

testing instance 5
--Fitness 0.862647 

testing instance 6
--Fitness 0.873108 

testing instance 7
--Fitness 0.873108 

testing instance 8
--Fitness 0.863718 

testing instance 9
--Fitness 0.873108 

testing instance 10
--Fitness 0.873108 

testing instance 11
--Fitness 0.866907 

testing instance 12
--Fitness 0.869524 

testing instance 13
--Fitness 0.864021 

testing instance 14
--Fitness 0.859774 

testing instance 15
--Fitness 0.870871 

testing instance 16
--Fitness 0.873108 

testing instance 17
--Fitness 0.873108 

testing instance 18
--Fitness 0.864834 

testing instance 19
--Fitness 0.873108 

testing instance 20
--Fitness 0.873108 

testing instance 21
--Fitness 0.873108 

testing instance 22
--Fitness 0.873108 

testing instan

--Fitness 0.867512 

testing instance 16
--Fitness 0.873108 

testing instance 17
--Fitness 0.867567 

testing instance 18
--Fitness 0.873108 

testing instance 19
--Fitness 0.863698 

testing instance 20
--Fitness 0.873108 

testing instance 21
--Fitness 0.873108 

testing instance 22
--Fitness 0.873108 

testing instance 23
--Fitness 0.859010 

testing instance 24
--Fitness 0.860267 

testing instance 25
--Fitness 0.856899 

testing instance 26
--Fitness 0.873108 

testing instance 27
--Fitness 0.864378 

testing instance 28
--Fitness 0.859774 

testing instance 29
--Fitness 0.873108 

testing instance 30
--Fitness 0.865470 

testing instance 31
--Fitness 0.873108 

testing instance 32
--Fitness 0.873108 

testing instance 33
--Fitness 0.865859 

testing instance 34
--Fitness 0.873108 

testing instance 35
--Fitness 0.873108 

testing instance 36
--Fitness 0.865135 

testing instance 37
--Fitness 0.868732 

testing instance 38
--Fitness 0.873108 

testing instance 39
--Fitness 0.8704

--Fitness 0.865435 

testing instance 34
--Fitness 0.873108 

testing instance 35
--Fitness 0.873108 

testing instance 36
--Fitness 0.873108 

testing instance 37
--Fitness 0.868732 

testing instance 38
--Fitness 0.862276 

testing instance 39
--Fitness 0.863928 

testing instance 40
--Fitness 0.863928 

testing instance 41
--Fitness 0.865141 

testing instance 42
--Fitness 0.868243 

testing instance 43
--Fitness 0.868770 

testing instance 44
--Fitness 0.873108 

testing instance 45
--Fitness 0.863764 

testing instance 46
--Fitness 0.873108 

testing instance 47
--Fitness 0.867007 

testing instance 48
--Fitness 0.873108 

testing instance 49
--Fitness 0.873108 

Generation 95 Best fitness (5-fold mae cv): 0.873108
Generation 96

testing instance 5
--Fitness 0.873108 

testing instance 6
--Fitness 0.873108 

testing instance 7
--Fitness 0.873108 

testing instance 8
--Fitness 0.866376 

testing instance 9
--Fitness 0.873108 

testing instance 10
--Fitness 0.873108 

testing instan

--Fitness 0.867457 

Generation 99 Best fitness (5-fold mae cv): 0.873108


In [171]:
param = {
    'objective':'binary:logitraw',
    'tree_method':'hist',
    
    "eta":0.427638,
    "max_depth":11,
    "subsample":0.834637,
    "colsample_bytree":0.849069,
    "min_child_weight":5,
    "max_bin":107
    
}
bst = xgb.train(param,dtrain,num_boost_round=200)
pred = bst.predict(dtest)
preds = []
for p in pred:
    if p <0:
        preds.append(0)
    else:
        preds.append(1)
print(accuracy_score(ytest,np.sign(preds)))

0.844444444444


In [166]:
preds

[]

In [163]:
ytest

812    0
626    0
32     1
175    0
869    1
27     0
256    1
546    1
132    0
872    0
10     1
410    0
121    0
333    0
886    0
727    1
404    0
243    0
759    1
618    1
584    0
434    0
480    0
219    0
354    0
506    1
660    1
269    1
378    0
14     0
      ..
398    0
110    0
11     1
442    0
663    0
53     1
373    0
142    1
262    0
63     0
52     1
88     1
334    1
623    0
322    1
49     0
413    0
655    0
430    1
564    0
427    1
718    0
33     0
78     1
425    0
852    0
271    1
871    1
733    0
312    0
Name: Survived, Length: 90, dtype: int64