In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Input CSV locations, resolved against the directory the notebook is started from.
# os.path.join inserts the separator of the host OS; hard-coded "\\" separators
# only resolve on Windows.
DIR_TRAIN = os.path.join(os.getcwd(), "data", "train.csv")
DIR_TEST = os.path.join(os.getcwd(), "data", "test.csv")

# Column names assigned to the CSV files when they are read. The training file
# carries the same columns as the test file plus the 'Survived' label.
test_names = ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']
train_names = ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin',
               'Embarked']

In [2]:
def loadData(is_train, dir):
    """Read one of the Titanic CSV files into a DataFrame.

    Args:
        is_train: truthy selects the ``train_names`` column list, otherwise
            ``test_names`` is used.
        dir: path passed straight to ``pd.read_csv`` (the callers in this
            notebook pass a CSV file path, not a directory).

    Returns:
        The DataFrame built by ``pd.read_csv``; the file's first row is
        consumed as a header and replaced by the selected column names.
    """
    column_names = train_names if is_train else test_names
    return pd.read_csv(dir, header=0, names=column_names)

In [3]:
def splitData(datas, labels, splite):
    """Split features and labels into a training part and a held-out part.

    Args:
        datas: feature rows.
        labels: labels matching ``datas`` row for row.
        splite: forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        The result of ``train_test_split`` with the seed fixed at 42; the
        caller in this notebook unpacks it as
        (train data, held-out data, train labels, held-out labels).
    """
    split_parts = train_test_split(datas, labels, random_state=42, test_size=splite)
    return split_parts

In [4]:
# Raw training table, read with the train_names column list (includes 'Survived').
data_train = loadData(is_train=True, dir=DIR_TRAIN)

In [5]:
# Raw test table, read with the test_names column list (no 'Survived' column).
data_test = loadData(is_train=False,dir=DIR_TEST)

In [6]:
# Label column of the training rows.
y_train = data_train['Survived']
# Features: the training table without the identifier, ticket and cabin
# columns and without the label itself.
x_train = data_train.drop(columns=['PassengerId', 'Ticket', 'Survived', 'Cabin'])

In [7]:
# Print the first rows of the training features, then of the labels.
for preview in (x_train.head(), y_train.head()):
    print(preview)

   Pclass                                               Name     Sex   Age  \
0       3                            Braund, Mr. Owen Harris    male  22.0   
1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0   
2       3                             Heikkinen, Miss. Laina  female  26.0   
3       1       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0   
4       3                           Allen, Mr. William Henry    male  35.0   

   SibSp  Parch     Fare Embarked  
0      1      0   7.2500        S  
1      1      0  71.2833        C  
2      0      0   7.9250        S  
3      1      0  53.1000        S  
4      0      0   8.0500        S  
0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64


In [8]:
# Test-set features: same column pruning as the training features
# (the test table has no 'Survived' column to remove).
x_val = data_test.drop(columns=['PassengerId', 'Ticket', 'Cabin'])

In [9]:
print(x_train.shape)
print(x_val.shape)
# Stack training and test features so every transformation below is applied
# to both at once. pd.concat replaces DataFrame.append, which was deprecated
# and later removed from pandas. Row labels are left untouched (no
# ignore_index): the training rows come first and are recovered later by
# position with iloc.
data = pd.concat([x_train, x_val])
print(data.shape)

(891, 8)
(418, 8)
(1309, 8)


In [10]:
# Show the first rows of the combined train + test feature frame.
data.head()

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,7.25,S
1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,71.2833,C
2,3,"Heikkinen, Miss. Laina",female,26.0,0,0,7.925,S
3,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,53.1,S
4,3,"Allen, Mr. William Henry",male,35.0,0,0,8.05,S


In [11]:
# A name looks like "Surname, Title. Given names": take the piece after the
# first ", " and keep what precedes its first "." as the passenger's title.
data['Title'] = [
    full_name.split(', ')[1].split('.')[0]
    for full_name in data['Name']
]
data.sample(20)

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Embarked,Title
361,2,"del Carlo, Mr. Sebastiano",male,29.0,1,0,27.7208,C,Mr
241,2,"Christy, Mrs. (Alice Frances)",female,45.0,0,2,30.0,S,Mrs
192,3,"van Billiard, Master. Walter John",male,11.5,1,1,14.5,S,Master
122,2,"Nasser, Mr. Nicholas",male,32.5,1,0,30.0708,C,Mr
3,3,"Wirz, Mr. Albert",male,27.0,0,0,8.6625,S,Mr
287,1,"Snyder, Mr. John Pillsbury",male,24.0,1,0,82.2667,S,Mr
302,3,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,0.0,S,Mr
145,2,"Nicholls, Mr. Joseph Charles",male,19.0,1,1,36.75,S,Mr
394,3,"Kink-Heilmann, Mr. Anton",male,29.0,3,1,22.025,S,Mr
423,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria ...",female,28.0,1,1,14.4,S,Mrs


In [12]:
# The raw name is not used as a feature once 'Title' has been extracted.
data = data.drop(columns=['Name'])
data.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
0,3,male,22.0,1,0,7.25,S,Mr
1,1,female,38.0,1,0,71.2833,C,Mrs
2,3,female,26.0,0,0,7.925,S,Miss
3,1,female,35.0,1,0,53.1,S,Mrs
4,3,male,35.0,0,0,8.05,S,Mr


In [13]:
# Family size = the passenger (1) + siblings/spouses + parents/children aboard.
data['FamilySize'] = 1 + data['SibSp'] + data['Parch']
data.sample(10)

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,FamilySize
224,1,male,38.0,1,0,90.0,S,Mr,2
444,3,male,,0,0,8.1125,S,Mr,1
102,1,male,21.0,0,1,77.2875,S,Mr,2
269,3,male,17.0,0,0,8.6625,S,Mr,1
414,3,male,44.0,0,0,7.925,S,Mr,1
506,2,female,33.0,0,2,26.0,S,Mrs,3
662,1,male,47.0,0,0,25.5875,S,Mr,1
94,1,male,25.0,0,0,26.0,C,Mr,1
582,2,male,54.0,0,0,26.0,S,Mr,1
37,3,male,21.0,0,0,8.05,S,Mr,1


In [14]:
# IsAlone is 1 unless the passenger has relatives aboard (FamilySize > 1), in
# which case it is 0. The flag is computed in a single assignment on the
# frame itself: a chained `data['IsAlone'].loc[mask] = 0` writes through an
# intermediate Series, which raises SettingWithCopyWarning and does not
# update `data` at all when pandas copy-on-write is active.
data['IsAlone'] = (~(data['FamilySize'] > 1)).astype('int64')
data.sample(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,FamilySize,IsAlone
502,3,female,,0,0,7.6292,Q,Miss,1,1
18,3,female,31.0,1,0,18.0,S,Mrs,2,0
809,1,female,33.0,1,0,53.1,S,Mrs,2,0
714,2,male,52.0,0,0,13.0,S,Mr,1,1
734,2,male,23.0,0,0,13.0,S,Mr,1,1
890,3,male,32.0,0,0,7.75,Q,Mr,1,1
121,3,male,,1,0,7.75,Q,Mr,2,0
336,2,male,32.0,0,0,13.0,S,Mr,1,1
203,2,female,8.0,1,1,26.0,S,Miss,3,0
722,2,male,34.0,0,0,13.0,S,Mr,1,1


In [15]:
# Missing values per column, first for the training features, then for the
# test features; a dashed rule is printed before each report.
for frame in (x_train, x_val):
    print('-' * 10)
    print(frame.isnull().sum())

----------
Pclass        0
Name          0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Embarked      2
dtype: int64
----------
Pclass       0
Name         0
Sex          0
Age         86
SibSp        0
Parch        0
Fare         1
Embarked     0
dtype: int64


In [16]:
title_counts = data['Title'].value_counts()
print(title_counts)

# Titles that occur fewer than 10 times are merged into one bucket, 'Msic'.
# title_names is a boolean Series indexed by title: True means "rare".
title_names = title_counts < 10

data['Title'] = [
    'Msic' if title_names.loc[title] else title
    for title in data['Title']
]
data.sample(20)

Mr              757
Miss            260
Mrs             197
Master           61
Rev               8
Dr                8
Col               4
Major             2
Ms                2
Mlle              2
Lady              1
Sir               1
Capt              1
Dona              1
the Countess      1
Mme               1
Jonkheer          1
Don               1
Name: Title, dtype: int64


Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,FamilySize,IsAlone
376,3,female,22.0,2,0,8.6625,S,Miss,3,0
9,3,male,21.0,2,0,24.15,S,Mr,3,0
124,3,male,,0,0,7.75,Q,Mr,1,1
285,3,male,36.0,0,0,7.25,S,Mr,1,1
640,3,male,20.0,0,0,7.8542,S,Mr,1,1
87,3,female,18.0,0,0,8.05,S,Miss,1,1
265,2,male,36.0,0,0,10.5,S,Mr,1,1
157,3,male,30.0,0,0,8.05,S,Mr,1,1
382,3,female,,0,0,14.5,S,Mrs,1,1
286,3,male,,0,0,7.25,S,Mr,1,1


In [17]:
# Column means over the combined train + test frame; they are used further
# down to fill the missing 'Age' and 'Fare' values.
age_mean, fare_mean = data['Age'].mean(), data['Fare'].mean()
print('age mean : {}    fare mean : {}'.format(age_mean, fare_mean))

age mean : 29.881137667304014    fare mean : 33.29547928134557


In [18]:
# mode: the most frequent value(s) of the column
data['Embarked'].mode()

0    S
dtype: object

In [19]:
# Fill every gap in one call on the frame itself: 'Embarked' gets its most
# frequent value, 'Age' and 'Fare' get the column means computed above.
# An in-place fillna on the `data['Embarked']` selection is avoided because
# it acts on an intermediate Series and is not guaranteed to reach `data`
# (it never does under pandas copy-on-write).
data = data.fillna({
    'Embarked': data['Embarked'].mode()[0],
    'Age': age_mean,
    'Fare': fare_mean,
})

In [20]:
# Re-count missing values per column after the fill step.
print(data.isnull().sum())

Pclass        0
Sex           0
Age           0
SibSp         0
Parch         0
Fare          0
Embarked      0
Title         0
FamilySize    0
IsAlone       0
dtype: int64


In [21]:
# Replace each text column by integer category codes shifted by one, so the
# first category becomes 1 (a missing value, coded -1 by pandas, would become 0).
for column in ('Embarked', 'Sex', 'Title'):
    data[column] = data[column].astype('category').cat.codes + 1

In [22]:
# Show the first rows after the text columns were turned into integer codes.
data.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,FamilySize,IsAlone
0,3,2,22.0,1,0,7.25,3,3,2,0
1,1,1,38.0,1,0,71.2833,1,4,2,0
2,3,1,26.0,0,0,7.925,3,2,1,1
3,1,1,35.0,1,0,53.1,3,4,2,0
4,3,2,35.0,0,0,8.05,3,3,1,1


In [23]:
# Bucket 'Age' into labelled ranges. Consecutive entries of age_bin are the
# bucket edges (pd.cut closes each interval on the right by default), and
# group_names labels the eight buckets in the same order.
age_bin = [-1,0,5,12,18,25,35,60,120]
group_names = ['Unknown', 'Baby', 'Child', 'Teenager', 'Student', 'Young Adult', 'Adult', 'Senior']
age_cat = pd.cut(data['Age'],bins=age_bin,labels=group_names)
# Store the bucket's integer code (position of its label in group_names)
# in place of the raw age.
data['Age'] = age_cat.cat.codes

In [24]:
# Show the first rows after 'Age' was replaced by its bucket code.
data.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,FamilySize,IsAlone
0,3,2,4,1,0,7.25,3,3,2,0
1,1,1,6,1,0,71.2833,1,4,2,0
2,3,1,5,0,0,7.925,3,2,1,1
3,1,1,5,1,0,53.1,3,4,2,0
4,3,2,5,0,0,8.05,3,3,1,1


In [25]:
# Bucket 'Fare' the same way as 'Age'. The labels say "quartile", but the
# edges are the fixed numbers below, not quantiles computed from the data.
# NOTE: this rebinds group_names, which previously held the age labels.
fare_bin = (-1, 0, 8, 15, 31, 1000)
group_names = ['Unknown', '1_quartile', '2_quartile', '3_quartile', '4_quartile']
fare_cat = pd.cut(data['Fare'],bins=fare_bin,labels=group_names)
# Store the bucket's integer code in place of the raw fare.
data['Fare'] = fare_cat.cat.codes
data.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title,FamilySize,IsAlone
0,3,2,4,1,0,1,3,3,2,0
1,1,1,6,1,0,4,1,4,2,0
2,3,1,5,0,0,1,3,2,1,1
3,1,1,5,1,0,4,3,4,2,0
4,3,2,5,0,0,2,3,3,1,1


True

In [46]:
# Persist the engineered features so the model cells can reload them.
# The output directory is created when missing (to_csv does not create it),
# and no explicit delete is needed because to_csv overwrites existing files.
os.makedirs('./working', exist_ok=True)

# The combined frame holds the training rows first, followed by the test
# rows, so the split point is the number of training labels.
n_train = len(y_train)

save_train = data.iloc[:n_train].copy()
save_train['Survived'] = y_train
save_train.to_csv('./working/train.csv',index=False)

save_val = data.iloc[n_train:].copy()
save_val.to_csv('./working/test.csv',index=False)

In [27]:
def generator_data():
    """Load the engineered training set and split it for the hyperas search.

    Reads './working/train.csv', separates the 'Survived' label column and
    holds out 20% of the rows (fixed seed 33) for validation.

    NOTE(review): hyperas appears to inline this body into a generated
    module, so the local names should keep matching the parameter names of
    create_model - confirm before renaming anything here.

    Output: the tuple (x_train, y_train, x_test, y_test), every element
    converted to a NumPy array.
    """
    x_train = pd.read_csv('./working/train.csv', header=0)
    y_train = x_train.pop('Survived')
    x_train,x_test,y_train,y_test = train_test_split(x_train, y_train, test_size=0.2, random_state=33)

    # Convert all four partitions. Converting y_train twice while skipping
    # x_test would leave the validation features as a DataFrame.
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)
    x_test = np.asarray(x_test)
    y_test = np.asarray(y_test)

    return x_train,y_train,x_test,y_test

In [28]:
# Materialise the same split outside the search; the trailing underscore keeps
# these arrays apart from the notebook-level x_train / y_train.
x_train_,y_train_,x_test_,y_test_ = generator_data()
x_train_

array([[3, 2, 5, ..., 3, 1, 1],
       [2, 1, 6, ..., 4, 3, 0],
       [3, 2, 4, ..., 3, 1, 1],
       ...,
       [2, 2, 4, ..., 3, 2, 0],
       [3, 2, 4, ..., 3, 1, 1],
       [2, 2, 5, ..., 3, 1, 1]], dtype=int64)

In [66]:
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform


def create_model(x_train,y_train,x_test,y_test):
    '''
    Build, train and score one candidate network for the hyperas search.

    The double-curly-brace expressions are hyperas templates: for every trial
    they are replaced by a value drawn from the given distribution (layer
    width from a fixed list, dropout rate from a uniform range).

    :param x_train: training features
    :param y_train: training labels
    :param x_test: validation features
    :param y_test: validation labels
    :return: dict with 'loss' (negated best validation binary accuracy, so
             that minimising it maximises accuracy), 'status' and the
             trained keras 'model'
    '''
    model = keras.models.Sequential()
    model.add(keras.layers.Dense({{choice([16,32,64,128])}}, activation='relu',kernel_regularizer=keras.regularizers.l1_l2(l1=0.001,l2=0.001)))
    model.add(keras.layers.Dropout({{uniform(0.1,0.8)}}))
    model.add(keras.layers.Dense({{choice([16,32,64,128])}}, activation='relu',kernel_regularizer=keras.regularizers.l1_l2(l1=0.001,l2=0.001)))
    model.add(keras.layers.Dropout({{uniform(0.1,0.8)}}))
    model.add(keras.layers.Dense({{choice([16,32,64,128])}}, activation='relu',kernel_regularizer=keras.regularizers.l1_l2(l1=0.001,l2=0.001)))
    model.add(keras.layers.Dropout({{uniform(0.1,0.8)}}))
    model.add(keras.layers.Dense(1,activation='sigmoid'))
    model.compile(optimizer=keras.optimizers.RMSprop(), loss=keras.losses.binary_crossentropy, metrics=[keras.metrics.binary_accuracy])

    # validation_data is passed as a tuple, the form Keras documents; a list
    # can be mistaken for a multi-input feature set.
    history = model.fit(x_train,y_train,batch_size=128,validation_data=(x_test,y_test),epochs=20)

    # Score the trial by the best validation accuracy reached in any epoch.
    validation_acc = np.amax(history.history['val_binary_accuracy'])

    return {'loss': -validation_acc, 'status': STATUS_OK, 'model': model}

In [67]:
# (The interactive `??optim.minimize` source lookup that lived in this cell was
# removed; run it ad hoc when the hyperas signature needs checking.)

In [68]:
# Reset Keras' global state before the search starts building models.
keras.backend.clear_session()

# Hyperparameter search: five TPE-guided trials of create_model, fed by
# generator_data. notebook_name is the name of this notebook file as hyperas
# expects it (without extension).
best_run, best_model = optim.minimize(
    model=create_model,
    data=generator_data,
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials(),
    notebook_name='titanic_v2',
)



>>> Imports:
#coding=utf-8

try:
    import os
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    import pandas as pd
except:
    pass

try:
    import tensorflow.keras as keras
except:
    pass

try:
    from sklearn.model_selection import train_test_split
except:
    pass

try:
    import matplotlib.pyplot as plt
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

>>> Hyperas search space:

def get_space():
    return {
        'Dense': hp.choice('Dense', [16,32,64,128]),
        'Dropout': hp.uniform('Dropout', 0.1,0.8),
        'Dense_1': hp.choice('Dense_1', [16,32,64,128]),
        'Dropout_1': hp.uniform('Dropout_1', 0.1,0.8),
        'Dense_2': hp.choice('Dense_2', [16,32,64,128]),
        'Dropout_2': hp.uniform('Dropout_2', 0.1,0.8),
        'Dense_3': hp.choice('Dense_3', [2,4,8,16]),
    

Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 712 samples, validate on 179 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 712 samples, validate on 179 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train on 712 samples, validate on 179 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [69]:
# Report how the selected network does on the held-out split, then which
# hyperparameter choices produced it.
print("Evalutation of best performing model:")
holdout_metrics = best_model.evaluate(x_test_, y_test_)  # loss first, then the compiled metric
print(holdout_metrics)
print("Best performing model chosen hyper-parameters:")
print(best_run)

Evalutation of best performing model:
[0.7792185238619757, 0.7877094955417697]
Best performing model chosen hyper-parameters:
{'Dense': 2, 'Dense_1': 1, 'Dense_2': 1, 'Dropout': 0.616018886223065, 'Dropout_1': 0.5562577708421448, 'Dropout_2': 0.4059813816022896}


In [77]:
import xgboost as xgb  
from sklearn.model_selection import StratifiedKFold

In [90]:
# Split the engineered frame back into its two parts by position. The
# training rows come first, so the boundary is the number of training labels
# rather than a hard-coded row count.
n_train = len(y_train)
x_train = data.iloc[:n_train]
test_x = data.iloc[n_train:]


In [91]:
# Sanity check: number of training labels.
y_train.shape

(891,)

In [92]:
# Hold out 20% of the training rows for xgboost's validation watchlist.
X, val_X, y, val_y = splitData(x_train, y_train, splite=0.2)

# Wrap each partition in xgboost's DMatrix container; the unlabeled test
# features get one too.
xgb_train = xgb.DMatrix(X, label=y)
xgb_val = xgb.DMatrix(val_X, label=val_y)
xgb_test = xgb.DMatrix(test_x)

In [97]:
# xgboost model #####################

params = {
    'booster': 'gbtree',
    # 'objective': 'multi:softmax',  # multi-class classification
    # 'objective': 'multi:softprob',   # multi-class probabilities
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    # 'num_class': 9,  # number of classes, used together with multi:softmax
    'gamma': 0.1,  # controls post-pruning; larger is more conservative, typically around 0.1 - 0.2
    'max_depth': 8,  # depth of the trees; larger values overfit more easily
    'alpha': 0,   # L1 regularisation coefficient
    'lambda': 10,  # L2 regularisation weight controlling model complexity; larger values overfit less
    'subsample': 0.7,  # random subsampling of the training rows
    'colsample_bytree': 0.5,  # column subsampling when a tree is built
    'min_child_weight': 3,
    # Defaults to 1: the minimum sum of h (second derivatives) a leaf must hold.
    # For imbalanced 0/1 classification, with h around 0.01 a value of 1 means a
    # leaf needs roughly 100 samples. This parameter strongly affects the
    # result; the smaller it is, the easier the model overfits.
    'silent': 0,  # 1 suppresses run-time messages; 0 keeps them
    'eta': 0.03,  # plays the role of a learning rate
    'seed': 1000,
    'nthread': -1,  # number of CPU threads
    # No 'missing' entry here: `missing` is an argument of xgb.DMatrix, not a
    # booster parameter, and 1 is a regular encoded feature value in this data
    # set, so it must not be declared as the missing-value marker.
    'scale_pos_weight': (np.sum(y==0)/np.sum(y==1))  # compensates class imbalance, usually sum(negative cases) / sum(positive cases)
    # 'eval_metric': 'auc'
}
plst = list(params.items())
num_rounds = 2000  # number of boosting rounds
watchlist = [(xgb_train, 'train'), (xgb_val, 'val')]

# Cross-validation
#result = xgb.cv(plst, xgb_train, num_boost_round=200, nfold=4, early_stopping_rounds=200, verbose_eval=True, folds=StratifiedKFold(n_splits=4).split(X, y))

# Train the model.
# With a large number of rounds, early_stopping_rounds stops training once the
# validation metric has not improved for that many consecutive rounds.
model = xgb.train(plst, xgb_train, num_rounds, watchlist, early_stopping_rounds=200)

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=3
[0]	train-logloss:0.683262	val-logloss:0.683498
Multiple eval metrics have been passed: 'val-logloss' will be used for early stopping.

Will train until val-logloss hasn't improved in 200 rounds.
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2 pruned nodes, max_depth=5
[1]	train-logloss:0.673648	val-logloss:0.673511
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 0 pruned nodes, max_depth=5
[2]	train-logloss:0.670953	val-logloss:0.670127
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=3
[3]	train-logloss:0.662652	val-logloss:0.662075
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tre

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=4
[42]	train-logloss:0.509411	val-logloss:0.506217
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2 pruned nodes, max_depth=4
[43]	train-logloss:0.505947	val-logloss:0.503097
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=5
[44]	train-logloss:0.505103	val-logloss:0.502314
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2 pruned nodes, max_depth=4
[45]	train-logloss:0.50199	val-logloss:0.499712
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=2
[46]	train-logloss:0.499413	val-logloss:0.497409
[16:45:54] d:\build\xgboost

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 0 pruned nodes, max_depth=5
[85]	train-logloss:0.451135	val-logloss:0.45492
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=2
[86]	train-logloss:0.450307	val-logloss:0.454262
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 0 pruned nodes, max_depth=4
[87]	train-logloss:0.450052	val-logloss:0.454075
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 2 pruned nodes, max_depth=5
[88]	train-logloss:0.449137	val-logloss:0.453475
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 2 pruned nodes, max_depth=7
[89]	train-logloss:0.448532	val-logloss:0.452811
[16:45:54] d:\build\xgboost\

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 2 pruned nodes, max_depth=8
[127]	train-logloss:0.422358	val-logloss:0.434642
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 2 pruned nodes, max_depth=6
[128]	train-logloss:0.421702	val-logloss:0.434184
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=5
[129]	train-logloss:0.421301	val-logloss:0.434284
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 4 pruned nodes, max_depth=5
[130]	train-logloss:0.421131	val-logloss:0.434171
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 6 pruned nodes, max_depth=5
[131]	train-logloss:0.420807	val-logloss:0.434026
[16:45:54] d:\build\

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=6
[169]	train-logloss:0.409657	val-logloss:0.428759
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 0 pruned nodes, max_depth=8
[170]	train-logloss:0.409292	val-logloss:0.42877
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 2 pruned nodes, max_depth=7
[171]	train-logloss:0.408882	val-logloss:0.428356
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 0 pruned nodes, max_depth=6
[172]	train-logloss:0.408347	val-logloss:0.428274
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 0 pruned nodes, max_depth=5
[173]	train-logloss:0.408156	val-logloss:0.428297
[16:45:54] d:\build\x

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=4
[211]	train-logloss:0.399123	val-logloss:0.425797
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 2 pruned nodes, max_depth=7
[212]	train-logloss:0.398858	val-logloss:0.425775
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=7
[213]	train-logloss:0.398631	val-logloss:0.425597
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 4 pruned nodes, max_depth=6
[214]	train-logloss:0.398636	val-logloss:0.425582
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2 pruned nodes, max_depth=6
[215]	train-logloss:0.398627	val-logloss:0.425415
[16:45:54] d:\build\

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 2 pruned nodes, max_depth=5
[253]	train-logloss:0.392117	val-logloss:0.423636
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 2 pruned nodes, max_depth=4
[254]	train-logloss:0.392078	val-logloss:0.423745
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 2 pruned nodes, max_depth=8
[255]	train-logloss:0.391945	val-logloss:0.423696
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 0 pruned nodes, max_depth=7
[256]	train-logloss:0.391792	val-logloss:0.423547
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 0 pruned nodes, max_depth=5
[257]	train-logloss:0.39165	val-logloss:0.423611
[16:45:54] d:\build\x

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 4 pruned nodes, max_depth=6
[295]	train-logloss:0.387283	val-logloss:0.421226
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 2 pruned nodes, max_depth=6
[296]	train-logloss:0.387111	val-logloss:0.421011
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2 pruned nodes, max_depth=4
[297]	train-logloss:0.387043	val-logloss:0.421077
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 6 pruned nodes, max_depth=4
[298]	train-logloss:0.38699	val-logloss:0.421293
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 2 pruned nodes, max_depth=4
[299]	train-logloss:0.386838	val-logloss:0.421433
[16:45:54] d:\build\x

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 4 pruned nodes, max_depth=5
[337]	train-logloss:0.382268	val-logloss:0.419028
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 2 pruned nodes, max_depth=6
[338]	train-logloss:0.382127	val-logloss:0.418833
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 4 pruned nodes, max_depth=6
[339]	train-logloss:0.38214	val-logloss:0.418853
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 4 pruned nodes, max_depth=6
[340]	train-logloss:0.382121	val-logloss:0.418806
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 2 pruned nodes, max_depth=7
[341]	train-logloss:0.381896	val-logloss:0.41842
[16:45:54] d:\build\xg

[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 2 pruned nodes, max_depth=5
[379]	train-logloss:0.378353	val-logloss:0.417406
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=3
[380]	train-logloss:0.378423	val-logloss:0.417534
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 2 pruned nodes, max_depth=3
[381]	train-logloss:0.378552	val-logloss:0.417792
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2 pruned nodes, max_depth=5
[382]	train-logloss:0.378582	val-logloss:0.417966
[16:45:54] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 2 pruned nodes, max_depth=6
[383]	train-logloss:0.378538	val-logloss:0.418018
[16:45:54] d:\build\

[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 2 pruned nodes, max_depth=6
[421]	train-logloss:0.376257	val-logloss:0.418329
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2 pruned nodes, max_depth=5
[422]	train-logloss:0.376091	val-logloss:0.418239
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 0 pruned nodes, max_depth=3
[423]	train-logloss:0.375764	val-logloss:0.418045
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 4 pruned nodes, max_depth=5
[424]	train-logloss:0.375579	val-logloss:0.417918
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2 pruned nodes, max_depth=4
[425]	train-logloss:0.375655	val-logloss:0.418079
[16:45:55] d:\build\

[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 4 pruned nodes, max_depth=6
[463]	train-logloss:0.373652	val-logloss:0.418848
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 2 pruned nodes, max_depth=7
[464]	train-logloss:0.373612	val-logloss:0.4188
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 6 pruned nodes, max_depth=6
[465]	train-logloss:0.373466	val-logloss:0.418622
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 4 pruned nodes, max_depth=4
[466]	train-logloss:0.373512	val-logloss:0.41871
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=4
[467]	train-logloss:0.373142	val-logloss:0.418442
[16:45:55] d:\build\xgb

[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 8 pruned nodes, max_depth=5
[505]	train-logloss:0.371387	val-logloss:0.418791
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2 pruned nodes, max_depth=4
[506]	train-logloss:0.371422	val-logloss:0.419199
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 4 pruned nodes, max_depth=5
[507]	train-logloss:0.371376	val-logloss:0.419288
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 2 pruned nodes, max_depth=4
[508]	train-logloss:0.371002	val-logloss:0.418925
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 0 pruned nodes, max_depth=8
[509]	train-logloss:0.370847	val-logloss:0.418687
[16:45:55] d:\build\x

[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 0 pruned nodes, max_depth=4
[547]	train-logloss:0.368746	val-logloss:0.418
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 0 pruned nodes, max_depth=2
[548]	train-logloss:0.36893	val-logloss:0.418329
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 8 pruned nodes, max_depth=4
[549]	train-logloss:0.36887	val-logloss:0.418359
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 6 pruned nodes, max_depth=6
[550]	train-logloss:0.368693	val-logloss:0.418373
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 2 pruned nodes, max_depth=6
[551]	train-logloss:0.368764	val-logloss:0.418526
[16:45:55] d:\build\xgboos

[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 2 pruned nodes, max_depth=5
[589]	train-logloss:0.367283	val-logloss:0.419081
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 8 pruned nodes, max_depth=4
[590]	train-logloss:0.367178	val-logloss:0.418968
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 4 pruned nodes, max_depth=6
[591]	train-logloss:0.367216	val-logloss:0.419276
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 4 pruned nodes, max_depth=2
[592]	train-logloss:0.367109	val-logloss:0.419006
[16:45:55] d:\build\xgboost\xgboost-0.80.git\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2 pruned nodes, max_depth=5
[593]	train-logloss:0.367257	val-logloss:0.41916
[16:45:55] d:\build\xg

In [162]:
# Survival probabilities for the test rows from both models.
xgb_preds = model.predict(xgb_test)
# predict() returns the sigmoid output directly, which is the same array the
# removed Sequential.predict_proba helper used to hand back.
keras_preds = best_model.predict(np.asarray(test_x))

In [163]:
# Inspect the xgboost probabilities.
xgb_preds

array([0.17255734, 0.5757907 , 0.11924914, 0.19294663, 0.6172026 ,
       0.11734574, 0.8276754 , 0.14319389, 0.89237195, 0.17571671,
       0.22313778, 0.3843589 , 0.9757859 , 0.09896456, 0.9771061 ,
       0.97567165, 0.1806896 , 0.31458145, 0.50717556, 0.83370507,
       0.5704837 , 0.749037  , 0.98365176, 0.519889  , 0.88647854,
       0.04550392, 0.9765876 , 0.31458145, 0.3843589 , 0.26836655,
       0.09896456, 0.22390467, 0.5417711 , 0.5417711 , 0.62049687,
       0.31458145, 0.58177525, 0.335263  , 0.10313906, 0.4284265 ,
       0.08705541, 0.53220975, 0.10635255, 0.9683498 , 0.9771061 ,
       0.181605  , 0.37978268, 0.17255734, 0.977792  , 0.69814706,
       0.4447308 , 0.28519362, 0.95690686, 0.7590522 , 0.28519362,
       0.1430439 , 0.22313778, 0.181605  , 0.10311866, 0.9689544 ,
       0.16783649, 0.21000879, 0.16783649, 0.7381027 , 0.6327882 ,
       0.98182374, 0.7548502 , 0.29123092, 0.5195701 , 0.84716034,
       0.7381027 , 0.181605  , 0.77009195, 0.5195701 , 0.97995

In [164]:
# Compare output shapes: the two arrays must be brought to the same shape
# before they can be blended element-wise.
print(xgb_preds.shape)
print(keras_preds.shape)

(418,)
(418, 1)


In [167]:
# Flatten the single-column Keras output and apply its blend weight of 0.5.
# NOTE(review): this rebinds keras_preds, so the cell is meant to run exactly
# once per prediction run - re-running it would index an already flattened array.
keras_preds = keras_preds[:, 0] * 0.5
keras_preds

array([0.01457145, 0.02533494, 0.02519464, 0.01518796, 0.03431762,
       0.01684413, 0.05483408, 0.01950385, 0.03142283, 0.01527001,
       0.0132472 , 0.03384932, 0.09700447, 0.02404288, 0.09803021,
       0.08422038, 0.01834996, 0.01603214, 0.03693372, 0.04410815,
       0.06066146, 0.03574442, 0.0916253 , 0.07287209, 0.08998558,
       0.01341296, 0.10728007, 0.01603214, 0.03384932, 0.02335502,
       0.02020602, 0.03438561, 0.04514514, 0.04514514, 0.06405739,
       0.01603214, 0.06208224, 0.06050642, 0.01451605, 0.0439381 ,
       0.02110328, 0.03002387, 0.01431863, 0.0470717 , 0.09803021,
       0.01332126, 0.04330858, 0.01457145, 0.10266905, 0.05144788,
       0.05264786, 0.03356732, 0.04402005, 0.05028793, 0.03356732,
       0.01033195, 0.0132472 , 0.01332126, 0.01748494, 0.10700908,
       0.0157977 , 0.0157834 , 0.0157977 , 0.04796223, 0.04363322,
       0.06611738, 0.0444245 , 0.04907053, 0.04088915, 0.07190868,
       0.04796223, 0.01332126, 0.0473084 , 0.04088915, 0.10752

In [169]:
# Blend: the already half-weighted Keras probabilities plus half of the
# xgboost probabilities.
preds = 0.5 * xgb_preds + keras_preds
preds

array([0.10085012, 0.31323028, 0.08481921, 0.11166128, 0.3429189 ,
       0.07551701, 0.46867177, 0.0911008 , 0.4776088 , 0.10312837,
       0.12481609, 0.22602877, 0.5848974 , 0.07352516, 0.58658326,
       0.5720562 , 0.10869476, 0.17332287, 0.2905215 , 0.4609607 ,
       0.3459033 , 0.41026294, 0.58345115, 0.3328166 , 0.5332248 ,
       0.03616492, 0.59557384, 0.17332287, 0.22602877, 0.1575383 ,
       0.06968831, 0.14633794, 0.3160307 , 0.3160307 , 0.37430584,
       0.17332287, 0.35296986, 0.22813793, 0.06608558, 0.25815135,
       0.06463099, 0.29612875, 0.06749491, 0.5312466 , 0.58658326,
       0.10412376, 0.23319992, 0.10085012, 0.5915651 , 0.4005214 ,
       0.27501327, 0.17616414, 0.52247345, 0.42981404, 0.17616414,
       0.08185391, 0.12481609, 0.10412376, 0.06904427, 0.5914863 ,
       0.09971594, 0.1207878 , 0.09971594, 0.41701356, 0.3600273 ,
       0.55702925, 0.4218496 , 0.194686  , 0.3006742 , 0.49548885,
       0.41701356, 0.10412376, 0.43235436, 0.3006742 , 0.59750

In [174]:
# Start with an empty list that will receive one 0/1 label per test row.
predictions = []
predictions

[]

In [175]:

# Export the result: turn each blended probability into a 0/1 label.
# The list is rebuilt from scratch on every run, so re-executing this cell
# cannot pile duplicate entries onto an existing list.
threshold = 0.5
predictions = [1 if pred > threshold else 0 for pred in preds]

predictions

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,


In [172]:
# Keras-only class labels, kept for comparison under their own name so they
# do not overwrite the blended `predictions` used for the submission below.
# They are computed from the encoded test features (test_x); the raw x_val
# frame still holds text columns the network cannot consume. Thresholding
# the sigmoid output at 0.5 mirrors what Sequential.predict_classes did.
keras_only_predictions = (best_model.predict(np.asarray(test_x)) > 0.5).astype('int32')

In [176]:
# Start the submission table from the test passengers' identifiers
# (copied so the raw test table is left untouched).
ids = data_test['PassengerId'].copy()
new_output = ids.to_frame()
new_output.head(10)

Unnamed: 0,PassengerId
0,892
1,893
2,894
3,895
4,896
5,897
6,898
7,899
8,900
9,901


In [177]:
# Attach the predicted labels; `predictions` is expected to hold one entry per
# test row, in the same order as the identifiers.
new_output['Survived'] = predictions
new_output.sample(10)

Unnamed: 0,PassengerId,Survived
360,1252,0
170,1062,0
224,1116,1
358,1250,0
309,1201,0
308,1200,0
150,1042,1
10,902,0
21,913,0
261,1153,0


In [178]:
# Write the submission file (PassengerId, Survived) without the index column.
new_output.to_csv('./working/my_submit.csv',index=False)