In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 200)

In [2]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

In [3]:
train_test_data = pd.concat([train_data, test_data])

In [4]:
train_test_data.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1.0,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0.0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0.0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0.0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0.0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1.0,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1.0,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [5]:
train_test_data.isnull().sum()

PassengerId       0
Survived        418
Pclass            0
Name              0
Sex               0
Age             263
SibSp             0
Parch             0
Ticket            0
Fare              1
Cabin          1014
Embarked          2
dtype: int64

In [6]:
train_test_data.shape

(1309, 12)

In [6]:
train_test_data['Cabin'] = train_test_data['Cabin'].str[:1]

In [7]:
object_columns = ['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked']

for column in object_columns:
    train_test_data[column] = pd.factorize(train_test_data[column])[0]

In [8]:
train_test_data['Cabin'] = train_test_data['Cabin'].replace(-1, np.nan)

In [9]:
train_test_data['Cabin'].fillna(train_test_data.groupby('Pclass')['Cabin'].transform('median'), inplace=True)

In [10]:
train_test_data.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,0,0,22.0,1,0,0,7.25,4.0,0
1,2,1.0,1,1,1,38.0,1,0,1,71.2833,0.0,1
2,3,1.0,3,2,1,26.0,0,0,2,7.925,4.0,0
3,4,1.0,1,3,1,35.0,1,0,3,53.1,0.0,0
4,5,0.0,3,4,0,35.0,0,0,4,8.05,4.0,0
5,6,0.0,3,5,0,,0,0,5,8.4583,4.0,2
6,7,0.0,1,6,0,54.0,0,0,6,51.8625,1.0,0
7,8,0.0,3,7,0,2.0,3,1,7,21.075,4.0,0
8,9,1.0,3,8,1,27.0,0,2,8,11.1333,4.0,0
9,10,1.0,2,9,1,14.0,1,0,9,30.0708,6.0,1


In [11]:
train_data = train_test_data[~train_test_data['Survived'].isnull()]
test_data = train_test_data[train_test_data['Survived'].isnull()]

In [12]:
ftr_train = train_data.drop(['PassengerId', 'Survived', 'Name', 'Ticket'], axis=1)
target = train_data['Survived']

In [13]:
from sklearn.model_selection import train_test_split

train_x, valid_x, train_y, valid_y = train_test_split(ftr_train, target, test_size=0.3, random_state=2022)

In [14]:
from bayes_opt import BayesianOptimization
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier

bayesian_params = {
    'max_depth': (2, 8), 
    'num_leaves': (2, 12), 
    'min_child_samples': (1, 50), 
    'min_child_weight':(1, 20),
    'subsample':(0.5, 1.0),
    'max_bin':(10, 500),
    'reg_lambda':(0.001, 10),
    'reg_alpha': (0.01, 50) 
}

In [15]:
def lgb_roc_eval(max_depth, num_leaves, min_child_samples, min_child_weight, subsample, 
                max_bin, reg_lambda, reg_alpha):
    params = {
        "n_estimators":100, "learning_rate":0.02,
        'max_depth': int(round(max_depth)),
        'num_leaves': int(round(num_leaves)), 
        'min_child_samples': int(round(min_child_samples)),
        'min_child_weight': int(round(min_child_weight)),
        'subsample': max(min(subsample, 1), 0), 
        'max_bin':  max(int(round(max_bin)),10),
        'reg_lambda': max(reg_lambda,0),
        'reg_alpha': max(reg_alpha, 0)
    }
    lgb_model = LGBMClassifier(**params)
    lgb_model.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 50, 
                early_stopping_rounds= 50)
    valid_proba = lgb_model.predict_proba(valid_x)[:, 1]
    roc_auc = roc_auc_score(valid_y, valid_proba)
    
    return roc_auc   

In [16]:
lgbB0 = BayesianOptimization(f=lgb_roc_eval, pbounds=bayesian_params, random_state=2022)
lgbB0.maximize(init_points=5, n_iter=25)

|   iter    |  target   |  max_bin  | max_depth | min_ch... | min_ch... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------
[50]	training's auc: 0.845481	training's binary_logloss: 0.551161	valid_1's auc: 0.851845	valid_1's binary_logloss: 0.551627
| [0m 1       [0m | [0m 0.8518  [0m | [0m 14.59   [0m | [0m 4.994   [0m | [0m 6.556   [0m | [0m 1.95    [0m | [0m 8.854   [0m | [0m 24.35   [0m | [0m 8.977   [0m | [0m 0.8237  [0m |
[50]	training's auc: 0.820232	training's binary_logloss: 0.603963	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.595709
| [95m 2       [0m | [95m 0.8521  [0m | [95m 449.5   [0m | [95m 6.327   [0m | [95m 41.74   [0m | [95m 16.72   [0m | [95m 10.34   [0m | [95m 47.85   [0m | [95m 3.681   [0m | [95m 0.7474  [0m |




[50]	training's auc: 0.853615	training's binary_logloss: 0.514307	valid_1's auc: 0.83753	valid_1's binary_logloss: 0.524435
[100]	training's auc: 0.857324	training's binary_logloss: 0.471569	valid_1's auc: 0.84631	valid_1's binary_logloss: 0.483002
| [0m 3       [0m | [0m 0.8463  [0m | [0m 176.4   [0m | [0m 5.717   [0m | [0m 48.9    [0m | [0m 2.832   [0m | [0m 9.442   [0m | [0m 14.63   [0m | [0m 2.987   [0m | [0m 0.8762  [0m |
[50]	training's auc: 0.842129	training's binary_logloss: 0.546798	valid_1's auc: 0.85244	valid_1's binary_logloss: 0.547076




[100]	training's auc: 0.842802	training's binary_logloss: 0.509751	valid_1's auc: 0.853036	valid_1's binary_logloss: 0.509269
| [95m 4       [0m | [95m 0.8527  [0m | [95m 19.15   [0m | [95m 5.142   [0m | [95m 43.36   [0m | [95m 8.388   [0m | [95m 4.122   [0m | [95m 23.76   [0m | [95m 5.647   [0m | [95m 0.6747  [0m |
[50]	training's auc: 0.826181	training's binary_logloss: 0.596646	valid_1's auc: 0.852321	valid_1's binary_logloss: 0.589651
| [0m 5       [0m | [0m 0.8523  [0m | [0m 488.2   [0m | [0m 2.227   [0m | [0m 39.92   [0m | [0m 7.8     [0m | [0m 9.48    [0m | [0m 45.73   [0m | [0m 3.727   [0m | [0m 0.9824  [0m |




[50]	training's auc: 0.839933	training's binary_logloss: 0.512657	valid_1's auc: 0.842946	valid_1's binary_logloss: 0.512864
[100]	training's auc: 0.852839	training's binary_logloss: 0.471963	valid_1's auc: 0.84747	valid_1's binary_logloss: 0.476862
| [0m 6       [0m | [0m 0.8475  [0m | [0m 369.9   [0m | [0m 6.892   [0m | [0m 1.817   [0m | [0m 9.234   [0m | [0m 3.381   [0m | [0m 1.986   [0m | [0m 3.781   [0m | [0m 0.6131  [0m |
[50]	training's auc: 0.832433	training's binary_logloss: 0.554419	valid_1's auc: 0.851399	valid_1's binary_logloss: 0.553024
| [0m 7       [0m | [0m 0.8514  [0m | [0m 13.01   [0m | [0m 4.861   [0m | [0m 9.66    [0m | [0m 1.672   [0m | [0m 7.775   [0m | [0m 25.12   [0m | [0m 9.579   [0m | [0m 0.8863  [0m |
[50]	training's auc: 0.849987	training's binary_logloss: 0.498862	valid_1's auc: 0.843571	valid_1's binary_logloss: 0.509331




| [0m 8       [0m | [0m 0.8482  [0m | [0m 42.36   [0m | [0m 3.28    [0m | [0m 23.81   [0m | [0m 16.49   [0m | [0m 8.566   [0m | [0m 6.481   [0m | [0m 0.2891  [0m | [0m 0.852   [0m |
[50]	training's auc: 0.828626	training's binary_logloss: 0.576826	valid_1's auc: 0.850595	valid_1's binary_logloss: 0.573159
| [0m 9       [0m | [0m 0.8506  [0m | [0m 467.1   [0m | [0m 4.934   [0m | [0m 49.03   [0m | [0m 1.307   [0m | [0m 5.385   [0m | [0m 39.5    [0m | [0m 0.335   [0m | [0m 0.9416  [0m |




[50]	training's auc: 0.76122	training's binary_logloss: 0.598129	valid_1's auc: 0.780595	valid_1's binary_logloss: 0.586525
| [0m 10      [0m | [0m 0.7806  [0m | [0m 14.13   [0m | [0m 4.021   [0m | [0m 46.08   [0m | [0m 3.953   [0m | [0m 2.357   [0m | [0m 42.06   [0m | [0m 0.1262  [0m | [0m 0.9961  [0m |
[50]	training's auc: 0.839033	training's binary_logloss: 0.520355	valid_1's auc: 0.827262	valid_1's binary_logloss: 0.529765
[100]	training's auc: 0.854249	training's binary_logloss: 0.479175	valid_1's auc: 0.846935	valid_1's binary_logloss: 0.488612
| [0m 11      [0m | [0m 0.8469  [0m | [0m 23.08   [0m | [0m 3.578   [0m | [0m 33.29   [0m | [0m 14.94   [0m | [0m 11.81   [0m | [0m 13.29   [0m | [0m 7.665   [0m | [0m 0.6434  [0m |
[50]	training's auc: 0.820232	training's binary_logloss: 0.612316	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.603134
| [0m 12      [0m | [0m 0.8521  [0m | [0m 469.3   [0m | [0m 2.914   [0m | [0m 43.16   



[50]	training's auc: 0.81931	training's binary_logloss: 0.558165	valid_1's auc: 0.837232	valid_1's binary_logloss: 0.545115
| [0m 13      [0m | [0m 0.8372  [0m | [0m 21.05   [0m | [0m 8.0     [0m | [0m 48.93   [0m | [0m 5.889   [0m | [0m 2.0     [0m | [0m 9.213   [0m | [0m 9.827   [0m | [0m 0.5     [0m |




[50]	training's auc: 0.826376	training's binary_logloss: 0.572796	valid_1's auc: 0.850863	valid_1's binary_logloss: 0.569047
| [0m 14      [0m | [0m 0.8509  [0m | [0m 473.6   [0m | [0m 5.44    [0m | [0m 36.32   [0m | [0m 9.146   [0m | [0m 11.4    [0m | [0m 36.91   [0m | [0m 3.27    [0m | [0m 0.8368  [0m |
[50]	training's auc: 0.820232	training's binary_logloss: 0.608444	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.599474
| [0m 15      [0m | [0m 0.8521  [0m | [0m 461.8   [0m | [0m 4.62    [0m | [0m 35.46   [0m | [0m 1.616   [0m | [0m 4.294   [0m | [0m 49.87   [0m | [0m 0.3371  [0m | [0m 0.9043  [0m |




[50]	training's auc: 0.76122	training's binary_logloss: 0.594156	valid_1's auc: 0.780595	valid_1's binary_logloss: 0.582109
| [0m 16      [0m | [0m 0.7806  [0m | [0m 456.1   [0m | [0m 5.712   [0m | [0m 34.81   [0m | [0m 11.05   [0m | [0m 2.0     [0m | [0m 36.19   [0m | [0m 10.0    [0m | [0m 1.0     [0m |
[50]	training's auc: 0.820232	training's binary_logloss: 0.609835	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.600845
| [0m 17      [0m | [0m 0.8521  [0m | [0m 473.0   [0m | [0m 6.48    [0m | [0m 48.94   [0m | [0m 3.081   [0m | [0m 4.736   [0m | [0m 49.79   [0m | [0m 3.335   [0m | [0m 0.8694  [0m |




[50]	training's auc: 0.82919	training's binary_logloss: 0.567253	valid_1's auc: 0.848006	valid_1's binary_logloss: 0.564108
| [0m 18      [0m | [0m 0.8509  [0m | [0m 482.0   [0m | [0m 3.493   [0m | [0m 42.17   [0m | [0m 1.236   [0m | [0m 8.632   [0m | [0m 36.09   [0m | [0m 0.2237  [0m | [0m 0.9736  [0m |
[50]	training's auc: 0.820232	training's binary_logloss: 0.604491	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.596141
| [0m 19      [0m | [0m 0.8521  [0m | [0m 474.5   [0m | [0m 2.014   [0m | [0m 38.88   [0m | [0m 9.891   [0m | [0m 2.668   [0m | [0m 48.73   [0m | [0m 0.09265 [0m | [0m 0.6318  [0m |




[50]	training's auc: 0.820232	training's binary_logloss: 0.607407	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.59864
| [0m 20      [0m | [0m 0.8521  [0m | [0m 482.7   [0m | [0m 2.859   [0m | [0m 46.94   [0m | [0m 18.76   [0m | [0m 9.433   [0m | [0m 49.36   [0m | [0m 1.473   [0m | [0m 0.7005  [0m |
[50]	training's auc: 0.820232	training's binary_logloss: 0.608876	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.599795
| [0m 21      [0m | [0m 0.8521  [0m | [0m 476.6   [0m | [0m 8.0     [0m | [0m 30.78   [0m | [0m 1.0     [0m | [0m 12.0    [0m | [0m 50.0    [0m | [0m 0.001   [0m | [0m 0.5     [0m |




[50]	training's auc: 0.820232	training's binary_logloss: 0.608876	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.599795
| [0m 22      [0m | [0m 0.8521  [0m | [0m 435.7   [0m | [0m 6.939   [0m | [0m 50.0    [0m | [0m 20.0    [0m | [0m 12.0    [0m | [0m 50.0    [0m | [0m 0.001   [0m | [0m 0.5     [0m |
[50]	training's auc: 0.81931	training's binary_logloss: 0.572282	valid_1's auc: 0.837232	valid_1's binary_logloss: 0.559029
| [0m 23      [0m | [0m 0.8372  [0m | [0m 28.19   [0m | [0m 8.0     [0m | [0m 12.23   [0m | [0m 14.08   [0m | [0m 2.0     [0m | [0m 20.12   [0m | [0m 10.0    [0m | [0m 0.6289  [0m |




[50]	training's auc: 0.820232	training's binary_logloss: 0.611365	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.602209
| [0m 24      [0m | [0m 0.8521  [0m | [0m 488.1   [0m | [0m 4.182   [0m | [0m 19.91   [0m | [0m 15.98   [0m | [0m 7.979   [0m | [0m 49.92   [0m | [0m 5.002   [0m | [0m 0.5028  [0m |
[50]	training's auc: 0.82919	training's binary_logloss: 0.564503	valid_1's auc: 0.848006	valid_1's binary_logloss: 0.561716
| [0m 25      [0m | [0m 0.8509  [0m | [0m 494.7   [0m | [0m 5.559   [0m | [0m 24.39   [0m | [0m 6.938   [0m | [0m 7.811   [0m | [0m 32.15   [0m | [0m 5.55    [0m | [0m 0.9432  [0m |




[50]	training's auc: 0.828626	training's binary_logloss: 0.565193	valid_1's auc: 0.850595	valid_1's binary_logloss: 0.562887
| [0m 26      [0m | [0m 0.8506  [0m | [0m 490.1   [0m | [0m 2.845   [0m | [0m 38.36   [0m | [0m 17.3    [0m | [0m 10.23   [0m | [0m 31.7    [0m | [0m 6.296   [0m | [0m 0.5999  [0m |
[50]	training's auc: 0.826181	training's binary_logloss: 0.588638	valid_1's auc: 0.852321	valid_1's binary_logloss: 0.582569
| [0m 27      [0m | [0m 0.8523  [0m | [0m 485.5   [0m | [0m 2.923   [0m | [0m 7.683   [0m | [0m 3.172   [0m | [0m 8.357   [0m | [0m 43.76   [0m | [0m 1.026   [0m | [0m 0.9643  [0m |




[50]	training's auc: 0.828626	training's binary_logloss: 0.555808	valid_1's auc: 0.850595	valid_1's binary_logloss: 0.554712
| [0m 28      [0m | [0m 0.8506  [0m | [0m 494.9   [0m | [0m 7.71    [0m | [0m 2.615   [0m | [0m 15.76   [0m | [0m 10.97   [0m | [0m 29.85   [0m | [0m 2.252   [0m | [0m 0.6614  [0m |
[50]	training's auc: 0.820232	training's binary_logloss: 0.611024	valid_1's auc: 0.852083	valid_1's binary_logloss: 0.601957
| [0m 29      [0m | [0m 0.8521  [0m | [0m 498.4   [0m | [0m 4.122   [0m | [0m 3.151   [0m | [0m 18.51   [0m | [0m 9.075   [0m | [0m 49.76   [0m | [0m 5.456   [0m | [0m 0.6832  [0m |
[50]	training's auc: 0.827943	training's binary_logloss: 0.59988	valid_1's auc: 0.850417	valid_1's binary_logloss: 0.592773
| [0m 30      [0m | [0m 0.8506  [0m | [0m 481.8   [0m | [0m 4.931   [0m | [0m 1.737   [0m | [0m 19.05   [0m | [0m 9.412   [0m | [0m 45.86   [0m | [0m 6.801   [0m | [0m 0.7783  [0m |




In [17]:
lgbB0.res

[{'target': 0.8518452380952382,
  'params': {'max_bin': 14.585720765804705,
   'max_depth': 4.994346865111928,
   'min_child_samples': 6.555800806806923,
   'min_child_weight': 1.949506345374979,
   'num_leaves': 8.854075942430917,
   'reg_alpha': 24.354533530104362,
   'reg_lambda': 8.976674607206252,
   'subsample': 0.8237260353716331}},
 {'target': 0.8520833333333333,
  'params': {'max_bin': 449.5119301675884,
   'max_depth': 6.326809574988113,
   'min_child_samples': 41.7363176180333,
   'min_child_weight': 16.72379330761429,
   'num_leaves': 10.335795840581405,
   'reg_alpha': 47.85264633978365,
   'reg_lambda': 3.681076325612883,
   'subsample': 0.7474188149426438}},
 {'target': 0.8463095238095237,
  'params': {'max_bin': 176.3596425478705,
   'max_depth': 5.716575957469941,
   'min_child_samples': 48.89895228264298,
   'min_child_weight': 2.8322284751214624,
   'num_leaves': 9.442062122546155,
   'reg_alpha': 14.632048710028132,
   'reg_lambda': 2.9874548342985574,
   'subsample

In [18]:
target_list = []
for result in lgbB0.res:
    target = result['target']
    target_list.append(target)

print(target_list)
print('maximum target index', np.argmax(np.array(target_list)))

[0.8518452380952382, 0.8520833333333333, 0.8463095238095237, 0.8527083333333333, 0.8523214285714287, 0.8474702380952381, 0.8513988095238096, 0.848154761904762, 0.8505952380952381, 0.780595238095238, 0.8469345238095238, 0.8520833333333333, 0.837232142857143, 0.8508630952380953, 0.8520833333333333, 0.780595238095238, 0.8520833333333333, 0.8508630952380953, 0.8520833333333333, 0.8520833333333333, 0.8520833333333333, 0.8520833333333333, 0.837232142857143, 0.8520833333333333, 0.8508630952380953, 0.8505952380952381, 0.8523214285714287, 0.8505952380952381, 0.8520833333333333, 0.8505952380952381]
maximum target index 3


In [19]:
max_dict = lgbB0.res[np.argmax(np.array(target_list))]
print(max_dict)

{'target': 0.8527083333333333, 'params': {'max_bin': 19.145226575486895, 'max_depth': 5.142424615909564, 'min_child_samples': 43.35735651281808, 'min_child_weight': 8.388013965015574, 'num_leaves': 4.1219184946689085, 'reg_alpha': 23.764283405999475, 'reg_lambda': 5.647159507234463, 'subsample': 0.6747146479983777}}


In [20]:
clf = LGBMClassifier(
    n_jobs=-1,
    n_estimators=50,
    learning_rate=0.02,
    max_bin=19,
    min_child_samples=43,
    min_child_weight=8,
    num_leaves=4,
    subsample=0.982,
    max_depth=5,
    reg_alpha=23.764,
    reg_lambda=5.647,
    silent=-1,
    verbose=-1
)

clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric='auc', verbose=30,
       early_stopping_rounds=30)

[30]	training's auc: 0.842433	training's binary_logloss: 0.577363	valid_1's auc: 0.850357	valid_1's binary_logloss: 0.578088




LGBMClassifier(learning_rate=0.02, max_bin=19, max_depth=5,
               min_child_samples=43, min_child_weight=8, n_estimators=50,
               num_leaves=4, reg_alpha=23.764, reg_lambda=5.647, silent=-1,
               subsample=0.982, verbose=-1)

In [21]:
preds = clf.predict_proba(test_data.drop(['PassengerId', 'Survived', 'Name', 'Ticket'], axis=1))[:, 1]

In [22]:
preds.tolist()

[0.2937638794965778,
 0.4007266744970496,
 0.2937638794965778,
 0.2937638794965778,
 0.4007266744970496,
 0.2937638794965778,
 0.4007266744970496,
 0.3622788280846253,
 0.4007266744970496,
 0.2937638794965778,
 0.2937638794965778,
 0.3174429555325261,
 0.5883201932799521,
 0.2937638794965778,
 0.5883201932799521,
 0.5883201932799521,
 0.2937638794965778,
 0.2937638794965778,
 0.4007266744970496,
 0.4007266744970496,
 0.3745505990066086,
 0.30612646073154004,
 0.5883201932799521,
 0.3884430176565008,
 0.5883201932799521,
 0.2937638794965778,
 0.5883201932799521,
 0.2937638794965778,
 0.3745505990066086,
 0.2937638794965778,
 0.2937638794965778,
 0.3487884725369975,
 0.4007266744970496,
 0.4007266744970496,
 0.3745505990066086,
 0.2937638794965778,
 0.4007266744970496,
 0.4007266744970496,
 0.2937638794965778,
 0.35031445706076497,
 0.30612646073154004,
 0.3745505990066086,
 0.2937638794965778,
 0.5883201932799521,
 0.5883201932799521,
 0.2937638794965778,
 0.3745505990066086,
 0.2937638

In [23]:
preds = [1 if x > 0.5 else 0 for x in preds]

In [24]:
test_data['Survived'] = preds

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Survived'] = preds


In [25]:
test_data[['PassengerId', 'Survived']].to_csv('titanic_hyperparameter_tuning_2.csv', index=False)