In [15]:
##### import pandas as pd
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
 
 
def change_obj_to_category(df):
    i=0;
    while i<df.columns.size:
        if (df.dtypes[df.columns[i]] == 'object' or df.dtypes[df.columns[i]] == 'bool'):
            col = df.dtypes.index[i]
            df[col] = df[col].astype('category')
        i = i+1 
        
def change_int_to_float(df):
    i=0;
    while i<df.columns.size:
        if 'int' in str(df.dtypes[df.columns[i]]):
            col = df.dtypes.index[i]
            df[col] = df[col].astype('float64')
        i = i+1
       
def generateXY(df,target_col,var_list):
   
    #make a copy of the required subset and drop rows containing Na
    mdata = df[var_list+[target_col]].copy()
    mdata.dropna(inplace=True)
   
    #Separate target and put it in Y
    Y = mdata[target_col].tolist()
    del mdata[target_col]
   
    #Now, process data and create dummy variables if required with final data in Xvars
    import pandas
    Xvars = pandas.DataFrame()
    import pandas as pd
    for cols in var_list:
        if (str(mdata[cols].dtype) == 'category'):
            dummySer= pd.get_dummies(mdata[cols],prefix=cols+'_')
            Xvars = pd.concat([Xvars,dummySer],axis=1)
        else:
            Xvars =  pd.concat([Xvars,mdata[cols]],axis=1)
   
    X = Xvars.values.tolist()
   
    return X,Y
        
        
def roc_score_model(model,X,Y):
    from sklearn.metrics import roc_auc_score
    return roc_auc_score(Y,pd.DataFrame((model.predict_proba(X)))[1].tolist())
 
def feature_imp(colNames,imps):
    df = pd.DataFrame(columns=('Feature','Importance'),index=[x for x in range(0,len(colNames))])
    i = 0
    for col in colNames:
        df['Feature'][i] = col
        df['Importance'][i] = imps[i]
        i=i+1
   
    df = df.sort_values(by='Importance',ascending=False)
    return df
 
def predict_th(model,tx,ty,threshold=0.5):
    import pandas as pd
    probs = model.predict_proba(tx)[:,1].tolist()
    predictions = []
    for i in range(0,len(ty)):
        if probs[i]>threshold:
            predictions.append(1)
        else:
            predictions.append(0)
   
    return predictions  
 
 
print('Starting data read from csv file.')
match_data = pd.read_csv('train.csv',sep=',')


cols_to_delete = ['Cabin','Ticket','Name','PassengerId']
#single_valued_columns = ['same_gender_partner','partners_religion_at_16','religion_at_16','has_domestic_partnership','has_civil_union','no_domestic_partnership_or_civil_union','attended_same_college','how_met_online','partner_gender','lived_together_before_married','time_dating_until_married','time_met_until_married','time_met_until_dating','marriage_count_combined','zip_pct_white','zip_pct_black','zip_pct_hispanic','zip_median_income','zip_pct_foreign_born','zip_rural']
 
print('Deleting columns containing huge missing values, Single valued items and id columns which are of no use.')
print('No data leakage is present as such.')
match_data.drop(cols_to_delete,inplace=True,axis=1)
#match_data.drop(single_valued_columns,inplace=True,axis=1)
match_data.Survived = match_data.Survived.astype('category')


 
print('Changing the type of columns from object to category.')
change_obj_to_category(match_data)
 
numeric_cols = ['Age',
                 'SibSp','Parch',
                 'Fare']       
 
print('Separating numeric data from the data set to perform numeric imputation on missing values.')
numeric_data = match_data[numeric_cols]
for col in numeric_cols:
    match_data.drop([col],axis=1,inplace=True)
 
match_data = match_data.apply(lambda x:x.fillna(x.value_counts().index[0]))
print('Imputed categorical data with the mode of the respective column.')
numeric_data = numeric_data.fillna(numeric_data.median())
print('Imputed numeric columns to its median value.')
 
#Join numeric data to main data set
match_data = match_data.join(numeric_data)
match_data['FamilySize'] = match_data.SibSp + match_data.Parch +1

match_data['IsAlone']=match_data['FamilySize'].apply(lambda x: 0 if x==1 else 1)

match_data['CategoricalFare'] = pd.qcut(match_data['Fare'],3 )
match_data['CategoricalAge'] = pd.qcut(match_data['Age'], 3)
    
cols_to_delete = ['SibSp','Parch','Fare','Age']

match_data.drop(cols_to_delete,inplace=True,axis=1)

print('Setting aside 20% of the data as hold_out.')
#Now, let's set 20% of the data as holdout data [test data]
from sklearn.model_selection import train_test_split
train_data, hold_out = train_test_split(match_data, train_size = 0.6,random_state=2135)

numeric_cols = [
                 'FamilySize']  
 
print('Using standard scaler of sklearn library to Standardize and scale the numeric inputs.')
#Scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
train_data[numeric_cols] = scaler.fit_transform(train_data[numeric_cols])
hold_out[numeric_cols] = scaler.transform(hold_out[numeric_cols])
 
 
print('\nFinding Optimal parameters for 4 different models using GridSearch.')
target_col = 'Survived'
varToUse = train_data.columns.tolist()
varToUse.remove('Survived')
Train_X,Train_Y = generateXY(train_data,target_col,varToUse)
Test_X,Test_Y = generateXY(hold_out,target_col,varToUse)
 
 
print('Optimal Parameters for: ')
from sklearn.pipeline import Pipeline
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
 
#Regularized Logistic Regression
from sklearn.linear_model import SGDClassifier
sgd = SGDClassifier(random_state=2135)
pipeline = Pipeline(steps=[('standardize', preprocessing.StandardScaler())
                           , ('model', sgd) ])
optimized_sgd = GridSearchCV(estimator=pipeline
                            , cv=3
                            , param_grid=dict(model__alpha = [0.0001,0.0002,0.0003,0.0004])
                            , scoring = 'roc_auc'
                            , verbose = 0
                           )
sgdgc = optimized_sgd.fit(Train_X,Train_Y)
print('1. Regularized Logistic Regression: ' + str (sgdgc.best_params_))
 
#Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier(random_state=2135)
pipeline = Pipeline(steps=[('standardize', preprocessing.StandardScaler())
                           , ('model', dt) ])
optimized_dt = GridSearchCV(estimator=pipeline
                            , cv=3
                            , param_grid=dict(model__max_depth =  [5,6,7], model__max_features=[1,2,3,4,5,6,7,8,9,10,11])
                            , scoring = 'roc_auc'
                            , verbose = 0
                           )
dtgc = optimized_dt.fit(Train_X,Train_Y)
print('2. Decision Tree: ' + str (dtgc.best_params_))
 
#Random Forest
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=2135)
pipeline = Pipeline(steps=[('standardize', preprocessing.StandardScaler())
                           , ('model', rf) ])
optimized_rf = GridSearchCV(estimator=pipeline
                            , cv=3
                            , param_grid=dict(model__max_depth =  [5,6], model__max_features=[1,2,3,4,5,6,7,8,9,10,11],model__n_estimators=[40,50])
                            , scoring = 'roc_auc'
                            , verbose = 0
                           )
rfgc = optimized_rf.fit(Train_X,Train_Y)
print('3. Random Forest: ' + str (rfgc.best_params_))
 
 
#Gradient Boosting Classifier
from sklearn.ensemble import GradientBoostingClassifier
gbc = GradientBoostingClassifier(random_state=2135)
pipeline = Pipeline(steps=[('standardize', preprocessing.StandardScaler())
                           , ('model', gbc) ])
optimized_gbc = GridSearchCV(estimator=pipeline
                            , cv=3
                            , param_grid=dict(model__max_depth =  [2,3], model__max_features=[1,2,3,4,5,6,7,8,9,10,11],model__n_estimators=[60,70])
                            , scoring = 'roc_auc'
                            , verbose = 0
                           )
gbcgc = optimized_gbc.fit(Train_X,Train_Y)
print('4. Gradient Boosting Classifier: ' + str (gbcgc.best_params_))
 
 
 
print('\nPerforming Cross validation for 4 different models using optimal parameters obtained from GridSearch.')
print('Cross Validation Scores for: ')
#
best_alpha = float(sgdgc.best_params_['model__alpha'])
sgd_cvscore = cross_val_score(SGDClassifier(random_state=2135,alpha=best_alpha),Train_X,Train_Y,cv=3,scoring='roc_auc')
print ('1. Regularized Logistic Regression: %f' %np.mean(sgd_cvscore))
#
best_depth = int(dtgc.best_params_['model__max_depth'])
best_features = int(dtgc.best_params_['model__max_features'])
dtgc_cvscore = cross_val_score(DecisionTreeClassifier(random_state=2135,max_depth=best_depth,max_features=best_features),Train_X,Train_Y,cv=3,scoring='roc_auc')
print ('2. Decision Tree: %f' %np.mean(dtgc_cvscore))
#
best_depthrf = int(rfgc.best_params_['model__max_depth'])
best_featuresrf = int(rfgc.best_params_['model__max_features'])
best_estimatorsrf = int(rfgc.best_params_['model__n_estimators'])
rfgc_cvscore = cross_val_score(RandomForestClassifier(random_state=2135,n_estimators=best_estimatorsrf,max_depth=best_depthrf,max_features=best_featuresrf),Train_X,Train_Y,cv=3,scoring='roc_auc')
print ('3. Random Forest: %f' %np.mean(rfgc_cvscore))
#
best_depthgbc = int(gbcgc.best_params_['model__max_depth'])
best_featuresgbc = int(gbcgc.best_params_['model__max_features'])
best_estimatorsgbc = int(gbcgc.best_params_['model__n_estimators'])
gbcgc_cvscore = cross_val_score(GradientBoostingClassifier(random_state=2135,n_estimators=best_estimatorsgbc,max_depth=best_depthgbc,max_features=best_featuresgbc),Train_X,Train_Y,cv=3,scoring='roc_auc')
print ('4. Gradient Boosting Classifier: %f' %np.mean(gbcgc_cvscore))
 
 
print ('\nBased on the Cross validation ouptuts, the best model that can be selected is: \'Gradient Boosting Classifier\'')
 
print('\nFitting Gradient Boosting classifier for the dataset.')
print('Testing accuracy of this model on Test data(or hold_out data).')
gbc = GradientBoostingClassifier(random_state=2135,n_estimators=best_estimatorsgbc,max_depth=best_depthgbc,max_features=best_featuresgbc)
gbc = gbc.fit(Train_X,Train_Y)
 
print('\nHoldout ROC/AUC accuracy for Gradient Boosting classifier: '+ str(roc_score_model(gbc,Test_X,Test_Y)))
 
feature_imp(list(train_data.columns.values),gbc.feature_importance_)

Starting data read from csv file.
Deleting columns containing huge missing values, Single valued items and id columns which are of no use.
No data leakage is present as such.
Changing the type of columns from object to category.
Separating numeric data from the data set to perform numeric imputation on missing values.
Imputed categorical data with the mode of the respective column.
Imputed numeric columns to its median value.
Setting aside 20% of the data as hold_out.
Using standard scaler of sklearn library to Standardize and scale the numeric inputs.

Finding Optimal parameters for 4 different models using GridSearch.
Optimal Parameters for: 
1. Regularized Logistic Regression: {'model__alpha': 0.0001}
2. Decision Tree: {'model__max_depth': 5, 'model__max_features': 9}
3. Random Forest: {'model__n_estimators': 40, 'model__max_depth': 6, 'model__max_features': 3}
4. Gradient Boosting Classifier: {'model__n_estimators': 70, 'model__max_depth': 3, 'model__max_features': 5}

Performing C

AttributeError: 'GradientBoostingClassifier' object has no attribute 'feature_importance_'

In [8]:
print('Starting data read from csv file.')
match_data = pd.read_csv('test.csv',sep=',')


cols_to_delete = ['Cabin','Ticket','Name','PassengerId']
#single_valued_columns = ['same_gender_partner','partners_religion_at_16','religion_at_16','has_domestic_partnership','has_civil_union','no_domestic_partnership_or_civil_union','attended_same_college','how_met_online','partner_gender','lived_together_before_married','time_dating_until_married','time_met_until_married','time_met_until_dating','marriage_count_combined','zip_pct_white','zip_pct_black','zip_pct_hispanic','zip_median_income','zip_pct_foreign_born','zip_rural']
 
print('Deleting columns containing huge missing values, Single valued items and id columns which are of no use.')
print('No data leakage is present as such.')
match_data.drop(cols_to_delete,inplace=True,axis=1)
#match_data.drop(single_valued_columns,inplace=True,axis=1)
#match_data.Survived = match_data.Survived.astype('category')


 
print('Changing the type of columns from object to category.')
change_obj_to_category(match_data)
 
numeric_cols = ['Age',
                 'SibSp','Parch',
                 'Fare']       
 
print('Separating numeric data from the data set to perform numeric imputation on missing values.')
numeric_data = match_data[numeric_cols]
for col in numeric_cols:
    match_data.drop([col],axis=1,inplace=True)
 
match_data = match_data.apply(lambda x:x.fillna(x.value_counts().index[0]))
print('Imputed categorical data with the mode of the respective column.')
numeric_data = numeric_data.fillna(numeric_data.median())
print('Imputed numeric columns to its median value.')
 
#Join numeric data to main data set
match_data = match_data.join(numeric_data)
match_data['FamilySize'] = match_data.SibSp + match_data.Parch +1

match_data['IsAlone']=match_data['FamilySize'].apply(lambda x: 0 if x==1 else 1)

match_data['CategoricalFare'] = pd.qcut(match_data['Fare'],3 )
match_data['CategoricalAge'] = pd.qcut(match_data['Age'], 3)
    
cols_to_delete = ['SibSp','Parch','Fare','Age']

match_data.drop(cols_to_delete,inplace=True,axis=1)


numeric_cols = [
                 'FamilySize']  
 
print('Using standard scaler of sklearn library to Standardize and scale the numeric inputs.')
#Scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
match_data[numeric_cols] = scaler.fit_transform(match_data[numeric_cols])
 
 
print('\nFinding Optimal parameters for 4 different models using GridSearch.')
varToUse = match_data.columns.tolist()
Test_X= generateX(match_data,varToUse)

Starting data read from csv file.
Deleting columns containing huge missing values, Single valued items and id columns which are of no use.
No data leakage is present as such.
Changing the type of columns from object to category.
Separating numeric data from the data set to perform numeric imputation on missing values.
Imputed categorical data with the mode of the respective column.
Imputed numeric columns to its median value.
Using standard scaler of sklearn library to Standardize and scale the numeric inputs.

Finding Optimal parameters for 4 different models using GridSearch.


In [64]:
print('Starting data read from csv file.')
match_data = pd.read_csv('test.csv',sep=',')
cols_to_delete = ['Cabin','Ticket','Name','PassengerId']
#single_valued_columns = ['same_gender_partner','partners_religion_at_16','religion_at_16','has_domestic_partnership','has_civil_union','no_domestic_partnership_or_civil_union','attended_same_college','how_met_online','partner_gender','lived_together_before_married','time_dating_until_married','time_met_until_married','time_met_until_dating','marriage_count_combined','zip_pct_white','zip_pct_black','zip_pct_hispanic','zip_median_income','zip_pct_foreign_born','zip_rural']
 
print('Deleting columns containing huge missing values, Single valued items and id columns which are of no use.')
print('No data leakage is present as such.')
match_data.drop(cols_to_delete,inplace=True,axis=1)
#match_data.drop(single_valued_columns,inplace=True,axis=1)
#match_data.Survived = match_data.Survived.astype('category')
 
print('Changing the type of columns from object to category.')
change_obj_to_category(match_data)
 
numeric_cols = ['Age',
                 'SibSp','Parch',
                 'Fare']       
 
print('Separating numeric data from the data set to perform numeric imputation on missing values.')
numeric_data = match_data[numeric_cols]
for col in numeric_cols:
    match_data.drop([col],axis=1,inplace=True)
 
match_data = match_data.apply(lambda x:x.fillna(x.value_counts().index[0]))
print('Imputed categorical data with the mode of the respective column.')
numeric_data = numeric_data.fillna(numeric_data.median())
print('Imputed numeric columns to its median value.')
 
#Join numeric data to main data set
match_data = match_data.join(numeric_data)

match_data['FamilySize'] = match_data.SibSp + match_data.Parch +1

match_data['IsAlone']=match_data['FamilySize'].apply(lambda x: 0 if x==1 else 1)

    
cols_to_delete = ['SibSp','Parch']

match_data.drop(cols_to_delete,inplace=True,axis=1)

 
print('Setting aside 20% of the data as hold_out.')
#Now, let's set 20% of the data as holdout data [test data]
from sklearn.model_selection import train_test_split
train_data, hold_out = train_test_split(match_data, train_size = 0.8,random_state=2135)

numeric_cols = ['Age',
                 'FamilySize',
                 'Fare']  
 
 
 
print('Using standard scaler of sklearn library to Standardize and scale the numeric inputs.')
#Scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
match_data[numeric_cols] = scaler.fit_transform(match_data[numeric_cols])
 
print('\nFinding Optimal parameters for 4 different models using GridSearch.')
varToUse = match_data.columns.tolist()
Train_X= generateX(match_data,varToUse)

Starting data read from csv file.
Deleting columns containing huge missing values, Single valued items and id columns which are of no use.
No data leakage is present as such.
Changing the type of columns from object to category.
Separating numeric data from the data set to perform numeric imputation on missing values.
Imputed categorical data with the mode of the respective column.
Imputed numeric columns to its median value.
Setting aside 20% of the data as hold_out.
Using standard scaler of sklearn library to Standardize and scale the numeric inputs.

Finding Optimal parameters for 4 different models using GridSearch.


In [5]:
def generateX(df,var_list):
   
    #make a copy of the required subset and drop rows containing Na
    mdata = df[var_list].copy()
    mdata.dropna(inplace=True)
   
   
    #Now, process data and create dummy variables if required with final data in Xvars
    import pandas
    Xvars = pandas.DataFrame()
    import pandas as pd
    for cols in var_list:
        if (str(mdata[cols].dtype) == 'category'):
            dummySer= pd.get_dummies(mdata[cols],prefix=cols+'_')
            Xvars = pd.concat([Xvars,dummySer],axis=1)
        else:
            Xvars =  pd.concat([Xvars,mdata[cols]],axis=1)
   
    X = Xvars.values.tolist()
   
    return X

In [9]:
k=gbc.predict_proba(Test_X)[:, 1]

In [10]:
m=pd.DataFrame(k)

In [11]:
m[0]

0      0.046166
1      0.326316
2      0.052622
3      0.091359
4      0.566343
5      0.106188
6      0.715580
7      0.277526
8      0.593164
9      0.421407
10     0.091359
11     0.088903
12     0.955082
13     0.136406
14     0.959163
15     0.955334
16     0.052622
17     0.159244
18     0.352641
19     0.450509
20     0.364673
21     0.227678
22     0.957238
23     0.571167
24     0.923152
25     0.095021
26     0.969840
27     0.159244
28     0.326366
29     0.270127
30     0.136406
31     0.490758
32     0.476698
33     0.466525
34     0.317050
35     0.159244
36     0.348169
37     0.330478
38     0.106188
39     0.385127
40     0.117066
41     0.337892
42     0.080295
43     0.868966
44     0.959163
45     0.094701
46     0.358958
47     0.072278
48     0.957897
49     0.414333
50     0.431700
51     0.165337
52     0.909321
53     0.858383
54     0.165337
55     0.035451
56     0.080295
57     0.094701
58     0.109175
59     0.957897
60     0.094701
61     0.125253
62     0

In [16]:
predict=m[0].apply(lambda x: 0 if x<0.5 else 1)

In [17]:
predict.to_csv("Prediction_titanic.csv")

In [30]:
import pandas as pd
import numpy as np
import re as re
import xgboost as xgb
# Going to use these 4 base models for the stacking
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.cross_validation import KFold

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
PassengerId = test['PassengerId']

#################################################################################################### 
#                                       PRE-PROCESSING                                             #
#################################################################################################### 

# This part essentially ripped from Sina's work as I'm too lazy

full_data = [train, test]
# Check distribution of PCLASS and number survived
print(train[['Pclass', 'Survived']].groupby(['Pclass'], as_index=False).mean())
# Check distribution of Sexes and number survived
print(train[["Sex", "Survived"]].groupby(['Sex'], as_index=False).mean())
# Create new feature FamilySize as a combination of SibSp and Parch
for dataset in full_data:
    dataset['FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1
# Create new feature IsAlone from FamilySize
for dataset in full_data:
    dataset['IsAlone'] = 0
    dataset.loc[dataset['FamilySize'] == 1, 'IsAlone'] = 1
# Remove all NULLS in the Embarked column
for dataset in full_data:
    dataset['Embarked'] = dataset['Embarked'].fillna('S')
# Remove all NULLS in the Fare column and create a new feature CategoricalFare
for dataset in full_data:
    dataset['Fare'] = dataset['Fare'].fillna(train['Fare'].median())
train['CategoricalFare'] = pd.qcut(train['Fare'], 5)
# Create a New feature CategoricalAge
for dataset in full_data:
    age_avg = dataset['Age'].mean()
    age_std = dataset['Age'].std()
    age_null_count = dataset['Age'].isnull().sum()
    age_null_random_list = np.random.randint(age_avg - age_std, age_avg + age_std, size=age_null_count)
    dataset['Age'][np.isnan(dataset['Age'])] = age_null_random_list
    dataset['Age'] = dataset['Age'].astype(int)
train['CategoricalAge'] = pd.cut(train['Age'], 4)
# Define function to extract titles from passenger names
def get_title(name):
    title_search = re.search(' ([A-Za-z]+)\.', name)
    # If the title exists, extract and return it.
    if title_search:
        return title_search.group(1)
    return ""
# Create a new feature Title, containing the titles of passenger names
for dataset in full_data:
    dataset['Title'] = dataset['Name'].apply(get_title)
# Group all non-common titles into one single grouping "Rare"
for dataset in full_data:
    dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col','Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')
# Encoding our features
for dataset in full_data:
    # Mapping Sex
    dataset['Sex'] = dataset['Sex'].map( {'female': 0, 'male': 1} ).astype(int)
    
    # Mapping titles
    title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 1, "Master": 2, "Rare": 3}
    dataset['Title'] = dataset['Title'].map(title_mapping)
    dataset['Title'] = dataset['Title'].fillna(0)
    
    # Mapping Embarked
    dataset['Embarked'] = dataset['Embarked'].map( {'S': 0, 'C': 1, 'Q': 2} ).astype(int)
    
    # Mapping Fare
    dataset.loc[ dataset['Fare'] <= 8, 'Fare'] 						        = 0
    dataset.loc[(dataset['Fare'] > 8) & (dataset['Fare'] <= 16), 'Fare'] = 1
    dataset.loc[(dataset['Fare'] > 16) & (dataset['Fare'] <= 24), 'Fare']   = 2
    dataset.loc[(dataset['Fare'] > 24) & (dataset['Fare'] <= 32), 'Fare']       = 3
    dataset.loc[ dataset['Fare'] > 32, 'Fare'] 							        = 4
    dataset['Fare'] = dataset['Fare'].astype(int)
    
    # Mapping Age
    dataset.loc[ dataset['Age'] <= 16, 'Age'] 					       = 0
    dataset.loc[(dataset['Age'] > 16) & (dataset['Age'] <= 32), 'Age'] = 1
    dataset.loc[(dataset['Age'] > 32) & (dataset['Age'] <= 48), 'Age'] = 2
    dataset.loc[(dataset['Age'] > 48) & (dataset['Age'] <= 64), 'Age'] = 3
    dataset.loc[ dataset['Age'] > 64, 'Age']                           = 4

# Feature Selection
drop_elements = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'SibSp']
#drop_elements = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'SibSp','Parch', 'FamilySize']
train = train.drop(drop_elements, axis = 1)
train = train.drop(['CategoricalAge', 'CategoricalFare'], axis = 1)
test  = test.drop(drop_elements, axis = 1)

#################################################################################################### 
#                                         STACKING                                                 #
#################################################################################################### 

ntrain = train.shape[0]
ntest = test.shape[0]
SEED = 0 # for reproducibility
NFOLDS = 5 # set folds for out-of-fold prediction
kf = KFold(ntrain, n_folds= NFOLDS, random_state=SEED)

# Write some Python helper functions that collects a lot of the SKlearn methods under one roof. 
# Totally ripped from Faron's Stacking starter ;)
class SklearnHelper(object):
    def __init__(self, clf, seed=0, params=None):
        params['random_state'] = seed
        self.clf = clf(**params)

    def train(self, x_train, y_train):
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        return self.clf.predict(x)


def get_oof(clf, x_train, y_train, x_test):
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    for i, (train_index, test_index) in enumerate(kf):
        x_tr = x_train[train_index]
        y_tr = y_train[train_index]
        x_te = x_train[test_index]

        clf.train(x_tr, y_tr)

        oof_train[test_index] = clf.predict(x_te)
        oof_test_skf[i, :] = clf.predict(x_test)

    oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)
    
# Assign the parameters for each of our 4 base models
rf_params = {
    'n_jobs': -1,
    'n_estimators': 575,
     'warm_start': True, 
     #'max_features': 0.2,
    'max_depth': 5,
    'min_samples_leaf': 2,
    'max_features' : 'sqrt',
    'verbose': 3 
}
et_params = {
    'n_jobs': -1,
    'n_estimators':575,
    #'max_features': 0.5,
    'max_depth': 5,
    'min_samples_leaf': 3,
    'verbose': 3
}
ada_params = {
    'n_estimators': 575,
    'learning_rate' : 0.95
}

gb_params = {
    'n_estimators': 575,
     #'max_features': 0.2,
    'max_depth': 5,
    'min_samples_leaf': 3,
    'verbose': 3
}
svc_params = {
    'kernel' : 'linear',
    'C' : 0.025
    }


# Create 4 objects that represent our 4 models
rf = SklearnHelper(clf=RandomForestClassifier, seed=SEED, params=rf_params)
et = SklearnHelper(clf=ExtraTreesClassifier, seed=SEED, params=et_params)
ada = SklearnHelper(clf=AdaBoostClassifier, seed=SEED, params=ada_params)
gb = SklearnHelper(clf=GradientBoostingClassifier, seed=SEED, params=gb_params)
svc = SklearnHelper(clf=SVC, seed=SEED, params=svc_params)

# Create Numpy arrays of train, test and target ( Survived) dataframes to feed into our models
y_train = train['Survived'].ravel()
train = train.drop(['Survived'], axis=1)
x_train = train.values
x_test = test.values

# Create our OOF train and test predictions. These base results will be used as new features
et_oof_train, et_oof_test = get_oof(et, x_train, y_train, x_test)
rf_oof_train, rf_oof_test = get_oof(rf,x_train, y_train, x_test)
ada_oof_train, ada_oof_test = get_oof(ada, x_train, y_train, x_test)
gb_oof_train, gb_oof_test = get_oof(gb,x_train, y_train, x_test)
svc_oof_train, svc_oof_test = get_oof(svc,x_train, y_train, x_test)

x_train = np.concatenate(( et_oof_train, rf_oof_train, ada_oof_train, gb_oof_train, svc_oof_train), axis=1)
x_test = np.concatenate(( et_oof_test, rf_oof_test, ada_oof_test, gb_oof_test, svc_oof_test), axis=1)
print("{},{}".format(x_train.shape, x_test.shape))

# Finally, we use an Xgboost classifier and feed it our oof train and test values as new features
gbm = xgb.XGBClassifier(learning_rate = 0.1,
 n_estimators= 16000,
 max_depth= 4,
 min_child_weight= 2,
 #gamma=1,
 gamma=1,                        
 subsample=0.8,
 colsample_bytree=0.8,
 objective= 'binary:logistic',
 nthread= -1,
 scale_pos_weight=1).fit(x_train, y_train)
predictions1 = gbm.predict(x_test)

   Pclass  Survived
0       1  0.629630
1       2  0.472826
2       3  0.242363
      Sex  Survived
0  female  0.742038
1    male  0.188908
building tree 1 of 575
building tree 2 of 575
building tree 3 of 575
building tree 4 of 575
building tree 5 of 575
building tree 6 of 575
building tree 7 of 575
building tree 8 of 575
building tree 9 of 575
building tree 10 of 575
building tree 11 of 575
building tree 12 of 575
building tree 13 of 575
building tree 14 of 575
building tree 15 of 575
building tree 16 of 575
building tree 17 of 575
building tree 18 of 575
building tree 19 of 575
building tree 20 of 575
building tree 21 of 575
building tree 22 of 575
building tree 23 of 575
building tree 24 of 575
building tree 25 of 575
building tree 26 of 575
building tree 27 of 575
building tree 28 of 575
building tree 29 of 575
building tree 30 of 575
building tree 31 of 575
building tree 32 of 575
building tree 33 of 575
building tree 34 of 575
building tree 35 of 575
building tree 36 of 575
build

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:    0.1s


building tree 469 of 575building tree 470 of 575

building tree 471 of 575
building tree 472 of 575
building tree 473 of 575
building tree 474 of 575
building tree 475 of 575
building tree 476 of 575
building tree 477 of 575
building tree 478 of 575
building tree 479 of 575
building tree 480 of 575
building tree 481 of 575
building tree 482 of 575
building tree 483 of 575
building tree 484 of 575
building tree 485 of 575
building tree 486 of 575
building tree 487 of 575
building tree 488 of 575
building tree 489 of 575
building tree 490 of 575
building tree 491 of 575
building tree 492 of 575
building tree 493 of 575
building tree 494 of 575
building tree 495 of 575
building tree 496 of 575
building tree 497 of 575
building tree 498 of 575
building tree 499 of 575
building tree 500 of 575
building tree 501 of 575
building tree 502 of 575
building tree 503 of 575
building tree 504 of 575
building tree 505 of 575
building tree 506 of 575
building tree 507 of 575
building tree 508 of 575


[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 575 out of 575 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished


building tree 1 of 575
building tree 2 of 575
building tree 3 of 575
building tree 4 of 575
building tree 5 of 575
building tree 6 of 575
building tree 7 of 575
building tree 8 of 575
building tree 9 of 575
building tree 10 of 575
building tree 11 of 575
building tree 12 of 575
building tree 13 of 575
building tree 14 of 575
building tree 15 of 575
building tree 16 of 575
building tree 17 of 575
building tree 18 of 575
building tree 19 of 575
building tree 20 of 575
building tree 21 of 575
building tree 22 of 575
building tree 23 of 575
building tree 24 of 575
building tree 25 of 575
building tree 26 of 575
building tree 27 of 575
building tree 28 of 575
building tree 29 of 575
building tree 30 of 575
building tree 31 of 575
building tree 32 of 575
building tree 33 of 575
building tree 34 of 575
building tree 35 of 575
building tree 36 of 575
building tree 37 of 575
building tree 38 of 575
building tree 39 of 575
building tree 40 of 575
building tree 41 of 575
building tree 42 of 575
b

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:    0.1s


building tree 428 of 575
building tree 429 of 575
building tree 430 of 575
building tree 431 of 575
building tree 432 of 575
building tree 433 of 575
building tree 434 of 575
building tree 435 of 575
building tree 436 of 575
building tree 437 of 575
building tree 438 of 575
building tree 439 of 575
building tree 440 of 575
building tree 441 of 575
building tree 442 of 575
building tree 443 of 575
building tree 444 of 575
building tree 445 of 575
building tree 446 of 575
building tree 447 of 575
building tree 448 of 575
building tree 449 of 575
building tree 450 of 575
building tree 451 of 575
building tree 452 of 575
building tree 453 of 575
building tree 454 of 575
building tree 455 of 575
building tree 456 of 575
building tree 457 of 575
building tree 458 of 575
building tree 459 of 575
building tree 460 of 575
building tree 461 of 575
building tree 462 of 575
building tree 463 of 575
building tree 464 of 575
building tree 465 of 575
building tree 466 of 575
building tree 467 of 575


[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 575 out of 575 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished


building tree 1 of 575building tree 2 of 575
building tree 3 of 575
building tree 4 of 575

building tree 5 of 575
building tree 6 of 575
building tree 7 of 575
building tree 8 of 575
building tree 9 of 575
building tree 10 of 575
building tree 11 of 575
building tree 12 of 575
building tree 13 of 575
building tree 14 of 575
building tree 15 of 575
building tree 16 of 575
building tree 17 of 575
building tree 18 of 575
building tree 19 of 575
building tree 20 of 575
building tree 21 of 575
building tree 22 of 575
building tree 23 of 575
building tree 24 of 575
building tree 25 of 575
building tree 26 of 575
building tree 27 of 575
building tree 28 of 575
building tree 29 of 575
building tree 30 of 575
building tree 31 of 575
building tree 32 of 575
building tree 33 of 575
building tree 34 of 575
building tree 35 of 575
building tree 36 of 575
building tree 37 of 575
building tree 38 of 575
building tree 39 of 575
building tree 40 of 575
building tree 41 of 575
building tree 42 of 575
b

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:    0.2s


building tree 345 of 575building tree 346 of 575
building tree 347 of 575

building tree 348 of 575
building tree 349 of 575
building tree 350 of 575
building tree 351 of 575
building tree 352 of 575
building tree 353 of 575
building tree 354 of 575
building tree 355 of 575
building tree 356 of 575
building tree 357 of 575
building tree 358 of 575
building tree 359 of 575
building tree 360 of 575
building tree 361 of 575
building tree 362 of 575
building tree 363 of 575
building tree 364 of 575
building tree 365 of 575
building tree 366 of 575
building tree 367 of 575
building tree 368 of 575
building tree 369 of 575
building tree 370 of 575
building tree 371 of 575
building tree 372 of 575
building tree 373 of 575
building tree 374 of 575
building tree 375 of 575
building tree 376 of 575
building tree 377 of 575
building tree 378 of 575
building tree 379 of 575
building tree 380 of 575
building tree 381 of 575
building tree 382 of 575
building tree 383 of 575
building tree 384 of 575


[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 575 out of 575 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished


building tree 1 of 575building tree 2 of 575
building tree 3 of 575
building tree 4 of 575

building tree 5 of 575
building tree 6 of 575
building tree 7 of 575
building tree 8 of 575
building tree 9 of 575
building tree 10 of 575
building tree 11 of 575
building tree 12 of 575
building tree 13 of 575
building tree 14 of 575
building tree 15 of 575
building tree 16 of 575
building tree 17 of 575
building tree 18 of 575
building tree 19 of 575
building tree 20 of 575
building tree 21 of 575
building tree 22 of 575
building tree 23 of 575
building tree 24 of 575
building tree 25 of 575
building tree 26 of 575
building tree 27 of 575
building tree 28 of 575
building tree 29 of 575
building tree 30 of 575
building tree 31 of 575
building tree 32 of 575
building tree 33 of 575
building tree 34 of 575
building tree 35 of 575
building tree 36 of 575
building tree 37 of 575
building tree 38 of 575
building tree 39 of 575
building tree 40 of 575
building tree 41 of 575
building tree 42 of 575
b

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:    0.2s


building tree 354 of 575
building tree 355 of 575
building tree 356 of 575
building tree 357 of 575
building tree 358 of 575
building tree 359 of 575
building tree 360 of 575
building tree 361 of 575
building tree 362 of 575
building tree 363 of 575
building tree 364 of 575
building tree 365 of 575
building tree 366 of 575
building tree 367 of 575
building tree 368 of 575
building tree 369 of 575
building tree 370 of 575
building tree 371 of 575
building tree 372 of 575
building tree 373 of 575
building tree 374 of 575
building tree 375 of 575
building tree 376 of 575
building tree 377 of 575
building tree 378 of 575
building tree 379 of 575
building tree 380 of 575
building tree 381 of 575
building tree 382 of 575
building tree 383 of 575
building tree 384 of 575
building tree 385 of 575
building tree 386 of 575
building tree 387 of 575
building tree 388 of 575
building tree 389 of 575
building tree 390 of 575
building tree 391 of 575
building tree 392 of 575
building tree 393 of 575


[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 575 out of 575 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished


building tree 1 of 575
building tree 2 of 575
building tree 3 of 575
building tree 4 of 575
building tree 5 of 575
building tree 6 of 575
building tree 7 of 575
building tree 8 of 575
building tree 9 of 575
building tree 10 of 575
building tree 11 of 575
building tree 12 of 575
building tree 13 of 575
building tree 14 of 575
building tree 15 of 575
building tree 16 of 575
building tree 17 of 575
building tree 18 of 575
building tree 19 of 575
building tree 20 of 575
building tree 21 of 575
building tree 22 of 575
building tree 23 of 575
building tree 24 of 575
building tree 25 of 575
building tree 26 of 575
building tree 27 of 575
building tree 28 of 575
building tree 29 of 575
building tree 30 of 575
building tree 31 of 575
building tree 32 of 575
building tree 33 of 575
building tree 34 of 575
building tree 35 of 575
building tree 36 of 575
building tree 37 of 575
building tree 38 of 575
building tree 39 of 575
building tree 40 of 575
building tree 41 of 575
building tree 42 of 575
b

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:    0.2s


building tree 366 of 575
building tree 367 of 575
building tree 368 of 575
building tree 369 of 575
building tree 370 of 575
building tree 371 of 575
building tree 372 of 575
building tree 373 of 575
building tree 374 of 575
building tree 375 of 575
building tree 376 of 575
building tree 377 of 575
building tree 378 of 575
building tree 379 of 575
building tree 380 of 575
building tree 381 of 575
building tree 382 of 575
building tree 383 of 575
building tree 384 of 575
building tree 385 of 575
building tree 386 of 575
building tree 387 of 575
building tree 388 of 575
building tree 389 of 575
building tree 390 of 575
building tree 391 of 575
building tree 392 of 575
building tree 393 of 575
building tree 394 of 575
building tree 395 of 575
building tree 396 of 575
building tree 397 of 575
building tree 398 of 575
building tree 399 of 575
building tree 400 of 575
building tree 401 of 575
building tree 402 of 575
building tree 403 of 575
building tree 404 of 575
building tree 405 of 575


[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 575 out of 575 | elapsed:    0.3s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished


building tree 1 of 575
building tree 2 of 575
building tree 3 of 575
building tree 4 of 575
building tree 5 of 575
building tree 6 of 575
building tree 7 of 575
building tree 8 of 575
building tree 9 of 575
building tree 10 of 575
building tree 11 of 575
building tree 12 of 575
building tree 13 of 575
building tree 14 of 575
building tree 15 of 575
building tree 16 of 575
building tree 17 of 575
building tree 18 of 575
building tree 19 of 575
building tree 20 of 575
building tree 21 of 575
building tree 22 of 575
building tree 23 of 575
building tree 24 of 575
building tree 25 of 575
building tree 26 of 575
building tree 27 of 575
building tree 28 of 575
building tree 29 of 575
building tree 30 of 575
building tree 31 of 575
building tree 32 of 575
building tree 33 of 575
building tree 34 of 575
building tree 35 of 575
building tree 36 of 575
building tree 37 of 575
building tree 38 of 575
building tree 39 of 575
building tree 40 of 575
building tree 41 of 575
building tree 42 of 575
b

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:    0.2s


building tree 276 of 575building tree 277 of 575

building tree 278 of 575
building tree 279 of 575
building tree 280 of 575
building tree 281 of 575
building tree 282 of 575
building tree 283 of 575
building tree 284 of 575
building tree 285 of 575
building tree 286 of 575
building tree 287 of 575
building tree 288 of 575
building tree 289 of 575
building tree 290 of 575
building tree 291 of 575
building tree 292 of 575
building tree 293 of 575
building tree 294 of 575
building tree 295 of 575
building tree 296 of 575
building tree 297 of 575
building tree 298 of 575
building tree 299 of 575
building tree 300 of 575
building tree 301 of 575
building tree 302 of 575
building tree 303 of 575
building tree 304 of 575
building tree 305 of 575
building tree 306 of 575
building tree 307 of 575
building tree 308 of 575
building tree 309 of 575
building tree 310 of 575
building tree 311 of 575
building tree 312 of 575
building tree 313 of 575
building tree 314 of 575
building tree 315 of 575


[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 575 out of 575 | elapsed:    0.4s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s


building tree 557 of 575
building tree 558 of 575
building tree 559 of 575
building tree 560 of 575
building tree 561 of 575
building tree 562 of 575
building tree 563 of 575
building tree 564 of 575
building tree 565 of 575
building tree 566 of 575
building tree 567 of 575
building tree 568 of 575
building tree 569 of 575
building tree 570 of 575
building tree 571 of 575
building tree 572 of 575
building tree 573 of 575
building tree 574 of 575
building tree 575 of 575


[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
  warn("Warm-start fitting without increasing n_estimators does not "
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 280 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 504 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 575 out of 575 | elapsed:    0.1s finished
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 120 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 280 tasks      | elaps

      Iter       Train Loss   Remaining Time 
         1           1.2422            3.24s
         2           1.1603            2.15s
         3           1.0933            1.72s
         4           1.0375            1.60s
         5           0.9854            1.52s
         6           0.9410            1.45s
         7           0.9044            1.40s
         8           0.8724            1.34s
         9           0.8443            1.30s
        10           0.8204            1.27s
        11           0.7997            1.22s
        12           0.7827            1.19s
        13           0.7664            1.20s
        14           0.7507            1.19s
        15           0.7381            1.17s
        16           0.7273            1.16s
        17           0.7160            1.15s
        18           0.7067            1.13s
        19           0.6986            1.12s
        20           0.6915            1.10s
        21           0.6841            1.09s
        2

       264           0.4782            0.48s
       265           0.4780            0.47s
       266           0.4776            0.47s
       267           0.4774            0.47s
       268           0.4773            0.47s
       269           0.4771            0.47s
       270           0.4768            0.47s
       271           0.4767            0.46s
       272           0.4766            0.46s
       273           0.4764            0.46s
       274           0.4763            0.46s
       275           0.4761            0.46s
       276           0.4759            0.46s
       277           0.4757            0.45s
       278           0.4755            0.45s
       279           0.4753            0.45s
       280           0.4752            0.45s
       281           0.4751            0.45s
       282           0.4750            0.45s
       283           0.4749            0.45s
       284           0.4746            0.44s
       285           0.4744            0.44s
       286

       506           0.4564            0.11s
       507           0.4564            0.11s
       508           0.4564            0.11s
       509           0.4563            0.11s
       510           0.4563            0.10s
       511           0.4563            0.10s
       512           0.4563            0.10s
       513           0.4563            0.10s
       514           0.4562            0.10s
       515           0.4562            0.10s
       516           0.4562            0.09s
       517           0.4561            0.09s
       518           0.4561            0.09s
       519           0.4561            0.09s
       520           0.4561            0.09s
       521           0.4561            0.09s
       522           0.4560            0.08s
       523           0.4560            0.08s
       524           0.4560            0.08s
       525           0.4560            0.08s
       526           0.4559            0.08s
       527           0.4559            0.08s
       528

       141           0.4875            0.83s
       142           0.4869            0.83s
       143           0.4864            0.82s
       144           0.4858            0.82s
       145           0.4851            0.82s
       146           0.4841            0.82s
       147           0.4835            0.82s
       148           0.4831            0.81s
       149           0.4826            0.81s
       150           0.4822            0.81s
       151           0.4816            0.81s
       152           0.4810            0.81s
       153           0.4806            0.80s
       154           0.4803            0.80s
       155           0.4797            0.80s
       156           0.4795            0.80s
       157           0.4792            0.80s
       158           0.4785            0.80s
       159           0.4780            0.80s
       160           0.4775            0.79s
       161           0.4773            0.79s
       162           0.4770            0.79s
       163

       387           0.4402            0.33s
       388           0.4402            0.33s
       389           0.4401            0.32s
       390           0.4400            0.32s
       391           0.4399            0.32s
       392           0.4399            0.32s
       393           0.4398            0.32s
       394           0.4398            0.32s
       395           0.4397            0.31s
       396           0.4397            0.31s
       397           0.4396            0.31s
       398           0.4395            0.31s
       399           0.4395            0.31s
       400           0.4395            0.30s
       401           0.4395            0.30s
       402           0.4394            0.30s
       403           0.4394            0.30s
       404           0.4394            0.30s
       405           0.4394            0.29s
       406           0.4393            0.29s
       407           0.4392            0.29s
       408           0.4392            0.29s
       409

        41           0.6350            0.92s
        42           0.6337            0.93s
        43           0.6316            0.92s
        44           0.6304            0.92s
        45           0.6276            0.92s
        46           0.6259            0.91s
        47           0.6228            0.91s
        48           0.6211            0.90s
        49           0.6188            0.90s
        50           0.6174            0.89s
        51           0.6142            0.89s
        52           0.6130            0.88s
        53           0.6113            0.88s
        54           0.6095            0.87s
        55           0.6089            0.86s
        56           0.6059            0.86s
        57           0.6041            0.85s
        58           0.6027            0.85s
        59           0.6009            0.84s
        60           0.6004            0.83s
        61           0.5992            0.83s
        62           0.5980            0.82s
        63

       319           0.4578            0.38s
       320           0.4576            0.38s
       321           0.4574            0.38s
       322           0.4573            0.38s
       323           0.4569            0.38s
       324           0.4566            0.38s
       325           0.4564            0.38s
       326           0.4562            0.37s
       327           0.4560            0.37s
       328           0.4559            0.37s
       329           0.4554            0.37s
       330           0.4553            0.37s
       331           0.4549            0.37s
       332           0.4544            0.36s
       333           0.4542            0.36s
       334           0.4540            0.36s
       335           0.4540            0.36s
       336           0.4539            0.36s
       337           0.4538            0.36s
       338           0.4537            0.36s
       339           0.4536            0.35s
       340           0.4534            0.35s
       341

      Iter       Train Loss   Remaining Time 
         1           1.2219            1.32s
         2           1.1381            1.20s
         3           1.0692            1.08s
         4           1.0123            1.02s
         5           0.9630            0.97s
         6           0.9210            0.93s
         7           0.8855            0.94s
         8           0.8555            0.92s
         9           0.8294            0.94s
        10           0.8063            0.93s
        11           0.7863            0.91s
        12           0.7687            0.89s
        13           0.7529            0.88s
        14           0.7397            0.87s
        15           0.7274            0.86s
        16           0.7162            0.85s
        17           0.7069            0.85s
        18           0.6984            0.87s
        19           0.6916            0.89s
        20           0.6853            0.89s
        21           0.6801            0.88s
        2

       239           0.4760            0.57s
       240           0.4758            0.57s
       241           0.4757            0.57s
       242           0.4755            0.57s
       243           0.4751            0.57s
       244           0.4749            0.56s
       245           0.4747            0.56s
       246           0.4745            0.56s
       247           0.4743            0.56s
       248           0.4742            0.56s
       249           0.4738            0.55s
       250           0.4730            0.55s
       251           0.4729            0.55s
       252           0.4727            0.55s
       253           0.4724            0.55s
       254           0.4722            0.55s
       255           0.4720            0.55s
       256           0.4719            0.54s
       257           0.4718            0.54s
       258           0.4714            0.54s
       259           0.4712            0.54s
       260           0.4711            0.54s
       261

       458           0.4523            0.21s
       459           0.4523            0.21s
       460           0.4522            0.21s
       461           0.4522            0.20s
       462           0.4522            0.20s
       463           0.4521            0.20s
       464           0.4521            0.20s
       465           0.4521            0.20s
       466           0.4521            0.19s
       467           0.4521            0.19s
       468           0.4520            0.19s
       469           0.4520            0.19s
       470           0.4520            0.19s
       471           0.4520            0.19s
       472           0.4519            0.18s
       473           0.4519            0.18s
       474           0.4519            0.18s
       475           0.4519            0.18s
       476           0.4518            0.18s
       477           0.4518            0.18s
       478           0.4518            0.17s
       479           0.4517            0.17s
       480

        85           0.5738            0.94s
        86           0.5734            0.94s
        87           0.5726            0.93s
        88           0.5710            0.93s
        89           0.5707            0.92s
        90           0.5700            0.91s
        91           0.5677            0.91s
        92           0.5674            0.91s
        93           0.5672            0.90s
        94           0.5662            0.90s
        95           0.5654            0.89s
        96           0.5648            0.89s
        97           0.5637            0.88s
        98           0.5613            0.88s
        99           0.5611            0.87s
       100           0.5601            0.87s
       101           0.5590            0.86s
       102           0.5588            0.86s
       103           0.5585            0.85s
       104           0.5571            0.85s
       105           0.5558            0.85s
       106           0.5548            0.85s
       107

       355           0.4668            0.35s
       356           0.4668            0.35s
       357           0.4667            0.35s
       358           0.4666            0.35s
       359           0.4665            0.35s
       360           0.4665            0.34s
       361           0.4664            0.34s
       362           0.4664            0.34s
       363           0.4664            0.34s
       364           0.4664            0.34s
       365           0.4663            0.33s
       366           0.4662            0.33s
       367           0.4660            0.33s
       368           0.4659            0.33s
       369           0.4658            0.33s
       370           0.4658            0.33s
       371           0.4657            0.32s
       372           0.4656            0.32s
       373           0.4654            0.32s
       374           0.4653            0.32s
       375           0.4653            0.32s
       376           0.4652            0.32s
       377

(891, 5),(418, 5)


In [22]:
predictions

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0,

In [31]:
predictions1

array([0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0,

In [32]:
#################################################################################################### 
#                           PRODUCING SUBMISSION FILE                                              #
#################################################################################################### 
StackingSubmission = pd.DataFrame({ 'PassengerId': PassengerId,
                            'Survived': predictions1 })
StackingSubmission.to_csv("StackingSubmission.csv", index=False)