In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
#get data from .csv files
train_data = pd.read_csv('./train.csv')
test_data = pd.read_csv('./test.csv')
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [3]:
#get X,y from data
y_train_full = train_data.Survived
features = ['Pclass','Sex','Age','SibSp','Parch','Fare','Cabin','Embarked']
X_train_full = train_data[features]
X_test = test_data[features]

In [4]:
#split data
from sklearn.model_selection import train_test_split
train_X,val_X,train_y,val_y = train_test_split(X_train_full,y_train_full,random_state=1)

In [5]:
#preprocessor for pipeline
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

numerical_transformer = SimpleImputer(strategy='mean')

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, ['Age']),
        ('cat', categorical_transformer, ['Sex','Embarked','Cabin'])
    ])

In [6]:
#define model
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(solver='adam',hidden_layer_sizes=(50,50),random_state=1,max_iter=10000,alpha=0.01)

In [7]:
#pipeline 
my_pipeline = Pipeline(steps=[('preprocessor', preprocessor),('model', model)])

my_pipeline.fit(train_X, train_y)

preds = my_pipeline.predict(val_X)
preds_train = my_pipeline.predict(train_X)

train_acc = np.mean((preds_train==train_y))
val_acc = np.mean((preds==val_y))

print(train_acc,val_acc)

0.8517964071856288 0.7892376681614349


In [8]:
#output to a .csv file
pred_test = my_pipeline.predict(X_test)

output = pd.DataFrame({'PassengerId': test_data.PassengerId,
                       'Survived': pred_test})
output.to_csv('submission.csv', index=False)