In [1]:
import os
import pandas as pd

# Relative directory that holds the Titanic CSV files.
TITANIC_PATH = os.path.join("data", "titanic")


def load_titanic_data(filename, titanic_path=TITANIC_PATH):
    """Read one CSV file from the Titanic data directory.

    Parameters
    ----------
    filename : str
        Name of the CSV file, e.g. ``"train.csv"``.
    titanic_path : str, optional
        Directory containing the file. Defaults to ``TITANIC_PATH``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``<titanic_path>/<filename>``.
    """
    return pd.read_csv(os.path.join(titanic_path, filename))


# Load the labelled training set and the unlabelled test set from TITANIC_PATH.
train_data, test_data = (
    load_titanic_data("train.csv"),
    load_titanic_data("test.csv"),
)

In [2]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

def transform(dataframe):
    """Turn a raw Titanic passenger frame into a standardized, all-numeric frame.

    Steps, in order:

    1. Drop ``PassengerId`` and ``Ticket``.
    2. Add ``Family_Size`` = ``Parch`` + ``SibSp``.
    3. Extract the honorific from ``Name`` into ``Title``, fold rare titles
       into Mr / Mrs / Miss, then drop ``Name``.
    4. Fill missing ``Age`` with the median age of passengers sharing the title.
    5. Fill missing ``Fare`` with the median fare of passengers sharing the
       same ``(Pclass, Family_Size)``.
    6. Encode ``Sex`` (female=0, male=1) and ``Cabin`` (1 if a cabin is recorded).
    7. Fill missing ``Embarked`` with ``"S"`` and one-hot encode ``Embarked``
       and ``Title``.
    8. Standardize every feature column to zero mean and unit variance.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``PassengerId``, ``Ticket``, ``Name``, ``Age``, ``Fare``,
        ``Pclass``, ``Parch``, ``SibSp``, ``Sex``, ``Cabin`` and ``Embarked``.
        A ``Survived`` column is optional. The input is not modified.

    Returns
    -------
    pandas.DataFrame
        Same index as the input. Feature columns are float64 and standardized;
        ``Survived``, when present, is passed through unchanged so callers can
        still split it off with ``.drop(['Survived'], axis=1)``.

    Notes
    -----
    All medians, means and standard deviations are computed from the frame
    passed in, and the one-hot columns depend on the categories present in
    it. Calling this separately on train and test data therefore scales each
    with its own statistics and can yield different column sets.
    NOTE(review): consider fitting these on the training frame only.
    """
    # `drop` returns a new frame, so the caller's data is left untouched.
    df = dataframe.drop(columns=['PassengerId', 'Ticket'])

    df['Family_Size'] = df['Parch'] + df['SibSp']

    # The title is the word directly in front of the first "." in the name.
    rare_titles = {'Mlle': 'Miss', 'Major': 'Mr', 'Col': 'Mr', 'Sir': 'Mr',
                   'Don': 'Mr', 'Mme': 'Mrs', 'Jonkheer': 'Mr', 'Lady': 'Mrs',
                   'Capt': 'Mr', 'Countess': 'Mrs', 'Ms': 'Miss', 'Dona': 'Mrs'}
    extracted = df['Name'].str.extract(r'([A-Za-z]+)\.', expand=False)
    df['Title'] = extracted.replace(rare_titles)
    df = df.drop(columns=['Name'])

    # Age: per-title median first; overall median covers rows whose title is
    # missing or whose title group has no recorded age at all.
    age_by_title = df.groupby('Title')['Age'].transform('median')
    df['Age'] = df['Age'].fillna(age_by_title).fillna(df['Age'].median())

    # Fare: (class, family size) median, then class median, then overall median.
    fare_by_group = df.groupby(['Pclass', 'Family_Size'])['Fare'].transform('median')
    fare_by_class = df.groupby('Pclass')['Fare'].transform('median')
    df['Fare'] = (
        df['Fare']
        .fillna(fare_by_group)
        .fillna(fare_by_class)
        .fillna(df['Fare'].median())
    )

    # Explicit encodings: category codes would depend on which values happen
    # to occur in this particular frame.
    df['Sex'] = df['Sex'].map({'female': 0, 'male': 1})
    df['Cabin'] = df['Cabin'].notnull().astype(int)

    df['Embarked'] = df['Embarked'].fillna("S")
    df = pd.get_dummies(df, columns=['Embarked', 'Title'], dtype=float)

    # Standardize the features only; the label must stay a usable 0/1 target.
    feature_cols = [col for col in df.columns if col != 'Survived']
    features = df[feature_cols].astype('float64')
    spread = features.std(ddof=0)  # population std, as StandardScaler uses
    # A (near-)constant column would divide by ~0; scale it by 1 instead so it
    # simply becomes all zeros.
    spread = spread.where(spread > 1e-12, 1.0)
    scaled = (features - features.mean()) / spread

    # `assign` swaps in the scaled columns while keeping column order and index.
    return df.assign(**{col: scaled[col] for col in feature_cols})

In [3]:
# Split the target off *before* preprocessing so the label never goes through
# the feature pipeline, and leave `train_data` bound to the raw frame so this
# cell can be re-run without reloading the CSV.
y = train_data['Survived']
X = transform(train_data.drop(columns=['Survived']))

print("X.shape: {} y.shape: {}".format(X.shape, y.shape))

X.shape: (891, 17) y.shape: (891,)


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [None]:
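# model -- TODO: define and fit `model` in this cell; the prediction cell below calls `model.predict(...)`.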

In [29]:
# Preprocess the unlabelled test set with the same `transform` used for training.
X_final = transform(test_data)

predictions = model.predict(X_final)

# Threshold the model outputs at 0.5 and flatten to a 1-D vector of 0/1 integers.
predictions_final = (
    (predictions > 0.5)
    .astype('int')
    .reshape(-1)
)

print(predictions_final)

# Pair each prediction with the passenger id from the raw test frame and
# write the result to disk without the index column.
output = pd.DataFrame(
    {
        'PassengerId': test_data['PassengerId'],
        'Survived': predictions_final,
    }
)
output.to_csv('submission.csv', index=False)

print("Your submission was successfully saved!")

[0 1 0 0 1 0 0 0 1 0 0 0 1 0 1 1 0 0 0 1 0 1 1 0 1 0 1 0 0 0 0 0 1 1 1 0 0
 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 1 0 0 1 1 0 0 0 1
 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0
 0 1 0 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 1 0 0 1 1 0 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1
 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0
 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 1 0 1 1 1 0 1 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0
 1 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0
 1 0 0 0 0 0 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 1 0 0 1 1 1 1 0 1 1 0 0 1 0
 0 1 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0
 0 0 0 0 1 0 0 1 0 0 1]
Your submission was successfully saved!
