# Imports

## General

In [1]:
import pandas as pd
import numpy as np

## Linear regression

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.cross_validation import KFold
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

## Logistic regression

In [3]:
from sklearn.linear_model import LogisticRegression

## SVM Classifier

In [4]:
from sklearn.svm import SVC

# Read data

In [5]:
# Load the passenger table from train.csv, using the PassengerId column as the
# row index. read_csv already returns a DataFrame, so no extra wrapping is needed.
df_titanic = pd.read_csv("train.csv", index_col="PassengerId")

# Preprocess data

In [6]:
# Drop the columns that are not used as predictors further down.
df_titanic = df_titanic.drop(["Ticket", "Cabin", "Name"], axis=1)

# Fill missing ages with the median age. The result is assigned back to the
# column instead of calling fillna(inplace=True) on `df_titanic.Age`: an
# in-place fill on a column pulled out of the frame may act on a temporary
# object and leave the DataFrame itself unchanged.
df_titanic["Age"] = df_titanic["Age"].fillna(df_titanic["Age"].median())

# Encode Sex as male -> 0, female -> 1.
df_titanic.loc[df_titanic["Sex"] == "male", "Sex"] = 0
df_titanic.loc[df_titanic["Sex"] == "female", "Sex"] = 1

# Missing embarkation ports are filled with "S", then encoded as
# S -> 0, C -> 1, Q -> 2.
df_titanic["Embarked"] = df_titanic["Embarked"].fillna("S")
df_titanic.loc[df_titanic["Embarked"] == "S", "Embarked"] = 0
df_titanic.loc[df_titanic["Embarked"] == "C", "Embarked"] = 1
df_titanic.loc[df_titanic["Embarked"] == "Q", "Embarked"] = 2

# Predictors

In [7]:
# Column names selected from df_titanic as model inputs.
predictors = [
    "Pclass",
    "Sex",
    "Age",
    "SibSp",
    "Parch",
    "Fare",
    "Embarked",
]

# Normalize data

In [8]:
# Standardize every predictor column: subtract the column mean and divide by
# the column standard deviation.
# NOTE(review): the statistics are computed over all rows of df_titanic, i.e.
# before any cross-validation split is made.
for column in predictors:
    values = df_titanic.loc[:, column]
    df_titanic[column] = (values - values.mean()) / values.std()

# Initialize linear regression algorithm

In [9]:
# Two-step pipeline: degree-2 polynomial feature expansion, then a
# LinearRegression fitted without its own intercept.
# NOTE(review): presumably the intercept is disabled because the polynomial
# expansion already supplies a constant term — confirm.
alg = Pipeline(
    steps=[
        ('poly', PolynomialFeatures(degree=2)),
        ('linear', LinearRegression(fit_intercept=False, n_jobs=-1)),
    ]
)

# Cross-validation folds

In [10]:
# Four cross-validation folds over the rows of df_titanic.
# NOTE(review): no shuffling is requested here, so random_state presumably has
# no effect on the split — confirm against the installed scikit-learn version.
kf_passengers = KFold(len(df_titanic), n_folds=4, random_state=1)

# Make predictions

In [11]:
# Collect predictions from `alg` for every fold's held-out rows: fit on the
# fold's training rows, predict on its test rows.
features = df_titanic[predictors]
target = df_titanic["Survived"]
fold_outputs = []
for train_rows, test_rows in kf_passengers:
    alg.fit(features.iloc[train_rows, :], target.iloc[train_rows])
    fold_outputs.append(alg.predict(features.iloc[test_rows, :]))

# Stitch the per-fold outputs into one array, then threshold in place at 0.5:
# values above 0.5 become 1, everything at or below 0.5 becomes 0.
predictions = np.concatenate(fold_outputs, axis=0)
np.putmask(predictions, predictions > .5, 1)
np.putmask(predictions, predictions <= .5, 0)
predictions_linreg = predictions

# Initialize logistic regression algorithm

In [12]:
# Two-step pipeline: degree-2 polynomial feature expansion, then a
# LogisticRegression fitted without its own intercept. Only this pipeline is
# bound to `alg`; a bare LogisticRegression assigned first would be overwritten
# here without ever being used.
# The step keeps the name 'linear' so lookups by step name are unaffected.
# NOTE(review): no random_state is set on this estimator.
alg = Pipeline([('poly', PolynomialFeatures(degree=2)),
                ('linear', LogisticRegression(fit_intercept=False, n_jobs=-1))])

In [13]:
# Collect predictions from `alg` for every fold's held-out rows: fit on the
# fold's training rows, predict on its test rows.
features = df_titanic[predictors]
target = df_titanic["Survived"]
fold_outputs = []
for train_rows, test_rows in kf_passengers:
    alg.fit(features.iloc[train_rows, :], target.iloc[train_rows])
    fold_outputs.append(alg.predict(features.iloc[test_rows, :]))

# Join the folds and apply the same 0.5 threshold used for the other models
# (in place, so the array keeps its dtype).
predictions = np.concatenate(fold_outputs, axis=0)
np.putmask(predictions, predictions > .5, 1)
np.putmask(predictions, predictions <= .5, 0)
predictions_logreg = predictions

# SVM classifier

In [14]:
# Support-vector classifier with an RBF kernel, evaluated on the same folds.
clf = SVC(kernel='rbf')
features = df_titanic[predictors]
target = df_titanic["Survived"]
fold_outputs = []
for train_rows, test_rows in kf_passengers:
    clf.fit(features.iloc[train_rows, :], target.iloc[train_rows])
    fold_outputs.append(clf.predict(features.iloc[test_rows, :]))
# The classifier's outputs are used as-is (no thresholding step).
predictions = np.concatenate(fold_outputs, axis=0)
predictions_SVM = predictions

In [17]:
# Average the three models' predictions and threshold at 0.5. Assuming each
# model contributes 0/1 labels, this amounts to a majority vote.
vote_sum = predictions_SVM + predictions_linreg + predictions_logreg
predictions = vote_sum / 3
np.putmask(predictions, predictions > .5, 1)
np.putmask(predictions, predictions <= .5, 0)

# Fraction of rows whose combined prediction equals the Survived column.
n_correct = (predictions == df_titanic.Survived).sum()
accuracy = n_correct / len(predictions)
print('Accuracy on the training set:', accuracy)

Accuracy on the training set: 0.830527497194


In [18]:
# Recompute the un-thresholded average of the three models' predictions and
# display it as the cell output.
vote_sum = predictions_SVM + predictions_linreg + predictions_logreg
predictions = vote_sum / 3
predictions

array([ 0.        ,  1.        ,  0.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
        1.        ,  1.        ,  0.        ,  0.        ,  0.33333333,
        1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.66666667,  1.        ,  0.        ,
        0.        ,  1.        ,  1.        ,  0.        ,  0.33333333,
        0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
        0.        ,  1.        ,  0.        ,  1.        ,  1.        ,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  1.        ,  1.        ,  0.        ,
        0.        ,  1.        ,  0.        ,  1.        ,  0.        ,
        0.        ,  1.        ,  0.        ,  0.        ,  0.33333333,
        0.        ,  1.        ,  0.        ,  0.        ,  0.  