### Modelling

Currently setting this up to play around with some of the following:
* Implementing models
* Implementing different preprocessing techniques
* Visualising validation metrics
* Analysing feature importance
* Pickling models
* etc.

In [1]:
# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
%matplotlib inline

# Modelling
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.metrics import accuracy_score, precision_score, f1_score

# Metrics

In [2]:
# Constants shared by the cells below
test_size = 0.3     # fraction of rows held out as the test split
random_state = 101  # seed handed to train_test_split so the split is repeatable

In [3]:
# Load the training DataFrame from a pickle file given by a relative path
# (resolved against the notebook's working directory).
# NOTE(review): presumably the output of an earlier cleaning step — confirm provenance.
# Unpickling can execute arbitrary code, so only load this file from a trusted source.
df = pd.read_pickle("titanic_train_clean")

In [4]:
# Target is the "Survived" column; every remaining column is used as a feature.
y = df["Survived"]
X = df.drop(columns=["Survived"])

# Hold out `test_size` of the rows for evaluation, seeded for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
)

In [5]:
# Two-step pipeline: standardise the features, then fit a logistic regression
# whose regularisation strength is selected by cross-validation.
# verbose=2 makes the fit print its progress log.
model = make_pipeline(StandardScaler(), LogisticRegressionCV(verbose=2))

In [6]:
# Fit the whole pipeline (scaler + classifier) on the training split only.
# The call is the cell's last expression, so the fitted Pipeline's repr is
# displayed after the verbose fitting log.
model.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s finished


Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregressioncv',
                 LogisticRegressionCV(Cs=10, class_weight=None, cv=None,
                                      dual=False, fit_intercept=True,
                                      intercept_scaling=1.0, l1_ratios=None,
                                      max_iter=100, multi_class='auto',
                                      n_jobs=None, penalty='l2',
                                      random_state=None, refit=True,
                                      scoring=None, solver='lbfgs', tol=0.0001,
                                      verbose=2))],
         verbose=False)

In [7]:
# Predict labels for the held-out test split using the fitted pipeline.
predictions = model.predict(X_test)

In [8]:
# Report test-set metrics, one per line, comparing true labels with predictions.
print(
    f"accuracy: {accuracy_score(y_test, predictions)}",
    f"precision: {precision_score(y_test, predictions)}",
    f"f1 score: {f1_score(y_test, predictions)}",
    sep="\n",
)

accuracy: 0.8127340823970037
precision: 0.813953488372093
f1 score: 0.736842105263158
