In [57]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [58]:
# Read the passenger CSV into a DataFrame.
# The path is relative to the notebook's working directory.
passengers = pd.read_csv(filepath_or_buffer='./Project Datasets/passengers.csv')

In [59]:
# Encode the 'Sex' column numerically: male -> 0, female -> 1.
# Series.map turns every value that is not a key of the mapping into NaN,
# so this cell is meant to run once on the freshly loaded data.
passengers['Sex'] = passengers['Sex'].map(dict(male=0, female=1))

In [60]:
# Impute missing ages with the mean age.
# The mean is taken over the whole column, i.e. before the train/test split.
passengers['Age'] = passengers['Age'].fillna(passengers['Age'].mean())

In [61]:
# Create a first class indicator column: 1 when Pclass == 1, otherwise 0.
# A vectorized comparison replaces the per-row Python lambda; missing Pclass
# values compare unequal and therefore still map to 0, as before.
passengers['FirstClass'] = (passengers['Pclass'] == 1).astype('int64')

In [62]:
# Create a second class indicator column: 1 when Pclass == 2, otherwise 0.
# A vectorized comparison replaces the per-row Python lambda; missing Pclass
# values compare unequal and therefore still map to 0, as before.
passengers['SecondClass'] = (passengers['Pclass'] == 2).astype('int64')

In [63]:
# Split the frame into model inputs (predictor columns) and the target column.
# The column order chosen here is the order the model's coefficients follow.
features = passengers.loc[:, ['Sex', 'Age', 'FirstClass', 'SecondClass']]
survival = passengers.loc[:, 'Survived']

In [64]:
# Perform train, test, split: hold out 20% of the rows for testing.
# A fixed random_state pins the shuffle, so the scores, coefficients and
# predictions computed from this split are reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    features, survival, test_size=0.2, random_state=42
)

In [65]:
# Standardize the features so each column has mean = 0 and standard deviation = 1.
# The scaler learns its statistics from the training rows only; the same
# transformation is then applied to both splits.
# NOTE: X_train / X_test are overwritten here, so re-running this cell without
# re-running the split cell would fit the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [66]:
# Build a logistic regression classifier with default settings and fit it
# on the scaled training features and their survival labels.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

In [67]:
# Accuracy of the fitted model on the data it was trained on
train_accuracy = model.score(X_train, y_train)
print(train_accuracy)

0.8047752808988764


In [68]:
# Accuracy of the fitted model on the held-out test split
test_accuracy = model.score(X_test, y_test)
print(test_accuracy)

0.770949720670391


In [69]:
# Analyze the coefficients.
# The inputs were standardized, so coefficient magnitudes are on a common scale.
print(model.coef_)
# Pair each coefficient with its column name. The names are read from
# `features` itself so the labels cannot drift out of sync with the columns
# that were actually selected for training.
print(list(zip(features.columns, model.coef_[0])))

[[ 1.25841942 -0.41722801  0.93492446  0.46648911]]
[('Sex', 1.2584194174177277), ('Age', -0.41722801455662895), ('FirstClass', 0.9349244634327034), ('SecondClass', 0.4664891108748319)]


In [70]:
# Sample passenger features, in the same column order as the model inputs:
# [Sex (0 = male, 1 = female), Age, FirstClass, SecondClass]
Jack = np.array([0, 20, 0, 0], dtype=float)
Rose = np.array([1, 17, 1, 0], dtype=float)
You = np.array([0, 29, 0, 1], dtype=float)

In [71]:
# Stack the three passengers into a single 2-D array, one row per passenger
# (row order: Jack, Rose, You).
sample_passengers = np.array((Jack, Rose, You))

In [72]:
# Scale the sample passenger features with the scaler fitted on the training data.
# The scaler was fitted on a DataFrame, so the samples are labelled with the
# same column names before transforming; passing a bare array makes recent
# scikit-learn versions warn that X does not have valid feature names.
# The result is still a plain NumPy array.
sample_passengers = scaler.transform(
    pd.DataFrame(sample_passengers, columns=features.columns)
)



In [73]:
# Make survival predictions!
# Rows follow the order of sample_passengers: Jack, Rose, You.
# In the recorded run below the predicted labels are [0 1 0]:
# Jack, no ... obviously.
# Rose, yes ... good for her.
# You, no ... I didn't survive.
# predict_proba prints one row per passenger; presumably the columns are
# [P(not survived), P(survived)], assuming 'Survived' is coded 0/1.
print(model.predict(sample_passengers))
print(model.predict_proba(sample_passengers))

[0 1 0]
[[0.88842304 0.11157696]
 [0.05412908 0.94587092]
 [0.77204559 0.22795441]]
