# In [None]:
import codecademylib3_seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the passenger table and, in the same step, recode the 'Sex' column
# as integers: 'male' -> 0, 'female' -> 1.
passengers = pd.read_csv('passengers.csv').assign(
    Sex=lambda frame: frame['Sex'].map({'male': 0, 'female': 1})
)
print(passengers)

# Replace missing (NaN) ages with the mean age, printing the column before
# and after so the effect is visible.
# The filled Series is assigned back to the column instead of calling
# fillna(inplace=True) on passengers['Age']: that in-place call acts on an
# intermediate object, triggers a FutureWarning on pandas 2.x, and leaves the
# DataFrame unchanged once copy-on-write is enabled.
# NOTE(review): the mean is taken over every row, before the train/test split
# further down, so test rows influence the imputed value.
print(passengers.Age.values)
passengers['Age'] = passengers['Age'].fillna(passengers['Age'].mean())
print(passengers.Age.values)

# Indicator columns for passenger class: each holds 1 where 'Pclass' equals
# the given class and 0 everywhere else. Rows of any other class therefore
# get 0 in both columns.
passengers['FirstClass'] = (passengers['Pclass'] == 1).astype('int64')
print(passengers.FirstClass)

passengers['SecondClass'] = (passengers['Pclass'] == 2).astype('int64')
print(passengers.SecondClass)

# Model inputs are the four prepared columns; the target is 'Survived'.
features = passengers.loc[:, ['Sex', 'Age', 'FirstClass', 'SecondClass']]
survival = passengers.Survived

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    survival,
    test_size=0.2,
    random_state=42,
)

# Standardize the features to mean 0 and standard deviation 1.
# The scaler learns its statistics from the training rows only; that same
# fitted scaler is then applied to both the training and the test split.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

# Fit a logistic regression with default settings on the scaled training data.
model = LogisticRegression().fit(x_train, y_train)

# Report the score on the training rows first, then on the held-out test rows.
print(model.score(X=x_train, y=y_train))
print(model.score(X=x_test, y=y_test))

# Show the fitted coefficients, one per feature column.
# Original author's reading of this output (not re-verified here): Sex is the
# most important feature for predicting survival, FirstClass the second,
# SecondClass the third, and Age the fourth.
print(model.coef_)

# Sample passenger features, in the column order the model was trained on:
# [Sex (0 = male, 1 = female), Age, FirstClass, SecondClass]
Jack = np.array([0.0, 20.0, 0.0, 0.0])
Rose = np.array([1.0, 17.0, 1.0, 0.0])
# Sex is encoded strictly as 0 or 1, so it is 1.0 here; the earlier value of
# 1.9 lay outside that encoding and would have been scaled and scored as if
# it were a meaningful input.
# NOTE(review): 1.9 looked like a typo for 1.0 -- confirm the intended value.
You = np.array([1.0, 23.0, 1.0, 0.0])

# Stack the three passengers into one (3, 4) array, one row per passenger
sample_passengers = np.array([Jack, Rose, You])

# Apply the already-fitted scaler so the samples are on the same scale as the
# data the model was trained on
sample_passengers = scaler.transform(sample_passengers)
print(sample_passengers)

# Make survival predictions: first the predicted class for each passenger,
# then the per-class probabilities behind those predictions
print(model.predict(sample_passengers))
print(model.predict_proba(sample_passengers))