In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

In [2]:
#Loading the data
titanic = pd.read_csv("Logistic-Regression/titanic.csv")

#One-hot encoding categorical variables
titanic = pd.get_dummies(titanic, columns = ["Sex", "Embarked"], drop_first = True)

In [3]:
#Establishing variables to be utilized and dependent y variable

x = titanic[["Pclass", "Sex_male", "SibSp", "ParCh", "Fare", "Embarked_Q", "Embarked_S"]]
y = titanic["Survived"]

In [4]:
#Establish parameters for Logistic Regression function

lr = LogisticRegression(penalty=None, max_iter=10000)

In [5]:
#Train my model

model_1 = lr.fit(x,y)

In [6]:
y_int = model_1.intercept_[0]
print(f"The Y-intercept is {y_int}")

The Y-intercept is 3.5102517811687632


In [7]:
#Evaluate the coeffecients in the model, provide some intrepetation for a few models

coef_dict = dict(zip(model_1.feature_names_in_, model_1.coef_[0]))
coef_dict

{'Pclass': -0.8411983155828998,
 'Sex_male': -2.7320960369541756,
 'SibSp': -0.23407187116223502,
 'ParCh': -0.07432788709763255,
 'Fare': 0.002453243904122762,
 'Embarked_Q': -0.20764540161623551,
 'Embarked_S': -0.4703463526215942}

In [8]:
print(f"All else being equal, if a passenger is male the log odds of survival decreases by {coef_dict['Sex_male']}.")
print(f"All else being equal, every unit of fare a ticket increases the log odds of survival increases by {coef_dict['Fare']}.")
print(f"All else being equal, every additional parent or child onboard decreases the log odds of survival by {coef_dict['ParCh']}.")

All else being equal, if a passenger is male the log odds of survival decreases by -2.7320960369541756.
All else being equal, every unit of fare a ticket increases the log odds of survival increases by 0.002453243904122762.
All else being equal, every additional parent or child onboard decreases the log odds of survival by -0.07432788709763255.


Lets predict the probability of survival for a passener who meets the following criteria with my logistic regression model:\
• Is female\
• Has a first class ticket that cost $62.50\
• Has a spouse, no siblings, no parents, and two children aboard\
• Embarked from Queenstown

In [9]:
#Inputting this passengers charateristics into my model

pass_surv_logodds = model_1.intercept_[0] + coef_dict['Sex_male'] * 0 + coef_dict['Fare'] * 62.5 + coef_dict['Pclass'] * 1 + coef_dict['SibSp'] * 1 + coef_dict["ParCh"] * 2 + coef_dict["Embarked_Q"] * 1 

In [10]:
#Log Odds of Survival

pass_surv_logodds

2.2320081626198007

In [11]:
#Transforming Log Odds into probability

import math

odds = math.exp(pass_surv_logodds)
probability = 1 / (1 + odds)

print(f"The probabilty of survival of this passenger is {probability:.2%}")

The probabilty of survival of this passenger is 9.69%
