# Survival Analysis on Titanic Dataset

Importing the required libraries

In [224]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

## Reading the dataset

In [3]:
file_path = "Titanic.csv"
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,last,first,gender,age,class,fare,embarked,survived
0,Braund,Mr. Owen Harris,M,22.0,3,7.25,Southampton,no
1,Cumings,Mrs. John Bradley (Florence Briggs Thayer),F,38.0,1,71.2833,Cherbourg,yes
2,Heikkinen,Miss Laina,F,26.0,3,7.925,Southampton,yes
3,Futrelle,Mrs. Jacques Heath (Lily May Peel),F,35.0,1,53.1,Southampton,yes
4,Allen,Mr. William Henry,M,35.0,3,8.05,Southampton,no


Checking for the null values in the dataset:

In [122]:
df.describe()

Unnamed: 0,age,class,fare
count,714.0,891.0,891.0
mean,29.699118,2.308642,32.204208
std,14.526497,0.836071,49.693429
min,0.42,1.0,0.0
25%,20.125,2.0,7.9104
50%,28.0,3.0,14.4542
75%,38.0,3.0,31.0
max,80.0,3.0,512.3292


Alternatively, we can use the following method as well:

In [126]:
df.isnull().any()

last        False
first       False
gender      False
age          True
class       False
fare        False
embarked    False
survived    False
dtype: bool

or further applying .any():

In [6]:
df.isnull().any().any()

True

Converting the frame values to strings:

In [20]:
mask = (df['survived'] == 'yes') & (df['gender'] == 'M')
df[mask].count()

last        109
first       109
gender      109
age          93
class       109
fare        109
embarked    109
survived    109
dtype: int64

In [144]:
survived = df[df['survived']=='yes'].groupby('class').count()
total_survived = df['survived'] == 'yes'
total_survived = total_survived.sum()
total_died = df['survived'] == 'no'
total_died = total_died.sum()
total_passengers = df['gender'].count()
# print(total_passengers)

Computing proportions:

In [152]:
survived_prop = survived['survived']/total_passengers
died_prop = df[df['survived'] == 'no'].groupby('class')['survived'].count()/total_passengers 

In [156]:
survived_prop

class
1    0.152637
2    0.097643
3    0.133558
Name: survived, dtype: float64

In [157]:
died_prop

class
1    0.089787
2    0.108866
3    0.417508
Name: survived, dtype: float64

In [161]:
new_df = pd.DataFrame({'Survived': survived_prop,
                      'Died': died_prop,
                      'Total_Prop' : survived_prop+died_prop})

In [165]:
new_df

Unnamed: 0_level_0,Survived,Died,Total_Prop
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.152637,0.089787,0.242424
2,0.097643,0.108866,0.20651
3,0.133558,0.417508,0.551066


In [213]:
survived_gender = df.groupby(['gender']).count()/total_passengers
survived_gender['died'] =df[df['survived'] == 'no'].groupby('gender')['survived'].count()/total_passengers 
survived_gender = survived_gender[['survived', 'died']]
survived_gender.reset_index(inplace = True)
survived_gender

Unnamed: 0,gender,survived,died
0,F,0.352413,0.090909
1,M,0.647587,0.525253


In [301]:
survived_class = df.groupby(['pclass']).count()/total_passengers
survived_class['died'] =df[df['survived'] == 'no'].groupby('pclass')['survived'].count()/total_passengers 
survived_class = survived_class[['survived', 'died']]
survived_class.reset_index(inplace = True)
survived_class

Unnamed: 0,pclass,survived,died
0,1,0.242424,0.089787
1,2,0.20651,0.108866
2,3,0.551066,0.417508


In terms of pure numbers:

In [309]:
# Removing null values:
df = df.dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 714 entries, 0 to 890
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   last      714 non-null    object 
 1   first     714 non-null    object 
 2   gender    714 non-null    object 
 3   age       714 non-null    float64
 4   pclass    714 non-null    int64  
 5   fare      714 non-null    float64
 6   embarked  714 non-null    object 
 7   survived  714 non-null    object 
dtypes: float64(2), int64(1), object(5)
memory usage: 50.2+ KB


Conditional Probabilities:

In [376]:
def survival_probability(gender, pclass):
    mask1 = (df['gender'] == gender) & (df['pclass'] == pclass) & (df['survived']== 'yes')
    mask2 = (df['gender'] == gender) & (df['pclass'] == pclass)
    gender_class_survived = df[mask1].shape[0]
    gender_class_total = df[mask2].shape[0]
    probability = gender_class_survived/gender_class_total
    return probability

# Calculate the conditional probabilities for each gender and passenger class
probabilities = {
    'P(Survived | female, class : 1)': survival_probability('F', 1),
    'P(Survived | female, class : 2)': survival_probability('F', 2),
    'P(Survived | female, class : 3)': survival_probability('F', 3),
    'P(Survived | male, class : 1)': survival_probability('M', 1),
    'P(Survived | male, class : 2)': survival_probability('M', 2),
    'P(Survived | male, class : 3)': survival_probability('M', 3),
}

for key, value in probabilities.items():
    print(f'{key}: {value:.4f}')

P(Survived | female, class : 1): 0.9647
P(Survived | female, class : 2): 0.9189
P(Survived | female, class : 3): 0.4608
P(Survived | male, class : 1): 0.3960
P(Survived | male, class : 2): 0.1515
P(Survived | male, class : 3): 0.1502


In [254]:
df.query("gender == 'M' and survived == 'yes'")

Unnamed: 0,last,first,gender,age,pclass,fare,embarked,survived
17,Williams,Mr. Charles Eugene,M,,2,13.0000,Southampton,yes
21,Beesley,Mr. Lawrence,M,34.0,2,13.0000,Southampton,yes
23,Sloper,Mr. William Thompson,M,28.0,1,35.5000,Southampton,yes
36,Mamee,Mr. Hanna,M,,3,7.2292,Cherbourg,yes
55,Woolner,Mr. Hugh,M,,1,35.5000,Southampton,yes
...,...,...,...,...,...,...,...,...
838,Chip,Mr. Chang,M,32.0,3,56.4958,Southampton,yes
839,Marechal,Mr. Pierre,M,,1,29.7000,Cherbourg,yes
857,Daly,Mr. Peter Denis,M,51.0,1,26.5500,Southampton,yes
869,Johnson,Master Harold Theodor,M,4.0,3,11.1333,Southampton,yes


In [252]:
df.rename(columns={"class": "pclass"}, inplace=True)
df.query("gender == 'M' and pclass == 1")

Unnamed: 0,last,first,gender,age,pclass,fare,embarked,survived
6,McCarthy,Mr. Timothy J,M,54.0,1,51.8625,Southampton,no
23,Sloper,Mr. William Thompson,M,28.0,1,35.5000,Southampton,yes
27,Fortune,Mr. Charles Alexander,M,19.0,1,263.0000,Southampton,no
30,Uruchurtu,Don. Manuel E,M,40.0,1,27.7208,Cherbourg,no
34,Meyer,Mr. Edgar Joseph,M,28.0,1,82.1708,Cherbourg,no
...,...,...,...,...,...,...,...,...
839,Marechal,Mr. Pierre,M,,1,29.7000,Cherbourg,yes
857,Daly,Mr. Peter Denis,M,51.0,1,26.5500,Southampton,yes
867,Roebling,Mr. Washington Augustus II,M,31.0,1,50.4958,Southampton,no
872,Carlsson,Mr. Frans Olof,M,33.0,1,5.0000,Southampton,no
