In [1]:
# 📌 Step 1: Importing the pandas library
# Pandas helps us work with structured data (tables like Excel sheets)
import pandas as pd

In [2]:
# 📌 Step 2: Read the Titanic passenger table into a DataFrame
# NOTE: the path below is absolute and machine-specific; the file must exist at that location.
df = pd.read_csv(
    filepath_or_buffer="C:/Users/dbda.STUDENTSDC/Music/LabPractice/Notebooks/Datasets/titanic.csv",
)

# ✅ Quick preview of the data: the first 5 rows (5 is also head()'s default).
df.head(n=5)

Unnamed: 0,survived,pclass,gender,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [3]:
# Number of non-missing values in each column (same result as df.count()).
df.notna().sum()

survived    891
pclass      891
gender      891
age         714
sibsp       891
parch       891
fare        891
embarked    889
deck        203
dtype: int64

In [4]:
# 📌 Step 3: Select relevant columns
# We only need 'gender' and 'survived' for probability calculations.
#
# 📌 Step 4: Remove missing values
# Rows with an empty 'gender' or 'survived' value are dropped to avoid errors later.
#
# Both steps are done in ONE chained expression that builds a new DataFrame.
# Calling dropna(inplace=True) on a column selection mutates an object derived
# from another frame, which can raise SettingWithCopyWarning and makes the cell
# harder to reason about; the chained form avoids in-place mutation entirely.
df = df[['gender', 'survived']].dropna()

In [5]:
# 📌 Step 5: Total number of rows (passengers)
# The first entry of 'df.shape' is the row count of the DataFrame.
total = df.shape[0]

# 📌 Step 6: Report the total
print(f"Total passengers: {total}")


Total passengers: 891


# Marginal Probability

In [6]:
# 📌 Step 7: Marginal probability of gender
# A marginal probability looks at ONE variable only and ignores every other column:
#   P(gender = g) = (number of passengers with gender g) / (total passengers)
female_count = len(df.loc[df['gender'].eq('female')])  # rows where gender is 'female'
male_count = len(df.loc[df['gender'].eq('male')])  # rows where gender is 'male'

p_female = female_count / total  # Probability of a female passenger
p_male = male_count / total  # Probability of a male passenger

# 📌 Verification: total number of female passengers
print(female_count)

# Optional checks (uncomment to explore):
# print(df['gender'] == 'female')  # Boolean mask marking the 'female' rows
# print(round(p_female, 2))  # Probability rounded to 2 decimals
# print(df[df['gender'] == 'female'])  # Only the female passenger rows

314


In [7]:
# 📌 Step 8: Marginal probability of survival
# Share of passengers in each survival outcome, ignoring gender.
survived_count = int(df['survived'].eq(1).sum())  # passengers with survived == 1
not_survived_count = int(df['survived'].eq(0).sum())  # passengers with survived == 0

p_survived = survived_count / total  # Probability of survival
p_not_survived = not_survived_count / total  # Probability of not surviving

# 📌 Show both probabilities
print(p_survived)
print(p_not_survived)

# ✅ The two outcomes cover every passenger, so the probabilities should add up to 1.
print(p_survived + p_not_survived)

0.3838383838383838
0.6161616161616161
1.0


In [8]:
# 📌 Step 9: Report the marginal probabilities
# One print call; sep="\n" puts each item on its own line.
# ':.3f' inside an f-string formats the value with 3 decimal places.
print(
    "\nMarginal Probabilities:",  # Header for clarity
    f"P(gender = female): {p_female:.3f}",  # Probability of female passenger
    f"P(gender = male): {p_male:.3f}",  # Probability of male passenger
    f"P(survived = 1): {p_survived:.3f}",  # Probability of survival
    f"P(survived = 0): {p_not_survived:.3f}",  # Probability of not surviving
    sep="\n",
)


Marginal Probabilities:
P(gender = female): 0.352
P(gender = male): 0.648
P(survived = 1): 0.384
P(survived = 0): 0.616


# Joint Probability

In [9]:
# 📌 Step 10: Joint probabilities
# A joint probability is the share of ALL passengers for which two conditions hold at once:
#   P(gender = g, survived = s) = (rows with gender g AND survived s) / total

# Boolean masks, one per condition; '&' combines them row by row.
is_female = df['gender'] == 'female'
is_male = df['gender'] == 'male'
did_survive = df['survived'] == 1
did_not_survive = df['survived'] == 0

p_female_survived = len(df[is_female & did_survive]) / total  # female AND survived
p_female_not_survived = len(df[is_female & did_not_survive]) / total  # female AND did not survive
p_male_survived = len(df[is_male & did_survive]) / total  # male AND survived
p_male_not_survived = len(df[is_male & did_not_survive]) / total  # male AND did not survive

In [10]:
# Report the four joint probabilities, one per line, rounded to 3 decimals.
print(
    "\nJoint Probabilities:",
    f"P(gender = female, survived = 1): {p_female_survived:.3f}",
    f"P(gender = female, survived = 0): {p_female_not_survived:.3f}",
    f"P(gender = male, survived = 1): {p_male_survived:.3f}",
    f"P(gender = male, survived = 0): {p_male_not_survived:.3f}",
    sep="\n",
)


Joint Probabilities:
P(gender = female, survived = 1): 0.262
P(gender = female, survived = 0): 0.091
P(gender = male, survived = 1): 0.122
P(gender = male, survived = 0): 0.525


# Conditional Probability

In [11]:
print("\nConditional Probabilities:")

# 📌 Step 11: Conditional probabilities
# Question answered: "Among passengers of a given gender, what fraction survived?"
# Formula: P(A | B) = P(A AND B) / P(B)

# P(Survived | Female) = P(Survived AND Female) / P(Female)
p_survived_given_female = p_female_survived / p_female
# P(Survived | Male) = P(Survived AND Male) / P(Male)
p_survived_given_male = p_male_survived / p_male

print(f"P(Survived = 1 | Gender = Female): {p_survived_given_female:.3f}")  # survival rate among females
print(f"P(Survived = 1 | Gender = Male): {p_survived_given_male:.3f}")  # survival rate among males



Conditional Probabilities:
P(Survived = 1 | Gender = Female): 0.742
P(Survived = 1 | Gender = Male): 0.189


In [12]:
# p_female_survived: This is a joint probability. It answers: "What is the probability that a randomly chosen passenger was both female AND survived?"

# Calculated as: len(df[(df['gender'] == 'female') & (df['survived'] == 1)]) / total

# p_survived_given_female: This is a conditional probability. It answers: "What is the probability that a randomly chosen passenger survived, GIVEN THAT they were female?"

# Calculated as: p_female_survived / p_female

# Bayes' Theorem

In [13]:
print("\nBayes' Theorem Calculations: ")

# Bayes' theorem reverses the direction of the condition:
#   P(Female | Survived) = [P(Survived | Female) * P(Female)] / P(Survived)
# Question answered: "Given that a passenger survived, what is the probability they were female?"

# Numerator: P(Survived | Female) * P(Female)
bayes_numerator_female = p_survived_given_female * p_female
p_female_given_survived = bayes_numerator_female / p_survived

print(f"P(Gender = Female | Survived = 1): {p_female_given_survived:.3f}")


Bayes' Theorem Calculations: 
P(Gender = Female | Survived = 1): 0.681


In [14]:
# P(Male | Survived) = [P(Survived | Male) * P(Male)] / P(Survived)

# This answers: "What is the probability that a randomly chosen passenger was male, GIVEN THAT they survived?"

# Stored under its own name so it does not overwrite the female result
# (p_female_given_survived) computed in the previous cell.
p_male_given_survived = (p_survived_given_male * p_male) / p_survived
print(f"P(Gender = Male | Survived = 1): {p_male_given_survived:.3f}")

P(Gender = Male | Survived = 1): 0.319
