In [1]:
import pandas as pd

In [2]:
# Read both input CSV files from the working directory, in this order:
# the cleaned Titanic table first, then the ticket fare table.
titanic, fares = (
    pd.read_csv(csv_name)
    for csv_name in ("titanic_cleaned.csv", "ticket_fares.csv")
)

In [3]:
# Attach the fare recorded in `fares` to every row of `titanic`, replacing the
# Fare column that `titanic` already carries.
#
# A left merge emits one output row per matching right-hand row, so a Ticket
# that occurs more than once in `fares` would duplicate passengers and skew
# every group mean computed from `df`. Keep only the first row per Ticket
# (if duplicate rows disagree on Fare, the first one wins) and let
# validate="many_to_one" assert that the right-hand key really is unique.
fares_per_ticket = fares.drop_duplicates(subset="Ticket")

df = (
    titanic
    .merge(fares_per_ticket, on="Ticket", how="left", validate="many_to_one")
    # Both inputs have a Fare column, so pandas suffixes them _x (left) and
    # _y (right); discard the left one and give the right one the plain name.
    .drop(columns=["Fare_x"])
    .rename(columns={"Fare_y": "Fare"})
)

In [4]:
# Age band edges and their labels. With right=True each band includes its
# upper edge: Child [0, 12], Teen (12, 19], Adult (19, 59], Senior (59, 120].
bins = [0, 12, 19, 59, 120]
labels = ["Child", "Teen", "Adult", "Senior"]

# include_lowest=True closes the first band on the left, so an Age of exactly 0
# is labelled "Child" instead of silently becoming NaN and dropping out of the
# later group-bys. Missing ages and ages outside 0-120 still map to NaN.
df["Age Group"] = pd.cut(
    df["Age"],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)

In [5]:
# Mean of Survived for every Sex x Age Group combination (a survival rate,
# presumably, since Survived looks like a 0/1 flag -- confirm against the data).
#
# "Age Group" is a categorical column produced by pd.cut. Grouping by a
# categorical without an explicit `observed` argument triggers a pandas
# FutureWarning because the default is changing. Passing observed=False pins
# the current behaviour: every category combination appears in the result,
# including ones with no rows.
survivalWomenChildren = (
    df.groupby(["Sex", "Age Group"], observed=False)["Survived"].mean()
)
print(survivalWomenChildren)

Sex     Age Group
female  Child        0.388350
        Teen         0.753247
        Adult        0.699561
        Senior       1.000000
male    Child        0.320000
        Teen         0.086957
        Adult        0.202624
        Senior       0.111111
Name: Survived, dtype: float64


  survivalWomenChildren = df.groupby(["Sex","Age Group"])["Survived"].mean()


In [6]:
# Average the Survived column within each distinct Pclass value, then show the
# resulting Series (indexed by Pclass).
survivalbyClass = df["Survived"].groupby(df["Pclass"]).mean()
print(survivalbyClass)

Pclass
1    0.689055
2    0.480000
3    0.239057
Name: Survived, dtype: float64


In [7]:
# Cut Fare into four equal-frequency (quartile) bins and store the bin label,
# ordered from the cheapest quartile to the most expensive, in "FareBin".
df["FareBin"] = pd.qcut(
    x=df["Fare"],
    q=4,
    labels=["low", "medium", "high", "VeryHigh"],
)


In [8]:
# Mean of Survived within each fare quartile label.
#
# "FareBin" is a categorical column produced by pd.qcut. Grouping by a
# categorical without an explicit `observed` argument triggers a pandas
# FutureWarning because the default is changing. Passing observed=False pins
# the current behaviour, so all four labels always appear in the result.
survivalbyFare = df.groupby("FareBin", observed=False)["Survived"].mean()
print(survivalbyFare)

FareBin
low         0.229426
medium      0.441687
high        0.336449
VeryHigh    0.609418
Name: Survived, dtype: float64


  survivalbyFare = df.groupby("FareBin")["Survived"].mean()


In [9]:
# Write report.txt: for each hypothesis, the grouped survival table(s) followed
# by a written conclusion.
#
# Sentences that single out a specific group are derived from the tables
# instead of being hard-coded, so they stay in step with the numbers written
# directly above them.
lowest_sex, lowest_age = survivalWomenChildren.idxmin()  # (Sex, Age Group) of the lowest mean
best_fare_bin = survivalbyFare.idxmax()
worst_fare_bin = survivalbyFare.idxmin()
if survivalbyFare.is_monotonic_increasing:
    fare_trend = "with survival rising at every step in between"
else:
    fare_trend = "although survival did not rise at every step in between"

# An explicit encoding makes the output independent of the platform default.
with open("report.txt", "w", encoding="utf-8") as f:

    f.write("Hypothesis 1: Women and Children First\n")
    f.write(str(survivalWomenChildren))
    f.write("\n\nConclusion:\n")
    f.write(
        "Based on the survival rates for each combination of Sex and AgeGroup, "
        "the results strongly support the 'women and children first' hypothesis. "
        "Female passengers had much higher survival rates than male passengers "
        "across almost all age groups. Among children, females also survived at a "
        "higher rate than males. "
        f"{str(lowest_sex).capitalize()} passengers in the {lowest_age} age group "
        "had the lowest survival rate of all groups. "
        "These trends match the historical accounts of the Titanic evacuation policy, "
        "where women and children were given priority for seats on the lifeboats. "
        "Overall, the grouped survival data clearly confirms that gender and age "
        "played a major role in determining survival.\n\n"
    )


    f.write("Hypothesis 2: Wealthy Passengers Survived More\n")
    f.write("Method A (By Passenger Class):\n")
    f.write(str(survivalbyClass))
    f.write("\n\nMethod B (By Fare Bins):\n")
    f.write(str(survivalbyFare))
    f.write("\n\nConclusion:\n")
    f.write(
        "The analysis of survival rates by passenger class shows that "
        "first-class passengers had the highest survival rate, followed by "
        "second-class, while third-class passengers had the lowest survival rate. "
        "To further verify this, passengers were also grouped into fare ranges using fare bins. "
        f"The results were broadly consistent: the '{best_fare_bin}' fare bin had the "
        f"highest survival rate and the '{worst_fare_bin}' fare bin the lowest, "
        f"{fare_trend}. These two methods together confirm that wealthier "
        "passengers were more likely to survive the disaster, most likely due to better "
        "cabin locations and earlier access to lifeboats.\n"
    )