In [514]:
# TITANIC DATA ANALYSIS RESULTS
# =============================

# Total passengers: 891
# Overall survival rate: 38.4%

# --- Survival by Gender ---
# Male survival rate: 18.9%
# Female survival rate: 74.2%

# --- Survival by Class ---
# 1st Class: 62.9%
# 2nd Class: 47.3%
# 3rd Class: 24.2%

# --- Age Stats ---
# Average Age: 29.7
# Median Age: 28
# Youngest Passenger: 0.42
# Oldest Passenger: 80

# --- Fare Analysis ---
# Average Fare: £32.20
# Highest Fare: £512.33
# Average Fare of Survivors: £48.40
# Average Fare of Non-Survivors: £22.20

# --- Family Size Impact ---
# Solo travelers survival: 30.5%
# Passengers with family survival: 50.1%

# --- Embarkation Port ---
# C: 168 passengers, survival 55%
# Q: 77 passengers, survival 38%
# S: 644 passengers, survival 34%

In [515]:
import csv
import statistics as stats

with open("train.csv", "r", encoding="utf-8", newline='') as file:
    passengers = list(csv.DictReader(file))

In [516]:
def get_column(data, key, cast=str):
    return [cast(row[key]) for row in data if row[key].strip()]

male = [p for p in passengers if p["Sex"] == "male"]
female = [p for p in passengers if p["Sex"] == "female"]
pclass1 = [p for p in passengers if p["Pclass"] == "1"]
pclass2 = [p for p in passengers if p["Pclass"] == "2"]
pclass3 = [p for p in passengers if p["Pclass"] == "3"]
survived = [p for p in passengers if p["Survived"] == "1"]
died = [p for p in passengers if p["Survived"] == "0"]
solo = [p for p in passengers if p["SibSp"] == "0" and p["Parch"] == "0"]
family = [p for p in passengers if p["SibSp"] == "1" or p["Parch"] == "1"]
embark_c = [p for p in passengers if p["Embarked"] == "C"]
embark_q = [p for p in passengers if p["Embarked"] == "Q"]
embark_s = [p for p in passengers if p["Embarked"] == "S"]

male_survival = get_column(male, "Survived", float)
female_survival = get_column(female, "Survived", float)
class1_survival = get_column(pclass1, "Survived", float)
class2_survival = get_column(pclass2, "Survived", float)
class3_survival = get_column(pclass3, "Survived", float)
survivors = get_column(survived, "Survived", float)
non_survivors = get_column(died, "Survived", float)
solo_survivors = get_column(solo, "Survived", float)
family_survivors = get_column(family, "Survived", float)
embark_c_survival = get_column(embark_c, "Survived", float)
embark_q_survival = get_column(embark_q, "Survived", float)
embark_s_survival =get_column(embark_s, "Survived", float)
survivors_fares = get_column(survived, "Fare", float)
non_survivors_fares = get_column(died, "Fare", float)

ages = get_column(passengers, "Age", float)
fares = get_column(passengers, "Fare", float)

In [517]:
total_survivors = len(survivors)
total_non_survivors = len(non_survivors)
total_passengers = total_survivors + total_non_survivors
overall_survival_rate = total_survivors/total_passengers*100
male_survival = stats.mean(male_survival)*100
female_survival = stats.mean(female_survival)*100
class_survival = {
    "1st": stats.mean(class1_survival)*100,
    "2nd": stats.mean(class2_survival)*100,
    "3rd": stats.mean(class3_survival)*100}
avg_age = stats.mean(ages)
median_age = stats.median(ages)
youngest = min(ages)
oldest = max(ages)
avg_fare = stats.mean(fares)
max_fare = max(fares)
avg_fare_survivors = stats.mean(survivors_fares)
avg_fare_non_survivors = stats.mean(non_survivors_fares)
solo_survival = stats.mean(solo_survivors)*100
family_survival = stats.mean(family_survivors)*100
embark_stats = {
    "C": {"count": len(embark_c), "survival": stats.mean(embark_c_survival)*100},
    "Q": {"count": len(embark_q), "survival": stats.mean(embark_q_survival)*100},
    "S": {"count": len(embark_s), "survival": stats.mean(embark_s_survival)*100},
}

report = f"""
Titanic Data Analysis Resu
=============================

 Total passengers: {total_passengers}
 Total survivors: {total_survivors}
 Total non-survivors: {total_non_survivors}
 Overall survival rate: {overall_survival_rate:.1f}%

 --- Survival by Gender ---
 Male survival rate: {male_survival:.1f}%
 Female survival rate: {female_survival:.1f}%

 --- Survival by Class ---
 1st Class: {class_survival["1st"]:.1f}%
 2nd Class: {class_survival["2nd"]:.1f}%
 3rd Class: {class_survival["3rd"]:.1f}%

 --- Age Stats ---
 Average Age: {avg_age:.1f}
 Median Age: {median_age}
 Youngest Passenger: {youngest}
 Oldest Passenger: {oldest}

 --- Fare Analysis ---
 Average Fare: £{avg_fare:.2f}
 Highest Fare: £{max_fare:.2f}
 Average Fare of Survivors: £{avg_fare_survivors:.2f}
 Average Fare of Non-Survivors: £{avg_fare_non_survivors:.2f}

 --- Family Size Impact ---
 Solo travelers survival: {solo_survival:.1f}%
 Passengers with family survival: {family_survival:.1f}%

 --- Embarkation Port ---
 C: {embark_stats["C"]["count"]} passengers, survival {embark_stats["C"]["survival"]:.2f}%
 Q: {embark_stats["Q"]["count"]} passengers, survival {embark_stats["Q"]["survival"]:.2f}%
 S: {embark_stats["S"]["count"]} passengers, survival {embark_stats["S"]["survival"]:.2f}%

  """

# with open("titanic_analysis.txt", "w", encoding="utf-8") as f:
#     f.write(report)

# print("Report successfully saved as 'titanic_analysis.txt'")

In [518]:
print(report)


Titanic Data Analysis Resu

 Total passengers: 891
 Total survivors: 342
 Total non-survivors: 549
 Overall survival rate: 38.4%

 --- Survival by Gender ---
 Male survival rate: 18.9%
 Female survival rate: 74.2%

 --- Survival by Class ---
 1st Class: 63.0%
 2nd Class: 47.3%
 3rd Class: 24.2%

 --- Age Stats ---
 Average Age: 29.7
 Median Age: 28.0
 Youngest Passenger: 0.42
 Oldest Passenger: 80.0

 --- Fare Analysis ---
 Average Fare: £32.20
 Highest Fare: £512.33
 Average Fare of Survivors: £48.40
 Average Fare of Non-Survivors: £22.12

 --- Family Size Impact ---
 Solo travelers survival: 30.4%
 Passengers with family survival: 53.0%

 --- Embarkation Port ---
 C: 168 passengers, survival 55.36%
 Q: 77 passengers, survival 38.96%
 S: 644 passengers, survival 33.70%

  
