In [549]:
# TITANIC DATA ANALYSIS RESULTS
# =============================

# Total passengers: 891
# Overall survival rate: 38.4%

# --- Survival by Gender ---
# Male survival rate: 18.9%
# Female survival rate: 74.2%

# --- Survival by Class ---
# 1st Class: 62.9%
# 2nd Class: 47.3%
# 3rd Class: 24.2%

# --- Age Stats ---
# Average Age: 29.7
# Median Age: 28
# Youngest Passenger: 0.42
# Oldest Passenger: 80

# --- Fare Analysis ---
# Average Fare: £32.20
# Highest Fare: £512.33
# Average Fare of Survivors: £48.40
# Average Fare of Non-Survivors: £22.20

# --- Family Size Impact ---
# Solo travelers survival: 30.5%
# Passengers with family survival: 50.1%

# --- Embarkation Port ---
# C: 168 passengers, survival 55%
# Q: 77 passengers, survival 38%
# S: 644 passengers, survival 34%

In [550]:
import csv
import statistics as stats
from typing import List, Dict, Any

# ============================
# TITANIC DATA ANALYSIS SCRIPT
# ============================

def load_passengers(filename: str) -> List[Dict[str, Any]]:
    """Read the CSV file at ``filename`` and return its rows as dictionaries.

    The first line of the file is used as the header, so every returned
    dictionary maps a column name to that row's raw (string) cell value.
    """
    # newline="" leaves newline handling to the csv module.
    with open(filename, mode="r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle)
        rows = [row for row in reader]
    return rows


def get_column(data: List[Dict[str, Any]], key: str, cast=str) -> List:
    """Extract one column from ``data``, casting every non-empty value.

    Args:
        data: Rows as dictionaries, e.g. as produced by ``csv.DictReader``.
        key: Column name to read from each row.
        cast: Callable applied to each kept value (defaults to ``str``).

    Returns:
        The cast values in row order. A value is skipped when it is ``None``
        or a blank / whitespace-only string. Kept values are passed to
        ``cast`` unmodified (they are not stripped first).

    Raises:
        KeyError: If a row has no ``key`` entry at all.
    """
    column = []
    for row in data:
        value = row[key]
        # csv.DictReader fills fields that are missing from a short row with
        # None; treat that as an empty cell instead of crashing on .strip().
        if value is None:
            continue
        # Only strings can be "blank"; other value types go straight to cast.
        if isinstance(value, str) and not value.strip():
            continue
        column.append(cast(value))
    return column


def safe_mean(values: List[float]) -> float:
    """Arithmetic mean of ``values``; an empty list yields 0.0."""
    if not values:
        # statistics.mean would raise on empty input, so short-circuit.
        return 0.0
    return stats.mean(values)


def percentage(part: float, whole: float) -> float:
    """Express ``part`` as a percentage of ``whole``.

    A falsy ``whole`` (zero) returns 0.0 rather than dividing by zero.
    """
    if not whole:
        return 0.0
    return part / whole * 100


# ----------------------------
# Load data
# ----------------------------
def _select(rows, column, value):
    """Keep the rows whose raw CSV text in ``column`` equals ``value``."""
    return [row for row in rows if row[column] == value]


def _travels_alone(row):
    """True when both the SibSp and Parch fields of ``row`` are "0"."""
    return row["SibSp"] == "0" and row["Parch"] == "0"


passengers = load_passengers("train.csv")

# Segmentation
# Every cell is still the raw CSV string, so comparisons use "1", not 1.
male = _select(passengers, "Sex", "male")
female = _select(passengers, "Sex", "female")
pclass1 = _select(passengers, "Pclass", "1")
pclass2 = _select(passengers, "Pclass", "2")
pclass3 = _select(passengers, "Pclass", "3")

survived = _select(passengers, "Survived", "1")
died = _select(passengers, "Survived", "0")

# `family` is the exact complement of `solo`.
solo = [row for row in passengers if _travels_alone(row)]
family = [row for row in passengers if not _travels_alone(row)]

embark_c = _select(passengers, "Embarked", "C")
embark_q = _select(passengers, "Embarked", "Q")
embark_s = _select(passengers, "Embarked", "S")

# ----------------------------
# Core computations
# ----------------------------
def _survival_rate(group: List[Dict[str, Any]]) -> float:
    """Mean of the group's Survived column (cast to float) times 100.

    Returns 0.0 for a group with no usable Survived values.
    """
    return safe_mean(get_column(group, "Survived", float)) * 100


# Rows with an empty Age / Fare cell are dropped by get_column.
ages = get_column(passengers, "Age", float)
fares = get_column(passengers, "Fare", float)

male_survival = _survival_rate(male)
female_survival = _survival_rate(female)

class_survival = {
    label: _survival_rate(group)
    for label, group in (("1st", pclass1), ("2nd", pclass2), ("3rd", pclass3))
}

total_survivors = len(survived)
total_non_survivors = len(died)
# Counts only rows whose Survived field is "1" or "0".
total_passengers = total_survivors + total_non_survivors
overall_survival_rate = percentage(total_survivors, total_passengers)

avg_age = safe_mean(ages)
# statistics.median raises StatisticsError on empty data; guard it the same
# way as min/max below so an all-blank Age column does not abort the report.
median_age = stats.median(ages) if ages else 0
youngest = min(ages) if ages else 0
oldest = max(ages) if ages else 0

avg_fare = safe_mean(fares)
max_fare = max(fares) if fares else 0
avg_fare_survivors = safe_mean(get_column(survived, "Fare", float))
avg_fare_non_survivors = safe_mean(get_column(died, "Fare", float))

solo_survival = _survival_rate(solo)
family_survival = _survival_rate(family)

embark_stats = {
    port: {"count": len(group), "survival": _survival_rate(group)}
    for port, group in (("C", embark_c), ("Q", embark_q), ("S", embark_s))
}

# ----------------------------
# Report formatting
# ----------------------------
# One list entry per output line; joining with "\n" reproduces the layout,
# including the blank first line and the trailing newline.
report_lines = [
    "",
    "TITANIC DATA ANALYSIS RESULTS",
    "=============================",
    "",
    f"Total passengers: {total_passengers}",
    f"Overall survival rate: {overall_survival_rate:.1f}%",
    "",
    "--- Survival by Gender ---",
    f"Male survival rate: {male_survival:.1f}%",
    f"Female survival rate: {female_survival:.1f}%",
    "",
    "--- Survival by Class ---",
    *[
        f"{label} Class: {class_survival[label]:.1f}%"
        for label in ("1st", "2nd", "3rd")
    ],
    "",
    "--- Age Stats ---",
    f"Average Age: {avg_age:.1f}",
    f"Median Age: {median_age}",
    f"Youngest Passenger: {youngest}",
    f"Oldest Passenger: {oldest}",
    "",
    "--- Fare Analysis ---",
    f"Average Fare: £{avg_fare:.2f}",
    f"Highest Fare: £{max_fare:.2f}",
    f"Average Fare of Survivors: £{avg_fare_survivors:.2f}",
    f"Average Fare of Non-Survivors: £{avg_fare_non_survivors:.2f}",
    "",
    "--- Family Size Impact ---",
    f"Solo travelers survival: {solo_survival:.1f}%",
    f"Passengers with family survival: {family_survival:.1f}%",
    "",
    "--- Embarkation Port ---",
    *[
        f"{port}: {embark_stats[port]['count']} passengers, "
        f"survival {embark_stats[port]['survival']:.1f}%"
        for port in ("C", "Q", "S")
    ],
    "",
]
report = "\n".join(report_lines)

# ----------------------------
# Export
# ----------------------------
# Destination is relative to the notebook's working directory; an existing
# file of the same name is overwritten.
output_file = "titanic_analysis.txt"
with open(output_file, mode="w", encoding="utf-8") as report_file:
    report_file.write(report)

# Confirmation message shown in the cell output.
print(f"Report successfully saved as '{output_file}'")

Report successfully saved as 'titanic_analysis.txt'
