# Titanic: Three Data Visualizations (Python Review)

**Research question:** *Which factors are related to survival on the Titanic?*

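We create three visualizations: survival rate by sex, survival rate by passenger class, and a scatter plot of age versus fare. Every plot uses a customized color palette; the scatter plot additionally customizes marker shape (sex) and marker size (passenger class).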


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Location of the raw CSV.
# NOTE(review): this is an absolute, environment-specific path — adjust it if the
# file lives elsewhere on your machine.
DATA_PATH = "/mnt/data/titanic.csv"

# Columns the figures below rely on, matched case-insensitively against the file header.
REQUIRED_COLUMNS = ("survived", "sex", "pclass", "age", "fare")

titanic = pd.read_csv(DATA_PATH)

# Map lowercase column name -> actual column name, so later cells work whether the
# header is spelled "Survived", "survived", "SURVIVED", ...
cols = {c.lower(): c for c in titanic.columns}

# Fail fast with a readable message instead of letting a missing column turn into
# `None` and surface later as an obscure KeyError inside a plotting cell.
missing = [name for name in REQUIRED_COLUMNS if name not in cols]
if missing:
    raise KeyError(
        f"{DATA_PATH} is missing required column(s) {missing}; "
        f"columns found: {list(titanic.columns)}"
    )

survived, sex, pclass, age, fare = (cols[name] for name in REQUIRED_COLUMNS)
titanic.head()

## Figure 1 — Bar chart: Survival rate by sex

In [None]:
# Keep only the two columns needed and drop rows where either is missing.
df1 = titanic[[survived, sex]].dropna()

# Mean of the survival column within each sex, highest first.
# (This is a survival *rate* provided the column is coded 0/1 — assumed, TODO confirm.)
sr_by_sex = df1.groupby(sex)[survived].mean().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(6, 4))

# Customized colors. They are assigned in bar order (highest rate first),
# not tied to a particular sex.
bars = ax.bar(sr_by_sex.index, sr_by_sex.values, color=["#4C78A8", "#F58518"])

ax.set_ylim(0, 1)
ax.set_title("Titanic Survival Rate by Sex")
ax.set_xlabel("Sex")  # axis label added so the figure stands alone, matching Figure 2
ax.set_ylabel("Survival Rate")

# Print each bar's value just above its top edge.
for b in bars:
    ax.text(
        b.get_x() + b.get_width() / 2,
        b.get_height() + 0.02,
        f"{b.get_height():.2f}",
        ha="center",
        va="bottom",
        fontsize=10,
    )

ax.grid(axis="y", alpha=0.3)
fig.tight_layout()
plt.show()

## Figure 2 — Bar chart: Survival rate by passenger class

In [None]:
# Coerce the class column to numeric *before* dropping missing rows, so values that
# fail to parse (turned into NaN by errors="coerce") are removed together with the
# rows that were missing to begin with. Using .assign builds a new frame instead of
# assigning into a derived one.
df2 = (
    titanic[[survived, pclass]]
    .assign(**{pclass: lambda frame: pd.to_numeric(frame[pclass], errors="coerce")})
    .dropna()
)

# Mean of the survival column within each class, ordered by class number.
# (This is a survival *rate* provided the column is coded 0/1 — assumed, TODO confirm.)
sr_by_cls = df2.groupby(pclass)[survived].mean().sort_index()

# ":g" renders 1 and 1.0 both as "1", so tick labels stay clean even when the class
# column ends up with a float dtype.
class_labels = [f"{cls:g}" for cls in sr_by_cls.index]

fig, ax = plt.subplots(figsize=(6, 4))
bars = ax.bar(class_labels, sr_by_cls.values, color=["#1b9e77", "#d95f02", "#7570b3"])  # custom palette

ax.set_ylim(0, 1)
ax.set_title("Titanic Survival Rate by Passenger Class")
ax.set_xlabel("Passenger Class")
ax.set_ylabel("Survival Rate")

# Print each bar's value just above its top edge.
for b in bars:
    ax.text(
        b.get_x() + b.get_width() / 2,
        b.get_height() + 0.02,
        f"{b.get_height():.2f}",
        ha="center",
        va="bottom",
        fontsize=10,
    )

ax.grid(axis="y", alpha=0.3)
fig.tight_layout()
plt.show()

## Figure 3 — Scatter: Age vs Fare (color=survived, shape=sex, size~class)

In [None]:
import matplotlib.lines as mlines  # used only to build legend proxy handles

# Rows with any of the five plotted attributes missing are dropped.
df3 = titanic[[age, fare, survived, sex, pclass]].dropna()

color_map = {0: "#f94144", 1: "#277da1"}  # customized colors, keyed by the survival value
marker_map = {"male": "o", "female": "^"}  # customized marker shapes, keyed by the sex value
# NOTE(review): the keys above must match the data exactly (lowercase "male"/"female",
# survival coded 0/1) — confirm against the CSV.

fig, ax = plt.subplots(figsize=(7, 5))

sex_handles = []
for sx, mk in marker_map.items():
    sub = df3[df3[sex] == sx]
    if sub.empty:
        # Nothing to draw for this group; also keep it out of the legend.
        continue
    ax.scatter(
        sub[age],
        sub[fare],
        c=sub[survived].map(color_map),
        # Marker area shrinks as the class number grows: 80 / 70 / 60 for classes 1 / 2 / 3.
        s=50 + (4 - pd.to_numeric(sub[pclass], errors="coerce")) * 10,
        marker=mk,
        alpha=0.6,
        edgecolors="k",
        linewidths=0.3,
    )
    # Neutral-gray proxy handle: the scatter itself is colored per point, so using it
    # directly as the legend entry would show whatever color the group's first point has.
    sex_handles.append(
        mlines.Line2D([], [], color="0.4", marker=mk, linestyle="None", markersize=8, label=f"{sx.title()}")
    )

ax.set_xlabel("Age")
ax.set_ylabel("Fare")
ax.set_title("Age vs Fare (color=Survived, shape=Sex, size~Class)")

# First legend (shape -> sex). It must be re-added as an artist because the second
# legend call below would otherwise replace it.
legend1 = ax.legend(handles=sex_handles, title="Sex", loc="upper right")
ax.add_artist(legend1)

# Second legend (color -> survival), with colors taken from color_map so the two stay in sync.
h_surv = [
    mlines.Line2D([], [], color=color_map[value], marker="s", linestyle="None", markersize=8, label=f"Survived={value}")
    for value in (1, 0)
]
ax.legend(handles=h_surv, title="Survived", loc="lower right")

ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()