# In [12]:

import pandas as pd
import plotly.graph_objects as go
from matplotlib import pyplot as plt


# --- Load the data (adjust the path if needed) ---
# Column names are normalised while loading: surrounding whitespace is
# stripped and every name is title-cased.
titanic = pd.read_csv("DATOS/Titanic-Dataset.csv").rename(
    columns=lambda name: name.strip().title()
)

# Lookup tables from raw codes to human-readable labels
map_pclass = {
    1: "1ª clase",
    2: "2ª clase",
    3: "3ª clase",
}
map_sex = {
    "female": "Mujer",
    "male": "Hombre",
}
map_surv = {
    0: "No sobrevivió",
    1: "Sobrevivió",
}

# Add one "*_lbl" column per mapped variable; codes absent from a mapping
# come out as NaN.
titanic = titanic.assign(
    Pclass_lbl=titanic["Pclass"].map(map_pclass),
    Sex_lbl=titanic["Sex"].map(map_sex),
    Survived_lbl=titanic["Survived"].map(map_surv),
)

# --- NEW ORDER: Survival → Class → Sex ---

# Node labels, one list per stage, concatenated in stage order
labels_surv = ["No sobrevivió", "Sobrevivió"]
labels_pclass = ["1ª clase", "2ª clase", "3ª clase"]
labels_sex = ["Mujer", "Hombre"]

labels = [*labels_surv, *labels_pclass, *labels_sex]

# Position of every label inside `labels`
index = dict(zip(labels, range(len(labels))))

# Stage-1 links: Survival → Class (row count per label pair)
survival_class_counts = titanic.groupby(["Survived_lbl", "Pclass_lbl"]).size()
flows_1 = survival_class_counts.reset_index(name="value")

# Stage-2 links: Class → Sex (row count per label pair)
class_sex_counts = titanic.groupby(["Pclass_lbl", "Sex_lbl"]).size()
flows_2 = class_sex_counts.reset_index(name="value")

# Build the parallel source/target/value/colour lists for the Sankey links.
# Entry i of each list describes one link: node index it leaves from, node
# index it arrives at, its size, and its colour.
color_stage1 = "rgba(255,127,14,0.5)"   # Survival → Class
color_stage2 = "rgba(31,119,180,0.5)"   # Class → Sex

source = []
target = []
value = []
colors_links = []

# One descriptor per stage: (flow table, origin column, destination column,
# link colour). Iterating the columns directly avoids DataFrame.iterrows(),
# and a single loop replaces the two copy-pasted per-stage loops.
for stage_flows, origin_col, dest_col, stage_color in (
    (flows_1, "Survived_lbl", "Pclass_lbl", color_stage1),
    (flows_2, "Pclass_lbl", "Sex_lbl", color_stage2),
):
    # index[...] is a strict lookup: a label missing from `index` raises
    # KeyError instead of silently producing a broken link.
    source.extend(index[label] for label in stage_flows[origin_col])
    target.extend(index[label] for label in stage_flows[dest_col])
    value.extend(int(count) for count in stage_flows["value"])
    colors_links.extend([stage_color] * len(stage_flows))

# Node colours: one shade per stage, repeated for each label of that stage
# (optional)
node_colors = [
    shade
    for shade, stage_labels in (
        ("#fdd0a2", labels_surv),
        ("#9ecae1", labels_pclass),
        ("#c7e9c0", labels_sex),
    )
    for _ in stage_labels
]

# Node and link specifications for the Sankey trace
sankey_nodes = {
    "pad": 16,
    "thickness": 20,
    "line": {"width": 0.5, "color": "gray"},
    "label": labels,
    "color": node_colors,
}
sankey_links = {
    "source": source,
    "target": target,
    "value": value,
    "color": colors_links,
}

# Create the Sankey figure
sankey_trace = go.Sankey(
    arrangement="snap",
    node=sankey_nodes,
    link=sankey_links,
)
fig = go.Figure(data=[sankey_trace])

# Title, font size and canvas size
layout_options = {
    "title_text": "Diagrama Alluvial (Sankey): Supervivencia → Clase → Sexo",
    "font_size": 12,
    "width": 950,
    "height": 600,
}
fig.update_layout(**layout_options)

fig.show()

# Optionally save an interactive version
fig.write_html("alluvial_titanic.html")


