In [1]:
%%writefile app.py

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Dataset y aeropuertos
from nycflights13 import flights, airports

# -----------------------------
# Preparación de datos
# -----------------------------
# Work on a private copy so the table exported by nycflights13 is never mutated.
df = flights.copy()

# Keep only flights with a recorded departure delay.
has_dep_delay = df["dep_delay"].notna()
df_clean = df.loc[has_dep_delay].copy()

# 1 when the flight left on schedule or early (delay <= 0 minutes), else 0.
df_clean["on_time"] = df_clean["dep_delay"].le(0).astype(int)

# Hand-maintained lookup: carrier code (as found in the "carrier" column)
# -> human-readable airline name. Codes missing here are shown as-is by the
# callers, which fall back to the raw code.
carrier_map = dict(
    [
        ("9E", "Endeavor Air"),
        ("AA", "American Airlines"),
        ("AS", "Alaska Airlines"),
        ("B6", "JetBlue Airways"),
        ("DL", "Delta Air Lines"),
        ("EV", "ExpressJet"),
        ("F9", "Frontier Airlines"),
        ("FL", "AirTran Airways"),
        ("HA", "Hawaiian Airlines"),
        ("MQ", "Envoy Air"),
        ("OO", "SkyWest Airlines"),
        ("UA", "United Airlines"),
        ("US", "US Airways"),
        ("VX", "Virgin America"),
        ("WN", "Southwest Airlines"),
        ("YV", "Mesa Airlines"),
    ]
)

# Airport code -> full airport name, built from the nycflights13 airports table.
airport_map = dict(zip(airports["faa"], airports["name"]))

# Add a readable "<column>_name" next to each code column. Codes that have no
# entry in the lookup keep the raw code as their display name.
for _code_col, _lookup in (
    ("carrier", carrier_map),
    ("origin", airport_map),
    ("dest", airport_map),
):
    _codes = df_clean[_code_col]
    df_clean[f"{_code_col}_name"] = _codes.map(_lookup).fillna(_codes)

# -----------------------------
# Funciones Montecarlo (empírico)
# -----------------------------
def run_mc_empirical(real_delays, N=10000, seed=None):
    """Estimate punctuality metrics by resampling observed delays.

    Draws ``N`` values with replacement from ``real_delays`` (an empirical
    bootstrap) and summarises the simulated sample.

    Args:
        real_delays: 1-D array-like of observed departure delays. Values
            ``<= 0`` are counted as on time.
        N: Number of simulated draws; must be at least 1.
        seed: Optional seed. When given, a dedicated
            ``numpy.random.Generator`` is used so the result is reproducible.
            When ``None`` (default) NumPy's global random state is used,
            exactly as before this parameter existed.

    Returns:
        dict of plain ``float`` values with keys ``p_on_time`` (share of
        draws ``<= 0``), ``p_delay_gt_30`` (share of draws ``> 30``),
        ``mean_delay``, ``p50``, ``p90``, ``p95`` and ``p99``.

    Raises:
        ValueError: If ``real_delays`` is empty or ``N`` is smaller than 1.
    """
    delays = np.asarray(real_delays, dtype=float)
    # Fail early with a readable message instead of NumPy's generic one.
    if delays.size == 0:
        raise ValueError("real_delays must contain at least one observation")
    if N < 1:
        raise ValueError("N must be a positive number of simulations")

    # np.random (module) and Generator both expose a compatible ``choice``.
    sampler = np.random if seed is None else np.random.default_rng(seed)
    simulated = sampler.choice(delays, size=N, replace=True)

    # One pass over the sample for all four percentiles.
    p50, p90, p95, p99 = np.percentile(simulated, [50, 90, 95, 99])
    return {
        "p_on_time": float(np.mean(simulated <= 0)),
        "p_delay_gt_30": float(np.mean(simulated > 30)),
        "mean_delay": float(simulated.mean()),
        "p50": float(p50),
        "p90": float(p90),
        "p95": float(p95),
        "p99": float(p99),
    }

def run_mc_for_airline(df_subset, carrier, N=10000, min_obs=30):
    """Run the empirical Monte Carlo simulation for a single carrier.

    Args:
        df_subset: DataFrame with at least ``carrier`` and ``dep_delay``
            columns.
        carrier: Carrier code to select from ``df_subset``.
        N: Number of simulated draws forwarded to ``run_mc_empirical``.
        min_obs: Minimum number of matching rows required to simulate.

    Returns:
        ``None`` when fewer than ``min_obs`` rows match ``carrier``.
        Otherwise the metrics dict from ``run_mc_empirical`` extended with
        ``carrier``, ``carrier_name`` (raw code when the carrier is not in
        ``carrier_map``) and ``n_obs`` (number of matching rows).
    """
    carrier_rows = df_subset.loc[df_subset["carrier"] == carrier]
    n_rows = len(carrier_rows)

    # Too little history for this carrier: signal "no estimate" to the caller.
    if n_rows < min_obs:
        return None

    summary = run_mc_empirical(carrier_rows["dep_delay"].values, N)
    summary["carrier"] = carrier
    summary["carrier_name"] = carrier_map.get(carrier, carrier)
    summary["n_obs"] = n_rows
    return summary

# -----------------------------
# Interfaz Streamlit
# -----------------------------
# Page setup. The accented characters and the emoji were mis-encoded
# (UTF-8 bytes read as Mac Roman) and showed up garbled in the browser.
# NOTE(review): older Streamlit releases require set_page_config to be the
# first Streamlit command, so keep it ahead of the other st.* calls.
st.set_page_config(page_title="Simulación de Puntualidad Aérea", layout="wide")
st.title("✈️ Simulación Montecarlo de Puntualidad Aérea")
st.write("App interactiva basada en datos reales (nycflights13).")

# Sidebar: filters. Labels carry properly encoded accents/emoji (they were
# previously mis-encoded and rendered garbled).
st.sidebar.header("🎛️ Filtros")


def _airport_label(code):
    """Return the display label for an airport option.

    The "(Todos)" sentinel is shown unchanged; any other code is replaced by
    its full name from ``airport_map``, or left as the raw code when unknown.
    """
    return airport_map.get(code, code) if code != "(Todos)" else code


# Carriers to simulate; an empty selection is handled further down.
selected_carriers = st.sidebar.multiselect(
    "Aerolíneas",
    options=sorted(df_clean["carrier"].unique()),
    format_func=lambda code: carrier_map.get(code, code),
)
# Origin / destination: "(Todos)" means "do not filter on this column".
selected_origin = st.sidebar.selectbox(
    "Origen",
    ["(Todos)"] + sorted(df_clean["origin"].unique()),
    format_func=_airport_label,
)
selected_dest = st.sidebar.selectbox(
    "Destino",
    ["(Todos)"] + sorted(df_clean["dest"].unique()),
    format_func=_airport_label,
)
# (first, last) scheduled departure hour to keep.
hour_range = st.sidebar.slider("Franja horaria de salida", 0, 23, (0, 23))
# Number of Monte Carlo draws per airline.
N_sim = st.sidebar.slider("Simulaciones Montecarlo", 3000, 30000, 10000, step=1000)
st.sidebar.write("---")

# Filter the data according to the sidebar selection.
# A single boolean mask replaces the former full-table copy followed by up to
# three successive slices; indexing with the mask already yields a new frame,
# so df_clean itself is never touched.
first_hour, last_hour = hour_range
mask = df_clean["hour"].between(first_hour, last_hour)
if selected_origin != "(Todos)":
    mask &= df_clean["origin"] == selected_origin
if selected_dest != "(Todos)":
    mask &= df_clean["dest"] == selected_dest
df_case = df_clean[mask]

# Nothing left to analyse: tell the user and halt this script run.
if df_case.empty:
    st.warning("No hay datos históricos para esa combinación. Cambiá los filtros.")
    st.stop()

# Overall KPIs for the filtered flights (labels re-encoded: the accents and
# the emoji were previously garbled).
st.subheader("📊 Análisis general del rango seleccionado")
col1, col2, col3 = st.columns(3)
col1.metric("Vuelos disponibles", len(df_case))
col2.metric("Retraso medio histórico", f"{df_case['dep_delay'].mean():.1f} min")
# on_time is a 0/1 flag, so its mean is the on-time share.
col3.metric("Prob. de salir a tiempo", f"{df_case['on_time'].mean()*100:.1f}%")
st.write("---")

# Per-airline Monte Carlo results (messages re-encoded: accents and emoji
# were previously garbled).
st.subheader("✈️ Resultados por Aerolínea (Montecarlo)")
if not selected_carriers:
    st.info("Seleccioná al menos una aerolínea en la barra lateral.")
    st.stop()

results = []
missing_carriers = []
for c in selected_carriers:
    r = run_mc_for_airline(df_case, c, N_sim)
    # None means the carrier has too few flights in the current selection.
    if r is None:
        missing_carriers.append(c)
    else:
        results.append(r)

if missing_carriers:
    st.warning(
        "Las siguientes aerolíneas no tienen suficientes datos: " +
        ", ".join(carrier_map.get(c, c) for c in missing_carriers)
    )
# Every selected carrier was skipped: nothing to show below.
if not results:
    st.error("No hay resultados válidos para las aerolíneas seleccionadas.")
    st.stop()

# Assemble the per-airline metrics into a table.
results_df = pd.DataFrame(results)
expected_cols = ["carrier_name", "n_obs", "p_on_time", "p_delay_gt_30", "mean_delay", "p50", "p90", "p95", "p99"]
# Defensive: make sure every displayed column exists, filling gaps with NaN.
for col in expected_cols:
    if col not in results_df.columns:
        results_df[col] = np.nan

# User-facing column headers ("Aerolínea" was previously mis-encoded and
# rendered garbled in the table header).
display_df = results_df[expected_cols].rename(columns={
    "carrier_name": "Aerolínea",
    "n_obs": "Observaciones",
    "p_on_time": "Prob. a tiempo",
    "p_delay_gt_30": "Prob. >30 min",
    "mean_delay": "Retraso promedio (min)",
    "p50": "Mediana",
    "p90": "P90",
    "p95": "P95",
    "p99": "P99"
})
st.dataframe(display_df)

# Comparison chart: on-time probability per airline.
st.write("### 📉 Comparación visual de puntualidad")
plot_df = results_df.dropna(subset=["p_on_time"])
if plot_df.empty:
    st.warning("No hay métricas válidas para graficar.")
else:
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(plot_df["carrier_name"], plot_df["p_on_time"] * 100)
    ax.set_ylabel("Probabilidad de salir a tiempo (%)")
    ax.set_title("Comparación Montecarlo por Aerolínea")
    # Rotate the labels through the Axes itself instead of plt.xticks, which
    # acts on whichever figure pyplot currently considers active.
    ax.tick_params(axis="x", labelrotation=45)
    st.pyplot(fig)
    # Streamlit reruns this script on every widget interaction. Figures made
    # with plt.subplots stay registered in pyplot until closed, so release
    # this one once it has been rendered to avoid accumulating open figures.
    plt.close(fig)
st.write("---")

# Final summary: highlight the airline with the highest simulated on-time
# probability. The text was previously mis-encoded (accents, arrows and en
# dashes rendered garbled) and is restored here.
st.subheader("📝 Resumen para el usuario")
valid_results = results_df.dropna(subset=["p_on_time"])
if valid_results.empty:
    st.warning("No hay datos suficientes para calcular un resumen confiable.")
else:
    # Row with the largest p_on_time among the airlines that have a value.
    best = valid_results.loc[valid_results["p_on_time"].idxmax()]
    st.success(
        f"""Mejor aerolínea en esta selección: {best['carrier_name']}
Probabilidad de salir a tiempo: {best['p_on_time']*100:.1f}%

Interpretación:
- <30% → Alta chance de retraso fuerte.
- 30%–50% → Precaución si tenés conexión.
- 50%–75% → Confiable.
- >75% → Muy probable que salga puntual.
"""
    )

Writing app.py
