
# TP – Visualización con Streamlit (MySQL + World Bank)

Este notebook crea un **dashboard en Streamlit** conectado a tu base `worldbank`:
1) Instalar dependencias
2) Configurar y probar conexión a MySQL
3) Generar `app.py` listo para `streamlit run app.py`


In [None]:

# 1) Dependencies (run once)
%pip install -q streamlit sqlalchemy pymysql pandas altair


## Configuración de conexión

In [None]:

# 2) Adjust these credentials to match your local MySQL server
USER = "root"          # <- your user
PASSWORD = "tu_clave"  # <- your password
HOST = "127.0.0.1"     # or 'localhost'
DB = "worldbank"       # <- your database

print("Credenciales listas. Modificalas si hace falta.")


## Probar conexión

In [None]:

import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

# Build the URL from its parts instead of formatting a string, so special
# characters in the password (e.g. "@", ":" or "/") are escaped correctly
# rather than corrupting the connection string.
# HOST may still carry an optional ":port" suffix, as the string form allowed.
host_name, _, port_text = HOST.partition(":")
engine = create_engine(
    URL.create(
        "mysql+pymysql",
        username=USER,
        password=PASSWORD,
        host=host_name,
        port=int(port_text) if port_text else None,
        database=DB,
    )
)

# Smoke test: a trivial query proves the credentials and database name work.
# Kept as the last expression so the notebook displays the resulting frame.
pd.read_sql(text("SELECT DATABASE() AS db, NOW() AS ahora;"), engine)


## Generar `app.py` (Streamlit)

In [None]:

# 3) Generar app.py con las credenciales de esta notebook
from pathlib import Path

app_template = """
import pandas as pd
import altair as alt
import streamlit as st
from sqlalchemy import create_engine, text

# Connection settings. The generator notebook fills in these placeholders by
# plain text substitution before writing this file, so their spelling must
# stay exactly as it is.
USER = "{USER}"
PASSWORD = "{PASSWORD}"
HOST = "{HOST}"
DB = "{DB}"

# Page title and layout, followed by the dashboard heading.
st.set_page_config(page_title="WorldBank – Economía", layout="wide")
st.title("📊 Dashboard – Indicadores Económicos (World Bank)")

@st.cache_resource
def get_engine():
    '''Return the SQLAlchemy engine for the configured MySQL database.

    Decorated with ``st.cache_resource`` so the same engine object is reused
    across calls instead of being rebuilt each time.
    '''
    # Local import: only this function needs URL.
    from sqlalchemy.engine import URL

    # The URL is assembled from the module-level settings at run time, so the
    # credentials are escaped properly even when they contain characters such
    # as "@", ":", "/" or braces. An optional ":port" suffix on the host
    # setting is still honoured.
    host_name, _, port_text = HOST.partition(":")
    url = URL.create(
        "mysql+pymysql",
        username=USER,
        password=PASSWORD,
        host=host_name,
        port=int(port_text) if port_text else None,
        database=DB,
    )
    return create_engine(url)

def q(sql, params=None):
    '''Run ``sql`` on the shared engine and return the result as a DataFrame.

    ``params`` carries the values for the named ``:placeholders`` in ``sql``;
    it is passed through to ``pandas.read_sql`` unchanged.
    '''
    connection_source = get_engine()
    statement = text(sql)
    return pd.read_sql(statement, connection_source, params=params)

def pick(cols, candidates):
    '''Return the entry of ``cols`` that matches a candidate name, ignoring case.

    Candidates are tried in order, so earlier ones take priority. The returned
    value keeps the spelling used in ``cols``. Returns ``None`` when no
    candidate matches.
    '''
    by_lower = {}
    for original in cols:
        # setdefault keeps the first column when two differ only by case.
        by_lower.setdefault(original.lower(), original)

    for wanted in candidates:
        key = wanted.lower()
        if key in by_lower:
            return by_lower[key]
    return None

def _columns_of(table):
    # Column names of `table`, read from the "Field" column of SHOW COLUMNS.
    return q(f"SHOW COLUMNS FROM {table};")["Field"].tolist()


# Read the actual column names of each table so the queries further down can
# adapt to the naming used in this particular database.
obs_cols = _columns_of("observations")
ind_cols = _columns_of("indicators")
try:
    ctr_cols = _columns_of("countries")
except Exception:
    # A failure here is tolerated: with no country columns, every lookup on
    # ctr_cols below simply yields None.
    ctr_cols = []

# observations: country identifier/name, indicator code, year and value.
o_country_iso3 = pick(obs_cols, ["country_iso3","country_code","iso3"])
o_country_name = pick(obs_cols, ["country_name","country"])
o_indicator_code = pick(obs_cols, ["indicator_code","indicator","code"])
o_year = pick(obs_cols, ["year","anio"])
o_value = pick(obs_cols, ["value","valor"])

# indicators: code, display name and unit.
i_code = pick(ind_cols, ["indicator_code","indicator","code","id"])
i_name = pick(ind_cols, ["indicator_name","name","nombre"])
i_unit = pick(ind_cols, ["unit","unidad"])

# countries: code and display name.
c_iso3 = pick(ctr_cols, ["iso3","country_code"])
c_name = pick(ctr_cols, ["name","country_name","pais"])

def get_countries():
    '''Return the sorted list of country labels for the country filter.

    Sources are tried in order of preference; the first one that yields rows
    wins:

    1. names from ``countries`` for countries that appear in ``observations``;
    2. the country-name column of ``observations``;
    3. the country-code column of ``observations``.

    Returns an empty list when no source is available or all are empty.
    '''
    # The SQL below uses single-quote triple-quoted strings on purpose: this
    # source is embedded in a triple-double-quoted template, and a nested
    # triple double quote would end that template early.
    queries = []
    if c_iso3 and c_name and o_country_iso3:
        queries.append(f'''
            SELECT DISTINCT c.{c_name} AS name
            FROM countries c
            JOIN observations o ON c.{c_iso3} = o.{o_country_iso3}
            WHERE c.{c_name} IS NOT NULL AND c.{c_name} <> ''
            ORDER BY 1;
        ''')
    # Fallbacks read straight from observations: name column first, then code.
    for column in (o_country_name, o_country_iso3):
        if column:
            queries.append(f'''
            SELECT DISTINCT o.{column} AS name
            FROM observations o
            WHERE o.{column} IS NOT NULL AND o.{column} <> ''
            ORDER BY 1;
        ''')

    # Queries run lazily, one at a time, stopping at the first non-empty result.
    for sql in queries:
        df = q(sql)
        if not df.empty:
            return df["name"].tolist()
    return []

def get_indicators():
    '''Return the indicators offered in the indicator selector.

    The result is a DataFrame with columns ``indicator_code``,
    ``indicator_name``, ``unit`` and ``label`` (code and name joined by
    " – "). Only the codes listed in ``codes`` are looked up. The frame is
    empty when the indicators table has no usable code column or none of the
    codes is present.
    '''
    codes = ("NY.GDP.MKTP.CD", "NY.GDP.PCAP.CD", "NE.EXP.GNFS.ZS", "NY.GDP.DEFL.KD.ZG")
    if not i_code:
        return pd.DataFrame(columns=["indicator_code", "indicator_name", "unit", "label"])

    name_expr = f"i.{i_name}" if i_name else "NULL"
    unit_expr = f"i.{i_unit}" if i_unit else "NULL"

    # One named placeholder per code, so the IN list does not depend on the
    # driver turning a tuple parameter into a parenthesised list.
    code_params = {f"code_{n}": code for n, code in enumerate(codes)}
    placeholders = ", ".join(f":{key}" for key in code_params)

    # The table must be aliased as "i": name_expr/unit_expr reference it.
    # Single-quote triple quotes are required because this source lives
    # inside a triple-double-quoted template.
    sql = f'''
        SELECT i.{i_code} AS indicator_code,
               COALESCE({name_expr}, '') AS indicator_name,
               COALESCE({unit_expr}, '') AS unit
        FROM indicators i
        WHERE i.{i_code} IN ({placeholders})
        ORDER BY 1;
    '''
    df = q(sql, code_params)
    if df.empty:
        # Keep the schema consistent for callers that read the label column.
        return df.assign(label="")
    df["label"] = df["indicator_code"] + " – " + df["indicator_name"].fillna("")
    return df

def get_year_bounds():
    '''Return ``(first_year, last_year)`` as found in ``observations``.

    Falls back to ``(2000, 2024)`` when the query returns no row or either
    bound is missing.
    '''
    fallback = (2000, 2024)
    year_col = o_year if o_year else "year"
    bounds = q(f"SELECT MIN({year_col}) AS y1, MAX({year_col}) AS y2 FROM observations;")
    if bounds.empty:
        return fallback

    first_row = bounds.iloc[0]
    lowest, highest = first_row["y1"], first_row["y2"]
    if pd.isna(lowest) or pd.isna(highest):
        return fallback
    return (int(lowest), int(highest))

def load_timeseries(ind_code, countries_sel, y1, y2):
    '''Load one indicator's observations for the chosen countries and years.

    Args:
        ind_code: indicator code to filter on.
        countries_sel: country labels to keep; they are compared against the
            country column selected below.
        y1: first year of the range (inclusive).
        y2: last year of the range (inclusive).

    Returns:
        A DataFrame with columns ``country_name``, ``year`` (int), ``value``
        (numeric), ``indicator_code``, ``indicator_name`` and ``unit``. Rows
        whose year or value is not numeric are dropped. The frame is empty
        when nothing is selected, when a required column could not be
        detected, or when the query matches no rows.
    '''
    if not countries_sel:
        return pd.DataFrame()
    # Without these columns the query cannot be written; report "no data"
    # instead of sending SQL that references a column literally named None.
    if not (o_year and o_value and o_indicator_code and i_code):
        return pd.DataFrame()

    if c_iso3 and c_name and o_country_iso3:
        country_expr = f"c.{c_name}"
        join_c = f"JOIN countries c ON c.{c_iso3} = o.{o_country_iso3}"
    elif o_country_name:
        country_expr = f"o.{o_country_name}"
        join_c = ""
    elif o_country_iso3:
        country_expr = f"o.{o_country_iso3}"
        join_c = ""
    else:
        return pd.DataFrame()

    name_expr = f"i.{i_name}" if i_name else "NULL"
    unit_expr = f"i.{i_unit}" if i_unit else "NULL"

    # One named placeholder per country, so the IN list does not depend on
    # the driver turning a tuple parameter into a parenthesised list.
    country_params = {f"country_{n}": name for n, name in enumerate(countries_sel)}
    placeholders = ", ".join(f":{key}" for key in country_params)

    # Single-quote triple quotes are required because this source lives
    # inside a triple-double-quoted template.
    sql = f'''
        SELECT
            {country_expr} AS country_name,
            o.{o_year}     AS year,
            o.{o_value}    AS value,
            i.{i_code}     AS indicator_code,
            {name_expr}    AS indicator_name,
            COALESCE({unit_expr}, '') AS unit
        FROM observations o
        {join_c}
        JOIN indicators i ON i.{i_code} = o.{o_indicator_code}
        WHERE i.{i_code} = :code
          AND {country_expr} IN ({placeholders})
          AND o.{o_year} BETWEEN :y1 AND :y2
        ORDER BY 1, 2;
    '''
    params = {"code": ind_code, "y1": int(y1), "y2": int(y2), **country_params}
    df = q(sql, params)
    if df.empty:
        return df

    # Coerce to numbers first so unparseable entries become NaN and are
    # dropped; only then is it safe to cast the year to int.
    df["year"] = pd.to_numeric(df["year"], errors="coerce")
    df["value"] = pd.to_numeric(df["value"], errors="coerce")
    return df.dropna(subset=["year", "value"]).astype({"year": int})

# Load the filter options for this script run.
countries = get_countries()
inds = get_indicators()
y1, y2 = get_year_bounds()

with st.sidebar:
    st.header("Filtros")
    # Preselect up to three countries (slicing already copes with shorter lists).
    sel_countries = st.multiselect("Países", options=countries, default=countries[:3])
    sel_ind_label = st.selectbox("Indicador", options=inds["label"].tolist() if not inds.empty else [])
    # Default window: the last ten years, clamped to the first available year.
    year_range = st.slider("Rango de años", min_value=y1, max_value=y2, value=(max(y1, y2 - 10), y2), step=1)

# The label starts with the indicator code, followed by " – " and the name.
sel_code = sel_ind_label.split(" – ", 1)[0] if sel_ind_label else None

if not countries or inds.empty:
    st.warning("No hay países o indicadores disponibles. Verificá los datos en la base.")
elif not sel_countries:
    st.info("Elegí al menos un país en la barra lateral.")
elif not sel_code:
    st.warning("Seleccioná un indicador.")
else:
    df = load_timeseries(sel_code, sel_countries, year_range[0], year_range[1])
    if df.empty:
        st.warning("No hay datos para el filtro elegido.")
    else:
        unit = ""
        if "unit" in df.columns and df["unit"].notna().any():
            unit = df["unit"].dropna().iloc[0]
        title = f"{sel_code} – {unit}" if unit else sel_code
        # Plain single-brace f-strings: this file is produced by text
        # substitution, not str.format, so doubled braces would be shown
        # literally in the chart titles.
        value_title = f"Valor ({unit})" if unit else "Valor"

        # Time series: one line per country.
        line = (
            alt.Chart(df)
            .mark_line(point=True)
            .encode(
                x=alt.X("year:O", title="Año"),
                y=alt.Y("value:Q", title=value_title),
                color=alt.Color("country_name:N", title="País"),
                tooltip=["country_name", "year", "value"],
            )
            .properties(height=360, title=title)
        )
        st.altair_chart(line, use_container_width=True)

        # Ranking: countries ordered by value in the latest year of the result.
        last_year = int(df["year"].max())
        rank = df[df["year"] == last_year].sort_values("value", ascending=False)

        bar = (
            alt.Chart(rank)
            .mark_bar()
            .encode(
                x=alt.X("value:Q", title=value_title),
                y=alt.Y("country_name:N", sort='-x', title="País"),
                tooltip=["country_name", "value"],
            )
            .properties(height=320, title=f"Ranking {last_year}")
        )
        st.altair_chart(bar, use_container_width=True)

        st.subheader("Tabla")
        st.dataframe(df.sort_values(["country_name", "year"]), use_container_width=True)
"""

# Fill the credentials into the template and write app.py.
#
# Only the quoted placeholders (the four settings assignments) are replaced,
# and each value is written with repr() so quotes, backslashes or braces in a
# credential still produce a valid Python string literal. A single re.sub pass
# also guarantees that a substituted value is never re-scanned for placeholders.
# The connection f-string in the template is left untouched; it reads the
# settings at run time.
#
# NOTE: app.py will contain the password in plain text; do not share or commit it.
import re

credentials = {"USER": USER, "PASSWORD": PASSWORD, "HOST": HOST, "DB": DB}
app_code = re.sub(
    r'"\{(USER|PASSWORD|HOST|DB)\}"',
    lambda match: repr(credentials[match.group(1)]),
    app_template,
)
Path("app.py").write_text(app_code, encoding="utf-8")

print("✅ Generado app.py")
print("Cómo ejecutarlo:")
print("  1) pip install streamlit altair sqlalchemy pymysql pandas")
print("  2) streamlit run app.py")
