# EDA: Adopción de Internet y desigualdad

In [None]:
%pip install pandas seaborn requests pycountry

In [None]:
import pandas as pd, requests, pycountry
import seaborn as sns, matplotlib.pyplot as plt

# Semicolon-separated country codes substituted into the "{countries}" slot of
# the request URL below.
COUNTRIES = ";".join(
    ["ARG", "BOL", "BRA", "CHL", "COL", "ECU", "PRY", "PER", "URY", "VEN"]
)
# URL template for the World Bank v2 API; "{countries}" and "{indicator}" are
# filled in with str.format.  The large per_page value is presumably meant to
# fit the whole series in a single response, since only one request is made.
BASE = (
    "https://api.worldbank.org/v2"
    "/country/{countries}"
    "/indicator/{indicator}"
    "?format=json&per_page=20000"
)
def wb_series(indicator, timeout=30):
    """Fetch one World Bank indicator for COUNTRIES as a tidy DataFrame.

    Args:
        indicator: World Bank indicator code, e.g. "IT.NET.USER.ZS".
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        DataFrame with columns "country" (display name taken from the
        response), "date" (int) and "value" (float, NaN where the API
        reported no observation).  Empty, with the same columns, when the
        API returns no rows.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the JSON body is not the expected two-element list
            (the shape the API uses for error messages).
    """
    # Without a timeout a stalled connection would block the notebook forever.
    r = requests.get(
        BASE.format(countries=COUNTRIES, indicator=indicator),
        timeout=timeout,
    )
    r.raise_for_status()
    payload = r.json()
    # A successful response is [metadata, rows]; anything else (for example a
    # single-element list carrying a "message") is an API-level error.
    if not isinstance(payload, list) or len(payload) < 2:
        raise ValueError(
            f"Unexpected World Bank API response for {indicator!r}: {payload!r}"
        )
    data = payload[1]
    if not data:
        # No observations: return an empty frame with the usual schema.
        return pd.DataFrame(
            {
                "country": pd.Series(dtype="object"),
                "date": pd.Series(dtype="int"),
                "value": pd.Series(dtype="float"),
            }
        )
    df = pd.DataFrame(data)[["country", "date", "value"]]
    # Each "country" cell is a dict; keep only its "value" entry.
    df["country"] = df["country"].apply(lambda x: x["value"])
    df["date"] = pd.to_numeric(df["date"], errors="coerce")
    df["value"] = pd.to_numeric(df["value"], errors="coerce")
    # Rows whose date could not be parsed are dropped so the cast to int is safe.
    return df.dropna(subset=["date"]).astype({"date": "int"})


## EDA: adopción de Internet

In [None]:
# Internet users as % of population, from 2000 onwards, one line per country.
internet = wb_series("IT.NET.USER.ZS")
internet = internet[internet["date"] >= 2000]
internet = internet.dropna(subset=["value"])  # keep only reported observations

plt.figure(figsize=(9, 5))
sns.lineplot(data=internet, x="date", y="value", hue="country")
plt.title("Usuarios de Internet (% pobl.) — Sudamérica")
plt.xlabel("Año")
plt.ylabel("% de población")
# Place the legend outside the axes so it does not cover the lines.
plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left")
plt.tight_layout()


## Cruce con desigualdad (Gini)

In [None]:
# Cross the latest internet-adoption figure with the latest Gini index per
# country and plot one against the other.
gini = wb_series("SI.POV.GINI")
# Drop missing observations BEFORE taking the last row per country; otherwise
# tail(1) picks the newest calendar year even when its value is NaN, and the
# scatter ends up with few or no points.
gini_ult = (
    gini.dropna(subset=["value"])
    .sort_values(["country", "date"])
    .groupby("country")
    .tail(1)
)
internet_ult = (
    internet.dropna(subset=["value"])
    .sort_values(["country", "date"])
    .groupby("country")
    .tail(1)
)
# NOTE: the two "latest" years can differ per country; both are kept in the
# date_internet / date_gini columns for reference.
df = pd.merge(internet_ult, gini_ult, on="country", suffixes=("_internet", "_gini"))
df = df.rename(columns={"value_internet": "internet_pct", "value_gini": "gini"})
plt.figure(figsize=(6, 5))
sns.regplot(data=df, x="internet_pct", y="gini")
# Label each point with the first three letters of the country name.
for _, r in df.iterrows():
    plt.text(r["internet_pct"] + 0.2, r["gini"] + 0.2, r["country"][:3])
plt.title("Internet vs. Gini (último año disponible)")
plt.xlabel("% usuarios Internet")
plt.ylabel("Índice Gini")
plt.tight_layout()
# Last expression of the cell: displayed as the cell output in the notebook.
df.sort_values("internet_pct", ascending=False).head()


# Caso Bolivia: trayectoria y benchmarks

In [None]:
# Bolivia's trajectory drawn in bold black over faded lines for five peers.
bol = internet[internet["country"] == "Bolivia"].copy()
peer = internet[
    internet["country"].isin(["Peru", "Paraguay", "Chile", "Argentina", "Brazil"])
]

plt.figure(figsize=(9, 5))
# Peers first, semi-transparent, so the Bolivia line is drawn on top.
sns.lineplot(data=peer, x="date", y="value", hue="country", alpha=0.4)
sns.lineplot(
    data=bol, x="date", y="value", color="black", linewidth=2, label="Bolivia"
)
plt.title("Bolivia vs. pares regionales — Usuarios de Internet (%)")
plt.xlabel("Año")
plt.ylabel("% de población")
plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left")
plt.tight_layout()
