In [28]:
import pandas as pd
import requests

# --- 1. Leer el archivo limpio (usa una de las tres opciones arriba) ---
file_path = r"C:\Deysi\ironHack\ironHack\projects\Educacion\data\Datos_de_asistencia_escolar.csv"
df = pd.read_csv(file_path)

# --- 2. Convertir la columna de asistencia escolar a num√©rica ---
df['school_attendance'] = pd.to_numeric(df['school_attendance'], errors='coerce')

# --- 3. Eliminar valores nulos o no v√°lidos ---
df = df.dropna(subset=['school_attendance'])

# --- 4. Ordenar por menor asistencia escolar (m√°s pobreza educativa) ---
df_sorted = df.sort_values(by='school_attendance', ascending=True)

# --- 5. Mostrar los 30 pa√≠ses m√°s cr√≠ticos ---
top30 = df_sorted.head(30)

print("üè´ Los 30 pa√≠ses con menor asistencia escolar (mayor pobreza educativa):\n")
print(top30[['country_name', 'country_code', 'school_attendance', 'year']].to_string(index=False))

# --- 1. Definir URL de la API ---
# Indicador: "SI.POV.DDAY" ‚Üí poblaci√≥n que vive con menos de $2.15/d√≠a (ajustado por paridad de poder adquisitivo)
# Puedes ajustar el rango de a√±os o los pa√≠ses
url = "https://api.worldbank.org/v2/country/all/indicator/SI.POV.DDAY?format=json&date=2010:2022&per_page=20000"

# --- 2. Descargar los datos ---
response = requests.get(url)
data = response.json()[1]  # el √≠ndice [1] contiene los registros

# --- 3. Convertir a DataFrame ---
wb_df = pd.DataFrame(data)

# --- 4. Extraer y limpiar columnas relevantes ---
wb_df = wb_df[['country', 'date', 'value']]
wb_df['country_name'] = wb_df['country'].apply(lambda x: x['value'])
wb_df['country_code'] = wb_df['country'].apply(lambda x: x['id'])
wb_df.rename(columns={'value': 'poverty_rate', 'date': 'year'}, inplace=True)
wb_df['year'] = wb_df['year'].astype(int)

# --- 5. Mantener columnas √∫tiles ---
wb_df = wb_df[['country_code', 'country_name', 'year', 'poverty_rate']]

# --- 6. Limpiar valores nulos ---
wb_df.dropna(subset=['poverty_rate'], inplace=True)

# --- 7. Mostrar algunos resultados ---
print("‚úÖ Datos del Banco Mundial cargados correctamente:")
print(wb_df.head(10))

print("\nN√∫mero de pa√≠ses registrados:", wb_df['country_name'].nunique())


print("A√±os en UNICEF:", sorted(unicef_df['year'].unique()))
print("A√±os en Banco Mundial:", sorted(wb_df['year'].unique()))

print("UNICEF country codes:", sorted(unicef_df['country_code'].unique()))
print("World Bank country codes:", sorted(wb_df['country_code'].unique()))
common = set(unicef_df['country_code']).intersection(set(wb_df['country_code']))
print("C√≥digos comunes:", common)
print("N√∫mero:", len(common))



üè´ Los 30 pa√≠ses con menor asistencia escolar (mayor pobreza educativa):

Empty DataFrame
Columns: [country_name, country_code, school_attendance, year]
Index: []
‚úÖ Datos del Banco Mundial cargados correctamente:
   country_code         country_name  year  poverty_rate
78           Z4  East Asia & Pacific  2022           2.4
79           Z4  East Asia & Pacific  2021           2.6
80           Z4  East Asia & Pacific  2020           2.7
81           Z4  East Asia & Pacific  2019           2.6
82           Z4  East Asia & Pacific  2018           3.3
83           Z4  East Asia & Pacific  2017           4.2
84           Z4  East Asia & Pacific  2016           5.1
85           Z4  East Asia & Pacific  2015           6.1
86           Z4  East Asia & Pacific  2014           7.8
87           Z4  East Asia & Pacific  2013           9.1

N√∫mero de pa√≠ses registrados: 180
A√±os en UNICEF: [np.int64(2010), np.int64(2011), np.int64(2012), np.int64(2013), np.int64(2014), np.int64(2015), np.i

In [None]:
import pandas as pd
import requests

# --- 1. Cargar datos del Banco Mundial (pobreza) ---
url = "https://api.worldbank.org/v2/country/all/indicator/SI.POV.DDAY?format=json&date=2010:2022&per_page=20000"
response = requests.get(url)
data = response.json()[1]

wb_df = pd.DataFrame(data)
wb_df = wb_df[['country', 'date', 'value']]
wb_df['country_name'] = wb_df['country'].apply(lambda x: x['value'])
wb_df['country_code'] = wb_df['country'].apply(lambda x: x['id'])
wb_df.rename(columns={'value': 'poverty_rate', 'date': 'year'}, inplace=True)
wb_df['year'] = wb_df['year'].astype(int)
wb_df = wb_df[['country_code', 'country_name', 'year', 'poverty_rate']]
wb_df.dropna(subset=['poverty_rate'], inplace=True)

print(f"‚úÖ Datos del Banco Mundial cargados: {wb_df.shape[0]} registros")

# --- 2. Cargar datos de UNICEF (asistencia escolar) ---
file_path = r"C:\Deysi\ironHack\ironHack\projects\Educacion\data\Datos_de_asistencia_escolar.csv"
unicef_df = pd.read_csv(file_path)  

unicef_df['year'] = pd.to_numeric(unicef_df['year'], errors='coerce')
unicef_df['school_attendance'] = pd.to_numeric(unicef_df['school_attendance'], errors='coerce')

# --- Filtrar pa√≠ses reales en el Banco Mundial ---
wb_df = wb_df[wb_df['country_code'].str.match(r'^[A-Z]{3}$', na=False)]

print("‚úÖ Despu√©s de filtrar, quedan:", wb_df['country_name'].nunique(), "pa√≠ses reales")
print(wb_df.head(5))

# --- Verificar coincidencias de c√≥digos ---
common_codes = set(unicef_df['country_code']).intersection(set(wb_df['country_code']))
print(f"üîç C√≥digos de pa√≠s comunes entre UNICEF y Banco Mundial: {len(common_codes)}")

# --- Hacer de nuevo el merge ---
merged = pd.merge(
    wb_df,
    unicef_df,
    on=['country_code', 'year'],
    how='inner'
)

print(f"‚úÖ Datos combinados: {merged.shape[0]} filas, {merged['country_name_x'].nunique()} pa√≠ses coinciden")

# --- Revisar algunos datos ---
print(merged[['country_code', 'country_name_x', 'year', 'poverty_rate', 'school_attendance']].head(10))


# --- 3. Fusionar ambos datasets ---
merged = pd.merge(
    wb_df,
    unicef_df,
    on=['country_code', 'year'],
    how='inner',
    suffixes=('_wb', '_unicef')
)

print(f"‚úÖ Datos combinados: {merged.shape[0]} filas, {merged['country_name_wb'].nunique()} pa√≠ses")

# --- 4. Limpiar y ordenar ---
merged = merged[['country_code', 'country_name_wb', 'year', 'poverty_rate', 'school_attendance']]
merged.rename(columns={'country_name_wb': 'country_name'}, inplace=True)
merged.dropna(inplace=True)

# --- 5. Mostrar primeros resultados ---
print("\nüìä Ejemplo de datos combinados:")
print(merged.head(10))

# --- 6. Guardar CSV ---
output_path = r"C:\Deysi\ironHack\ironHack\projects\Educacion\data\merged_unicef_worldbank.csv"
merged.to_csv(output_path, index=False)
print(f"\nüíæ Archivo combinado guardado en: {output_path}")

# --- 7. (Opcional) Analizar correlaci√≥n ---
corr = merged[['poverty_rate', 'school_attendance']].corr().iloc[0, 1]
print(f"\nüìà Correlaci√≥n entre pobreza y asistencia escolar: {corr:.2f}")



In [None]:
import pandas as pd
import requests

# --- 1. Cargar datos de UNICEF (asistencia escolar) ---
file_path = r"C:\Deysi\ironHack\ironHack\projects\Educacion\data\Datos_de_asistencia_escolar.csv"
unicef_df = pd.read_csv(file_path)

# Asegurar formato correcto
unicef_df['year'] = pd.to_numeric(unicef_df['year'], errors='coerce')
unicef_df['school_attendance'] = pd.to_numeric(unicef_df['school_attendance'], errors='coerce')
unicef_df['country_code'] = unicef_df['country_code'].str.strip().str.upper()
unicef_df.dropna(subset=['school_attendance', 'year'], inplace=True)

# --- 2. Seleccionar los 30 pa√≠ses con menor asistencia escolar ---
top30 = unicef_df.sort_values(by='school_attendance', ascending=True).groupby('country_code').first().reset_index()
top30 = top30.nsmallest(30, 'school_attendance')
print("üè´ Los 30 pa√≠ses con menor asistencia escolar:\n")
print(top30[['country_name', 'country_code', 'school_attendance', 'year']])

# Crear lista de c√≥digos ISO3
country_codes = top30['country_code'].unique().tolist()
country_str = ";".join(country_codes)

# --- 3. Descargar datos del Banco Mundial (pobreza extrema) ---
print("\nüåç Descargando datos del Banco Mundial...")
url = f"https://api.worldbank.org/v2/country/{country_str}/indicator/SI.POV.DDAY?format=json&date=2010:2022&per_page=20000"
response = requests.get(url)
data = response.json()[1]

# Convertir a DataFrame
wb_df = pd.DataFrame(data)[['country', 'date', 'value']]
wb_df['country_name'] = wb_df['country'].apply(lambda x: x['value'])
wb_df['country_code'] = wb_df['country'].apply(lambda x: x['id'])
wb_df.rename(columns={'value': 'poverty_rate', 'date': 'year'}, inplace=True)
wb_df['year'] = wb_df['year'].astype(int)
wb_df = wb_df[['country_code', 'country_name', 'year', 'poverty_rate']]
wb_df.dropna(subset=['poverty_rate'], inplace=True)

print(f"‚úÖ Datos del Banco Mundial cargados: {wb_df.shape[0]} registros para {wb_df['country_name'].nunique()} pa√≠ses")

# --- 4. Fusionar ambos datasets ---
merged = pd.merge(
    wb_df,
    unicef_df,
    on=['country_code', 'year'],
    how='inner',
    suffixes=('_wb', '_unicef')
)

# --- 5. Limpiar y organizar ---
merged = merged[['country_code', 'country_name_wb', 'year', 'poverty_rate', 'school_attendance']]
merged.rename(columns={'country_name_wb': 'country_name'}, inplace=True)
merged.dropna(inplace=True)

print(f"\n‚úÖ Datos combinados: {merged.shape[0]} filas, {merged['country_name'].nunique()} pa√≠ses coinciden")
print("\nüìä Ejemplo de datos combinados:")
print(merged.head(10))

# --- 6. Guardar CSV final ---
output_path = r"C:\Deysi\ironHack\ironHack\projects\Educacion\data\Datos_de_asistencia_escolar.csv"
merged.to_csv(output_path, index=False)
print(f"\nüíæ Archivo combinado guardado en: {output_path}")

# --- 7. Analizar correlaci√≥n ---
corr = merged[['poverty_rate', 'school_attendance']].corr().iloc[0, 1]
print(f"\nüìà Correlaci√≥n entre pobreza y asistencia escolar: {corr:.2f}")
