In [290]:
# Importar libreria
import pandas as pd 


In [245]:
def process_csv(input_file, output_file, skiprows=4, drop_column="Unnamed: 66"):
    """Read a CSV, skip its leading lines, drop one column and save the result.

    Parameters
    ----------
    input_file : str or path-like
        CSV file to read.
    output_file : str or path-like
        Path the cleaned CSV is written to (the index is not written).
    skiprows : int, default 4
        Number of lines at the start of ``input_file`` to skip before the
        header row.
    drop_column : str, default "Unnamed: 66"
        Label of the column to remove. "Unnamed: N" is the label pandas
        assigns to a column whose header cell is blank -- presumably the
        result of a trailing delimiter in the raw file; confirm against the
        input data.

    Raises
    ------
    KeyError
        If ``drop_column`` is not one of the columns read from ``input_file``.
    """
    df = pd.read_csv(input_file, skiprows=skiprows)

    # drop() returns a new frame, so the frame that was read is never mutated in place.
    df = df.drop(columns=[drop_column])

    df.to_csv(output_file, index=False)

In [249]:
# Run process_csv on every input file; each key is the file to read and each
# value is the name the processed copy is written under.
archivos = {
    "bm_migracion_neta.csv": "bm_mn.csv",
    "API_BX.TRF.PWKR.DT.GD.ZS_DS2_es_csv_v2_5370777.csv": "bm_remesas.csv",
    "API_FP.CPI.TOTL.ZG_DS2_es_csv_v2_5358950.csv": "bm_inflacion.csv",
    "API_NY.GDP.PCAP.CD_DS2_es_csv_v2_5359609.csv": "bm_pbi.csv",
}
for entrada, salida in archivos.items():
    process_csv(entrada, salida)

In [254]:
# Load the country metadata file and remove its "Unnamed: 4" column in one step
bm_ingresos = pd.read_csv(
    "Metadata_Country_API_SM.POP.NETM_DS2_es_csv_v2_5368209.csv"
).drop(columns=["Unnamed: 4"])

# Save the trimmed table as a new CSV, without the index column
bm_ingresos.to_csv('bm_ingresos.csv', index=False)

### Web scraping

In [48]:
# Wikipedia page with the table of countries of the Americas by area
url = 'https://es.wikipedia.org/wiki/Anexo:Pa%C3%ADses_de_Am%C3%A9rica_por_superficie'

# Parse the HTML tables found at that URL and keep the first one as a DataFrame
tablas = pd.read_html(url)
df = tablas[0]

In [52]:
# Preview the first two rows of the scraped table
df.head(2)

Unnamed: 0,0,1,2,3
0,Puesto,País,Superficie[1]​,Capital
1,1,Canadá,9 984 670 km²,Ottawa


In [53]:
# Keep only column 1 (the country name) and discard row 0, which holds the
# table's header text rather than data.
df = df.drop(columns=[0, 2, 3]).drop(index=0)
df.columns = ["country"]
data = df.sort_values(by=['country'])

# Remove the rows whose text contains "nota". na=False makes a missing cell
# count as "no match", so the mask stays boolean; without it a NaN in the mask
# makes the `~` negation raise a TypeError.
data = data[~data['country'].str.contains('nota', na=False)]

In [60]:
#data.country.unique()

In [232]:
# Country names (in Spanish, alphabetical order) used to filter every dataset
# down to the Americas.
america = [
    'Antigua y Barbuda',
    'Argentina',
    'Bahamas',
    'Barbados',
    'Belice',
    'Bolivia',
    'Brasil',
    'Canadá',
    'Chile',
    'Colombia',
    'Costa Rica',
    'Cuba',
    'Dominica',
    'Ecuador',
    'El Salvador',
    'Estados Unidos',
    'Granada',
    'Guatemala',
    'Guyana',
    'Haití',
    'Honduras',
    'Jamaica',
    'México',
    'Nicaragua',
    'Panamá',
    'Paraguay',
    'Perú',
    'República Dominicana',
    'San Cristóbal y Nieves',
    'San Vicente y las Granadinas',
    'Santa Lucía',
    'Surinam',
    'Trinidad y Tobago',
    'Uruguay',
    'Venezuela',
]

### Migración neta

In [250]:
def procesar_file(archivo_csv, america, anio_inicio="2007"):
    """Load an indicator CSV and keep only the requested countries and years.

    Parameters
    ----------
    archivo_csv : str or path-like
        CSV file with the columns "Country Name", "Indicator Name" and
        "Indicator Code", followed by one column per year.
    america : list of str
        Country names to keep; matched against the "Country Name" column.
    anio_inicio : str or int, default "2007"
        Label of the first year column to keep. Every column from this one to
        the last column of the file is retained.

    Returns
    -------
    pandas.DataFrame
        Rows for the countries in ``america``, sorted by "Country Name" and
        keeping their original index labels. Columns are the first four
        columns of the file minus "Indicator Name" and "Indicator Code",
        followed by the year columns from ``anio_inicio`` onwards.

    Raises
    ------
    KeyError
        If the ``anio_inicio`` column or any of the named columns is missing.
    """
    df = pd.read_csv(archivo_csv)

    # read_csv yields string column labels, so an int year is converted before slicing.
    primera_columna = str(anio_inicio)

    # First four columns of the file plus every column from the start year onwards.
    df_periodo = pd.concat([df.iloc[:, :4], df.loc[:, primera_columna:]], axis=1)

    return (
        df_periodo[df_periodo["Country Name"].isin(america)]
        .sort_values(by=['Country Name'])
        .drop(columns=["Indicator Name", "Indicator Code"])
    )

In [251]:
# Build the net-migration table for the countries in `america` and preview the first two rows
bm_mn = procesar_file("bm_mn.csv", america)
bm_mn.head(2)

Unnamed: 0,Country Name,Country Code,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,ATG,372.0,369.0,338.0,295.0,247.0,203.0,156.0,108.0,60.0,16.0,0.0,0.0,0.0,0.0,0.0
9,Argentina,ARG,-18051.0,-12450.0,-7869.0,-4970.0,-1011.0,3127.0,5369.0,5801.0,5522.0,5215.0,4926.0,4690.0,4478.0,2344.0,2344.0


In [291]:
# Rename the country name and country code columns to "country" and "country_id"
df_mn = bm_mn.rename(columns={'Country Name': 'country', 'Country Code': 'country_id'})

In [292]:
# Preview the first two rows of the renamed net-migration table
df_mn.head(2)

Unnamed: 0,country,country_id,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,ATG,372.0,369.0,338.0,295.0,247.0,203.0,156.0,108.0,60.0,16.0,0.0,0.0,0.0,0.0,0.0
9,Argentina,ARG,-18051.0,-12450.0,-7869.0,-4970.0,-1011.0,3127.0,5369.0,5801.0,5522.0,5215.0,4926.0,4690.0,4478.0,2344.0,2344.0


### Ingreso

In [260]:
# Load the income-group dataset saved earlier and preview the first two rows
bm_ingresos =pd.read_csv("bm_ingresos.csv")
bm_ingresos.head(2)

Unnamed: 0,Country Name,Country Code,Region,Income_Group
0,Aruba,ABW,,Ingreso alto
1,,AFE,,Agregados


In [261]:
# Old income-group labels mapped to the short labels stored in "tipo_ingreso";
# labels not listed here are left unchanged by replace().
mapeo = {'Ingreso alto': 'alto', 'Ingreso mediano alto': 'mediano alto', 'Países de ingreso mediano bajo': 'mediano bajo', 'No clasificado': 'no clasificado'}

# One pipeline: drop unused columns, rename, keep only the countries in
# `america`, sort by country, then add the shortened income label.
bm_ingresos = (
    bm_ingresos
    .drop(columns=["Country Code", "Region"])
    .rename(columns={'Country Name': 'country', 'Income_Group': 'income_group'})
    .loc[lambda tabla: tabla["country"].isin(america)]
    .sort_values(by=['country'])
    .assign(tipo_ingreso=lambda tabla: tabla['income_group'].replace(mapeo))
)

# The final table keeps only the country and its shortened income label
df_ingresos = bm_ingresos.drop(columns=["income_group"])

In [263]:
# Preview the first two rows of the income-group table
df_ingresos.head(2)

Unnamed: 0,country,tipo_ingreso
12,Antigua y Barbuda,alto
9,Argentina,mediano alto


### Remesas

In [264]:
# Build the remittances table with procesar_file and preview the first two rows
bm_remesas = procesar_file("bm_remesas.csv", america)
bm_remesas.head(2)

Unnamed: 0,Country Name,Country Code,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,ATG,1.583176,1.593232,1.681855,1.758306,1.783751,1.738093,1.788259,2.566805,2.337442,1.858969,1.636268,2.040436,2.185159,2.643288,3.076953
9,Argentina,ARG,0.210934,0.195029,0.188765,0.152091,0.131541,0.105779,0.096917,0.096015,0.083133,0.070234,0.074567,0.099547,0.125381,0.168859,0.187098


In [271]:
# Rename the identifier columns, then discard the country code, in a single chain
df_remesas = (
    bm_remesas
    .rename(columns={'Country Name': 'country', 'Country Code': 'country_id'})
    .drop(columns=["country_id"])
)

In [275]:
# Preview the first two rows of the remittances table
df_remesas.head(2)

Unnamed: 0,country,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,1.583176,1.593232,1.681855,1.758306,1.783751,1.738093,1.788259,2.566805,2.337442,1.858969,1.636268,2.040436,2.185159,2.643288,3.076953
9,Argentina,0.210934,0.195029,0.188765,0.152091,0.131541,0.105779,0.096917,0.096015,0.083133,0.070234,0.074567,0.099547,0.125381,0.168859,0.187098


### Inflación

In [277]:
# Build the inflation table with procesar_file and preview the first two rows
bm_inflacion = procesar_file("bm_inflacion.csv", america)
bm_inflacion.head(2)

Unnamed: 0,Country Name,Country Code,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,ATG,1.416053,5.333806,-0.55016,3.370025,3.45675,3.37688,1.059498,1.089442,0.968993,-0.489438,2.432488,1.207158,1.431356,0.62599,2.062996
9,Argentina,ARG,,,,,,,,,,,,,,,


In [278]:
# Rename the identifier columns, then discard the country code, in a single chain
df_inflacion = (
    bm_inflacion
    .rename(columns={'Country Name': 'country', 'Country Code': 'country_id'})
    .drop(columns=["country_id"])
)

In [279]:
# Preview the first two rows of the inflation table
df_inflacion.head(2)

Unnamed: 0,country,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,1.416053,5.333806,-0.55016,3.370025,3.45675,3.37688,1.059498,1.089442,0.968993,-0.489438,2.432488,1.207158,1.431356,0.62599,2.062996
9,Argentina,,,,,,,,,,,,,,,


### PBI per capita

In [281]:
# Build the GDP per capita table with procesar_file and preview the first two rows
bm_pbi = procesar_file("bm_pbi.csv", america)
bm_pbi.head(2)

Unnamed: 0,Country Name,Country Code,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,ATG,16006.136111,16457.104063,14530.59869,13404.516016,13117.146941,13686.476585,13350.149137,14004.811212,14861.882707,15862.651663,16110.556281,17527.169629,18319.456054,14787.635775,15781.395702
9,Argentina,ARG,7210.595548,8977.506851,8184.389889,10385.964432,12848.864197,13082.664326,13080.254732,12334.798245,13789.060425,12790.242473,14613.041825,11795.159387,9963.672506,8496.424142,10636.120196


In [283]:
# Rename the identifier columns, then discard the country code, in a single chain
df_pbi = (
    bm_pbi
    .rename(columns={'Country Name': 'country', 'Country Code': 'country_id'})
    .drop(columns=["country_id"])
)

In [284]:
# Preview the first two rows of the GDP per capita table
df_pbi.head(2)

Unnamed: 0,country,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
12,Antigua y Barbuda,16006.136111,16457.104063,14530.59869,13404.516016,13117.146941,13686.476585,13350.149137,14004.811212,14861.882707,15862.651663,16110.556281,17527.169629,18319.456054,14787.635775,15781.395702
9,Argentina,7210.595548,8977.506851,8184.389889,10385.964432,12848.864197,13082.664326,13080.254732,12334.798245,13789.060425,12790.242473,14613.041825,11795.159387,9963.672506,8496.424142,10636.120196


### Exportar los datasets

In [297]:
# Write every final DataFrame to its own CSV file, without the index column
exportaciones = {
    'df_pbi.csv': df_pbi,
    'df_inflacion.csv': df_inflacion,
    'df_remesas.csv': df_remesas,
    'df_ingresos.csv': df_ingresos,
    'df_migracion_neta.csv': df_mn,
}
for nombre_archivo, tabla in exportaciones.items():
    tabla.to_csv(nombre_archivo, index=False)