In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns

%matplotlib inline
# Folder and file name of the GDP / GDP-per-capita CSV export.
DATA_DIR = '/users/yoga7i/downloads'
CSV_NAME = 'SYB67_230_202411_GDP and GDP Per Capita.csv'

# Work from the data folder, then load the table. skiprows=1 discards the
# file's first line, so the header is taken from the second line.
os.chdir(DATA_DIR)
df = pd.read_csv(CSV_NAME, skiprows=1)

In [3]:
# Preview the first five rows of the freshly loaded table.
df.head()

Unnamed: 0,Region/Country/Area,Unnamed: 1,Year,Series,Value,Footnotes,Source
0,1,"Total, all countries or areas",1995,GDP in current prices (millions of US dollars),31290901,,"United Nations Statistics Division, New York, ..."
1,1,"Total, all countries or areas",2005,GDP in current prices (millions of US dollars),47816593,,"United Nations Statistics Division, New York, ..."
2,1,"Total, all countries or areas",2010,GDP in current prices (millions of US dollars),66633612,,"United Nations Statistics Division, New York, ..."
3,1,"Total, all countries or areas",2015,GDP in current prices (millions of US dollars),75440153,,"United Nations Statistics Division, New York, ..."
4,1,"Total, all countries or areas",2020,GDP in current prices (millions of US dollars),85483570,,"United Nations Statistics Division, New York, ..."


In [4]:
# Summarize column names, non-null counts and dtypes of the loaded table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6774 entries, 0 to 6773
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Region/Country/Area  6774 non-null   int64 
 1   Unnamed: 1           6774 non-null   object
 2   Year                 6774 non-null   int64 
 3   Series               6774 non-null   object
 4   Value                6774 non-null   object
 5   Footnotes            294 non-null    object
 6   Source               6774 non-null   object
dtypes: int64(2), object(5)
memory usage: 370.6+ KB


In [5]:
# Per-column dtypes; note that 'Value' is loaded as object rather than numeric.
df.dtypes

Region/Country/Area     int64
Unnamed: 1             object
Year                    int64
Series                 object
Value                  object
Footnotes              object
Source                 object
dtype: object

In [6]:
# (rows, columns) of the loaded table.
df.shape

(6774, 7)

In [7]:
# Give the auto-named column "Unnamed: 1" the readable name "Region/Country".
column_renames = {"Unnamed: 1": "Region/Country"}
df = df.rename(columns=column_renames)

# Preview the table with the new column name.
df.head()

Unnamed: 0,Region/Country/Area,Region/Country,Year,Series,Value,Footnotes,Source
0,1,"Total, all countries or areas",1995,GDP in current prices (millions of US dollars),31290901,,"United Nations Statistics Division, New York, ..."
1,1,"Total, all countries or areas",2005,GDP in current prices (millions of US dollars),47816593,,"United Nations Statistics Division, New York, ..."
2,1,"Total, all countries or areas",2010,GDP in current prices (millions of US dollars),66633612,,"United Nations Statistics Division, New York, ..."
3,1,"Total, all countries or areas",2015,GDP in current prices (millions of US dollars),75440153,,"United Nations Statistics Division, New York, ..."
4,1,"Total, all countries or areas",2020,GDP in current prices (millions of US dollars),85483570,,"United Nations Statistics Division, New York, ..."


In [8]:
# Drop the columns that are not needed for the analysis.
# errors="ignore" keeps this cell idempotent: because `df` is rebound to the
# trimmed frame, re-running the cell would otherwise raise KeyError on the
# already-removed columns.
df = df.drop(columns=['Footnotes',
                      'Source'],
             errors="ignore")

In [9]:
# Preview the table after the unused columns were dropped.
df.head()

Unnamed: 0,Region/Country/Area,Region/Country,Year,Series,Value
0,1,"Total, all countries or areas",1995,GDP in current prices (millions of US dollars),31290901
1,1,"Total, all countries or areas",2005,GDP in current prices (millions of US dollars),47816593
2,1,"Total, all countries or areas",2010,GDP in current prices (millions of US dollars),66633612
3,1,"Total, all countries or areas",2015,GDP in current prices (millions of US dollars),75440153
4,1,"Total, all countries or areas",2020,GDP in current prices (millions of US dollars),85483570


In [10]:
# Keep only the records whose Year is later than 2015.
recent_years = df["Year"].gt(2015)
df_filtrado = df[recent_years]

In [11]:
# Preview the rows that passed the year filter.
df_filtrado.head()

Unnamed: 0,Region/Country/Area,Region/Country,Year,Series,Value
4,1,"Total, all countries or areas",2020,GDP in current prices (millions of US dollars),85483570
5,1,"Total, all countries or areas",2021,GDP in current prices (millions of US dollars),97329051
6,1,"Total, all countries or areas",2022,GDP in current prices (millions of US dollars),100834796
11,1,"Total, all countries or areas",2020,GDP per capita (US dollars),10905
12,1,"Total, all countries or areas",2021,GDP per capita (US dollars),12309


In [12]:
# Countries/regions to keep for the comparison.
keep_rows = [
    'Mexico',
    'United States',
    'Canada',
    'Brazil',
    'Colombia',
    'Argentina',
    'Chile'
]

# Restrict the frame to those countries. The explicit .copy() makes
# df_filtrado an independent frame, so later column assignments on it do not
# raise pandas' SettingWithCopyWarning or risk writing to a temporary slice.
df_filtrado = df_filtrado[df_filtrado['Region/Country'].isin(keep_rows)].copy()

In [13]:
# Preview the rows left after restricting to the selected countries.
df_filtrado.head()

Unnamed: 0,Region/Country/Area,Region/Country,Year,Series,Value
1040,32,Argentina,2020,GDP in current prices (millions of US dollars),385741
1041,32,Argentina,2021,GDP in current prices (millions of US dollars),487903
1042,32,Argentina,2022,GDP in current prices (millions of US dollars),631133
1047,32,Argentina,2020,GDP per capita (US dollars),8565
1048,32,Argentina,2021,GDP per capita (US dollars),10776


In [14]:
# (rows, columns) remaining after the year and country filters.
df_filtrado.shape

(72, 5)

In [15]:
# Inspect the last five rows of the filtered table.
df_filtrado.tail()

Unnamed: 0,Region/Country/Area,Region/Country,Year,Series,Value
4270,484,Mexico,2021,GDP in constant 2015 prices (millions of US do...,1236244.0
4271,484,Mexico,2022,GDP in constant 2015 prices (millions of US do...,1285063.0
4276,484,Mexico,2020,GDP real rates of growth (percent),-8.6
4277,484,Mexico,2021,GDP real rates of growth (percent),5.7
4278,484,Mexico,2022,GDP real rates of growth (percent),3.9


In [16]:
# Convert 'Value' to numeric by stripping the comma thousands separators.
# .assign builds a new frame instead of assigning into a filtered slice, which
# avoids pandas' SettingWithCopyWarning; the conversion is safe to re-run
# because already-numeric values survive the str -> float round trip.
df_filtrado = df_filtrado.assign(
    Value=lambda frame: (
        frame['Value'].astype(str).str.replace(',', '', regex=False).astype(float)
    )
)

# Keep only the "GDP real rates of growth (percent)" series.
gdp_rrg = df_filtrado[df_filtrado['Series'] == 'GDP real rates of growth (percent)']

# Select Mexico and the comparison countries Argentina/Brazil/Chile/Colombia.
subset = gdp_rrg[gdp_rrg['Region/Country'].isin(['Mexico', 'Argentina', 'Brazil', 'Chile', 'Colombia'])]

# Grouped bar chart: years on the x axis, one colored bar per country.
fig = px.bar(
    subset,
    x="Year",
    y="Value",
    color="Region/Country",
    barmode="group",
    title="Tasas reales de crecimiento del PIB: México vs Argentina/Brasil/Chile/Colombia",
    labels={"Value": "Tasas reales de crecimiento del PIB (porcentaje)", "Year": "Año"}
)
# Print each bar's value above it, formatted with the d3 spec '.3'.
fig.update_traces(texttemplate='%{y:.3}', textposition='outside')
fig.show()

In [25]:
# Convert 'Value' to numeric by stripping the comma thousands separators.
# .assign builds a new frame instead of assigning into a filtered slice, which
# avoids pandas' SettingWithCopyWarning; the conversion is safe to re-run
# because already-numeric values survive the str -> float round trip.
df_filtrado = df_filtrado.assign(
    Value=lambda frame: (
        frame['Value'].astype(str).str.replace(',', '', regex=False).astype(float)
    )
)

# Keep only the "GDP per capita (US dollars)" series.
gdp_pc = df_filtrado[df_filtrado['Series'] == 'GDP per capita (US dollars)']

# Select Mexico and the comparison countries Argentina/Brazil/Chile/Colombia.
subset = gdp_pc[gdp_pc['Region/Country'].isin(['Mexico', 'Argentina', 'Brazil', 'Chile', 'Colombia'])]

# Grouped bar chart: years on the x axis, one colored bar per country.
fig = px.bar(
    subset,
    x="Year",
    y="Value",
    color="Region/Country",
    barmode="group",
    title="PIB per capita (USD): México vs Argentina/Brasil/Chile/Colombia",
    labels={"Value": "PIB per capita (USD)", "Year": "Año"}
)
# Print each bar's value above it, formatted with the d3 spec '.5'.
fig.update_traces(texttemplate='%{y:.5}', textposition='outside')
fig.show()

In [23]:
# Convert 'Value' to numeric by stripping the comma thousands separators.
# .assign builds a new frame instead of assigning into a filtered slice, which
# avoids pandas' SettingWithCopyWarning; the conversion is safe to re-run
# because already-numeric values survive the str -> float round trip.
df_filtrado = df_filtrado.assign(
    Value=lambda frame: (
        frame['Value'].astype(str).str.replace(',', '', regex=False).astype(float)
    )
)

# Keep only the "GDP in current prices (millions of US dollars)" series.
gdp_cp = df_filtrado[df_filtrado['Series'] == 'GDP in current prices (millions of US dollars)']

# Select Mexico and the comparison countries Argentina/Brazil/Chile/Colombia.
subset = gdp_cp[gdp_cp['Region/Country'].isin(['Mexico', 'Argentina', 'Brazil', 'Chile', 'Colombia'])]

# Grouped bar chart: years on the x axis, one colored bar per country.
fig = px.bar(
    subset,
    x="Year",
    y="Value",
    color="Region/Country",
    barmode="group",
    title="PIB en precios corrientes (millones de dólares USD): México vs Argentina/Brasil/Chile/Colombia",
    labels={"Value": "PIB en precios corrientes (millones de dólares USD)", "Year": "Año"}
)
# Print each bar's value above it, formatted with the d3 spec '.8'.
fig.update_traces(texttemplate='%{y:.8}', textposition='outside')
fig.show()