# EDA World Inflation Rate Dataset

In [2]:
#import requeried libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde

import plotly.express as px
import plotly.graph_objects as go

In [3]:
kaggle = False

if kaggle:
    path_inflation_countries = '/kaggle/input/global-exports-of-goods-and-services-1960-present/export_goods_services_countries_dataset.csv'
else:
    path_inflation_countries = './data/global_inflation_countries.csv'
    path_inflation_organizations = './data/global_inflation_organizations.csv'
    
height = 800

In [4]:
#load file of inflation countries
df_inflation_countries = pd.read_csv(path_inflation_countries)

df_inflation_organizations = pd.read_csv(path_inflation_organizations)

#get dataframe of inflation of the world
df_inflation_world = df_inflation_organizations[df_inflation_organizations['country_name'] == 'MUNDO']

df_inflation_world = df_inflation_world[df_inflation_world['year'] >= 1980]

# EDA Inflation

In [5]:
df_inflation_countries.describe()

Unnamed: 0,year,inflation_rate
count,13760.0,13760.0
mean,1991.5,14.733366
std,18.473624,258.045563
min,1960.0,-17.640424
25%,1975.75,0.0
50%,1991.5,1.807483
75%,2007.25,6.69466
max,2023.0,23773.131774


In [6]:
df_inflation_world.describe()

Unnamed: 0,year,inflation_rate
count,44.0,44.0
mean,2001.5,5.24274
std,12.845233,2.844394
min,1980.0,0.0
25%,1990.75,2.995784
50%,2001.5,4.816317
75%,2012.25,7.267467
max,2023.0,12.442437


# Trend Historical Inflation of the world

In [7]:
fig = px.line(df_inflation_world, 
              x='year', 
              y='inflation_rate', 
              title='Inflation rate of the world', 
              markers=True,
              labels={'inflation_rate': 'Inflation rate (%)'},
              height=height)

# Calcular la línea de tendencia
z = np.polyfit(df_inflation_world['year'], df_inflation_world['inflation_rate'], 1)
p = np.poly1d(z)

# Agregar la línea de tendencia al gráfico
fig.add_trace(go.Scatter(
    x=df_inflation_world['year'],
    y=p(df_inflation_world['year']),
    mode='lines',
    name='Tendencia',
    line=dict(color='red')
))

if kaggle:
    fig.show(renderer='iframe_connected')
else:
    fig.show()

# Inflation of 2023 by countries

In [8]:
df_inflation_2023 = df_inflation_countries[df_inflation_countries['year'] == 2023]
df_region_inflation_2023 = df_inflation_countries[df_inflation_countries['year'] == 2023] \
                                                    .groupby('region', as_index=False)['inflation_rate'].mean()
                                                    
mean_inflation_2023 = df_inflation_2023['inflation_rate'].mean()
median_inflation_2023 = df_inflation_2023['inflation_rate'].median()

print('The mean inflation in 2023 was -> ', mean_inflation_2023)
print('The median inflation in 2023 was -> ', median_inflation_2023)

The mean inflation in 2023 was ->  7.426734368727893
The median inflation in 2023 was ->  4.22535211267609


# Distribution inflation in 2023

In [9]:
fig = px.histogram(df_inflation_2023,
                     x='inflation_rate',
                     nbins=50,
                     title='Histogram of inflation rate in 2023',
                     labels={'inflation_rate': 'Inflation rate (%)'},
                     height=height)

if kaggle:
    fig.show(renderer='iframe_connected')
else:
    fig.show()

In [10]:
boxplot_fig = px.box(df_inflation_2023, 
                     y="inflation_rate", 
                     title="Boxplot de la Inflación en 2023",
                     height=height)

if kaggle:
    fig.show(renderer='iframe_connected')
else:
    fig.show()

In [11]:
#get the top and low countries with inflation rate
df_inflation_top_2023 = df_inflation_2023.nlargest(20, "inflation_rate")

#get low inflation in countries when inflation rate is less than 0
df_inlation_low_2023 = df_inflation_2023[df_inflation_2023['inflation_rate']!=0].nsmallest(20, "inflation_rate")

# Top 20 countries with the highest inflation in 2023

In [12]:
fig = px.bar(df_inflation_top_2023,
             x='country_name',
             y='inflation_rate',
             height=height,
             orientation='v',
             title='Top 20 countries with the highest inflation rate in 2023',
             labels={'inflation_rate': 'Inflation rate (%)',
                     'country_name': 'Country'})

if kaggle:
    fig.show(renderer='iframe_connected')
else:
    fig.show()

# Top 20 countries with the lowest inflation

In [13]:
fig = px.bar(df_inlation_low_2023.sort_values('inflation_rate', ascending=False),
             x='country_name',
             y='inflation_rate',
             height=height,
             orientation='v',
             title='Top 20 countries with the highest inflation rate in 2023',
             labels={'inflation_rate': 'Inflation rate (%)',
                     'country_name': 'Country'},
             )

if kaggle:
    fig.show(renderer='iframe_connected')
else:
    fig.show()

# inflation 2023 by Countries

In [14]:
fig = px.choropleth(df_inflation_2023[(df_inflation_2023['inflation_rate']!=0) & (df_inflation_2023['country_name']!='LIBANO')],
                              locations = 'country_code',
                              color = 'inflation_rate',
                              hover_name = 'country_name',
                              color_continuous_scale='Plasma_r',)

fig.update_layout(height = 800,
                            width =1500,
                            margin={"r":0,"t":0,"l":0,"b":0},
                            title_text = 'Inflation rate of the world in 2023')

if kaggle:
    fig.show(renderer='iframe_connected')
else:
    fig.show()

# Correlation Analisis

In [None]:
fig = px.density_heatmap(df_inflation_2023,
                         )

fig.show()

ValueError: Plotly Express cannot process wide-form data with columns of different type.

# Bar Race Animation on inflation of the world

In [46]:
latin_countries = ['COLOMBIA', 'ARGENTINA', 'BRAZIL', 'CHILE', 'PERU', 'URUGUAY', 'PARAGUAY', 'VENEZUELA', 'ECUADOR', 'BOLIVIA']

In [15]:
import bar_chart_race as bcr

In [47]:
#Dataset Preparation

df_inflation_ani = df_inflation_countries[['year',
                                           'country_name',
                                           'inflation_rate']].copy()

df_inflation_ani = df_inflation_ani[df_inflation_ani['country_name'].isin(latin_countries)]

#change a wide format
df_inflation_ani = df_inflation_ani.pivot(index='year',
                                          columns='country_name',
                                          values='inflation_rate')



In [49]:
bcr.bar_chart_race(
    df=df_inflation_ani,
    filename="inflation_race.mp4",  # Puedes cambiar a .gif si prefieres
    title="Evolución de la Inflación por País (1960-2023)",
    n_bars=5,  # Número de países mostrados en cada frame
    #period_length=2500  # Velocidad de la animación en ms
)


Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.


set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.

