In [None]:
import pandas as pd

In [None]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose


In [5]:
df = pd.read_csv('cpi.csv')

In [None]:
df


Unnamed: 0,Sector,Year,Month,Cereals and products,Meat and fish,Egg,Milk and products,Oils and fats,Fruits,Vegetables,...,Fuel and light,Household goods and services,Health,Transport and communication,Recreation and amusement,Education,Personal care and effects,Miscellaneous,General index,Date
0,Rural,2013,January,107.5,106.3,108.1,104.9,106.1,103.9,101.9,...,105.5,104.8,104.0,103.3,103.4,103.8,104.7,104.0,105.1,2013-01-01
1,Urban,2013,January,110.5,109.1,113.0,103.6,103.4,102.3,102.9,...,105.4,104.8,104.1,103.2,102.9,103.5,104.3,103.7,104.0,2013-01-01
2,Rural+Urban,2013,January,108.4,107.3,110.0,104.4,105.1,103.2,102.2,...,105.5,104.8,104.0,103.2,103.1,103.6,104.5,103.9,104.6,2013-01-01
3,Rural,2013,February,109.2,108.7,110.2,105.4,106.7,104.0,102.4,...,106.2,105.2,104.4,103.9,104.0,104.1,104.6,104.4,105.8,2013-02-01
4,Urban,2013,February,112.9,112.9,116.9,104.0,103.5,103.1,104.9,...,105.7,105.2,104.7,104.4,103.3,103.7,104.3,104.3,104.7,2013-02-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,Rural,2023,February,174.2,205.2,173.9,177.0,183.4,167.2,140.9,...,181.6,178.6,186.6,169.0,172.8,178.5,180.7,177.9,178.0,2023-02-01
361,Urban,2023,February,174.7,212.2,177.2,177.9,172.2,172.1,175.8,...,182.8,169.2,180.8,159.8,168.4,172.5,181.4,170.0,176.3,2023-02-01
362,Rural+Urban,2023,February,174.4,207.7,175.2,177.3,179.3,169.5,152.7,...,182.1,174.2,184.4,164.2,170.3,175.0,181.0,174.1,177.2,2023-02-01
363,Rural,2023,March,174.3,205.2,173.9,177.0,183.3,167.2,140.9,...,181.4,178.6,186.6,169.0,172.8,178.5,180.7,177.9,178.0,2023-03-01


In [None]:
df.head(5)

Unnamed: 0,Sector,Year,Month,Cereals and products,Meat and fish,Egg,Milk and products,Oils and fats,Fruits,Vegetables,...,Housing,Fuel and light,Household goods and services,Health,Transport and communication,Recreation and amusement,Education,Personal care and effects,Miscellaneous,General index
0,Rural,2013,January,107.5,106.3,108.1,104.9,106.1,103.9,101.9,...,,105.5,104.8,104.0,103.3,103.4,103.8,104.7,104.0,105.1
1,Urban,2013,January,110.5,109.1,113.0,103.6,103.4,102.3,102.9,...,100.3,105.4,104.8,104.1,103.2,102.9,103.5,104.3,103.7,104.0
2,Rural+Urban,2013,January,108.4,107.3,110.0,104.4,105.1,103.2,102.2,...,100.3,105.5,104.8,104.0,103.2,103.1,103.6,104.5,103.9,104.6
3,Rural,2013,February,109.2,108.7,110.2,105.4,106.7,104.0,102.4,...,,106.2,105.2,104.4,103.9,104.0,104.1,104.6,104.4,105.8
4,Urban,2013,February,112.9,112.9,116.9,104.0,103.5,103.1,104.9,...,100.4,105.7,105.2,104.7,104.4,103.3,103.7,104.3,104.3,104.7


In [6]:
df['Month'] = df['Month'].str.strip()
df['Month'] = df['Month'].replace('Marcrh', 'March')
df['Date'] = pd.to_datetime(df['Year'].astype(str) + '-' + df['Month'], format='%Y-%B')

In [7]:
rural_urban_df= df[df['Sector'] == 'Rural+Urban'].sort_values('Date')

# inflation trend analysis
fig = px.line(rural_urban_df, x='Date', y='General index', title='Inflation Trend Analysis (General CPI Index)')
fig.update_layout(xaxis_title='Date', yaxis_title='CPI - General Index')
fig.show()

In [8]:
rural_urban_df.set_index('Date', inplace=True)
monthly_df = rural_urban_df['General index'].resample('M').mean().interpolate(method='linear')
decomposition = seasonal_decompose(monthly_df, model='multiplicative', period=12)

fig = go.Figure()
fig.add_trace(go.Scatter(x=decomposition.observed.index, y=decomposition.observed, mode='lines', name='Observed'))
fig.add_trace(go.Scatter(x=decomposition.trend.index, y=decomposition.trend, mode='lines', name='Trend'))
fig.add_trace(go.Scatter(x=decomposition.seasonal.index, y=decomposition.seasonal, mode='lines', name='Seasonal'))
fig.add_trace(go.Scatter(x=decomposition.resid.index, y=decomposition.resid, mode='lines', name='Residual'))
fig.update_layout(title='Seasonal Decomposition of CPI (Observed, Trend, Seasonal, Residual)', xaxis_title='Date')
fig.show()


'M' is deprecated and will be removed in a future version, please use 'ME' instead.



In [9]:
sector_df_means = df.groupby(['Sector'])['General index'].mean().reset_index()
fig = px.bar(sector_df_means, x='Sector', y='General index', title='Average CPI Comparison Across Sectors (Rural, Urban, Rural+Urban)')
fig.update_layout(xaxis_title='Sector', yaxis_title='Average CPI - General Index')
fig.show()

In [10]:

cpi_categories = df[['Cereals and products', 'Meat and fish', 'Egg', 'Milk and products', 'Oils and fats',
                           'Fruits', 'Vegetables', 'Fuel and light', 'Housing', 'Health', 'Transport and communication',
                           'Recreation and amusement', 'Education', 'Personal care and effects', 'Miscellaneous', 'General index']]
cpi_categories = cpi_categories.apply(pd.to_numeric, errors='coerce')  # convert to numeric

# calculate the correlation matrix
correlation_matrix = cpi_categories.corr()

# plot the correlation matrix as a heatmap
fig = px.imshow(correlation_matrix, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1,
                title='Correlation between CPI Categories and General Index')
fig.update_layout(xaxis_title='CPI Category', yaxis_title='CPI Category')
fig.show()

In [11]:
sectors_to_analyze = ['Fuel and light', 'Health', 'Housing', 'Cereals and products']
sector_data = rural_urban_df[sectors_to_analyze].fillna(method='ffill').reset_index()

fig = go.Figure()
for sector in sectors_to_analyze:
    fig.add_trace(go.Scatter(x=sector_data['Date'], y=sector_data[sector], mode='lines', name=sector))
fig.update_layout(title='CPI Trends for Selected Sectors', xaxis_title='Date', yaxis_title='CPI Value')
fig.show()


DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



In [14]:

covid_period = rural_urban_df[(rural_urban_df.index >= '2020-01-01') & (rural_urban_df.index <= '2021-12-31')][sectors_to_analyze + ['General index']].fillna(method='ffill').reset_index()

fig = go.Figure()
fig.add_trace(go.Scatter(x=covid_period['Date'], y=covid_period['General index'], mode='lines', name='General CPI Index', line=dict(width=2, color='black')))
for sector in sectors_to_analyze:
    fig.add_trace(go.Scatter(x=covid_period['Date'], y=covid_period[sector], mode='lines', name=sector))
fig.update_layout(title='CPI Trends During COVID-19 Period (2020-2021)', xaxis_title='Date', yaxis_title='CPI Value')
fig.show()


DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.

