In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('ggplot')

import constants as consts

## Determining the Change in Cost of Gas for Brazilian Residents by State

In this notebook, we will determine if the cost of gas has risen for Brazilian residents after inflation has been taken into account. To determine this, we will use the previously created inflation dataframe, and also account for wage growth in Brazil.

From [this](https://www.statista.com/statistics/941201/growth-rate-average-monthly-income-domestic-workers-brazil/) link, we can determine that wages from 2013 to 2019 have grown by 5.3% by adding the values above each bar in the chart. We will determine if gas prices have risen by a greater amount to determine if Brazilian workers are paying more for gas in 2019 when compared to 2013.

In [2]:
df = pd.read_csv('../../data/gas_prices_brazil/brazil_gas_inflation.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Weeks Since First Day,Adjusted Mean Distribution Price
0,0,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,ETANOL HIDRATADO,0.825,1.0,0.825
1,1,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,GASOLINA COMUM,1.711,1.0,1.711
2,2,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,GLP,27.165,1.0,27.165001
3,3,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,ÓLEO DIESEL,1.249,1.0,1.249
4,4,2004-05-15,2004,5,CENTRO OESTE,GOIAS,ETANOL HIDRATADO,0.763,1.0,0.763


In [4]:
# Making a copy prevents the 'SettingWithCopyWarning' warning.

cost_df = df[(df['Last Day of Week'] == '2013-01-12') | (df['Last Day of Week'] == '2019-06-29')].copy()

In [5]:
cost_df.reset_index(inplace = True)

In [6]:
# We can drop the 'Adjusted Mean Distribution Price' column because that value reflects inflation relative to 2004.

cost_df.drop(['Adjusted Mean Distribution Price',
              'Weeks Since First Day',
              'Unnamed: 0',
              'index'], 
               axis = 1, inplace = True)

In [7]:
# To determine inflation from 2013 to 2019, we will use the value of 1R$ in 2019 relative to 2004 from the already
# obtained data, and divide it by the value of 1R$ in 2013 relative to 2004.

r_inf = consts.inf_rates[2019] / consts.inf_rates[2013]
r_inf

1.40625

From this, we can conclude that inflation grew by roughly 41%, which closely resembles the calculation from [here](https://www.in2013dollars.com/brazil/inflation/2013?endYear=2019&amount=1).

In [8]:
cost_df

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price
0,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,ETANOL HIDRATADO,1.874
1,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,GASOLINA COMUM,2.384
2,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,GLP,31.420
3,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,ÓLEO DIESEL,1.954
4,2013-01-12,2013,1,CENTRO OESTE,GOIAS,ETANOL HIDRATADO,1.653
...,...,...,...,...,...,...,...
293,2019-06-29,2019,6,SUL,SANTA CATARINA,GASOLINA COMUM,3.784
294,2019-06-29,2019,6,SUL,SANTA CATARINA,GLP,51.702
295,2019-06-29,2019,6,SUL,SANTA CATARINA,GNV,1.998
296,2019-06-29,2019,6,SUL,SANTA CATARINA,ÓLEO DIESEL,3.023


In [9]:
# If the year is 2013, we do not need to adjust for inflation, otherwise, adjust for inflation.

inf_adj_dist_price = []

for i in range (len(cost_df)):
    if cost_df.loc[i, 'Year'] == 2019:
        inf_adj_dist_price.append(cost_df.loc[i, 'Mean Distribution Price'] / r_inf)
    else:
        inf_adj_dist_price.append(cost_df.loc[i, 'Mean Distribution Price'])

In [10]:
cost_df['Adjusted Mean Distribution Price'] = inf_adj_dist_price

In [11]:
# Check to make sure our calculation is working as intended.

cost_df.sample(10)

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Adjusted Mean Distribution Price
18,2013-01-12,2013,1,CENTRO OESTE,MATO GROSSO DO SUL,GLP,39.316,39.316
172,2019-06-29,2019,6,NORDESTE,ALAGOAS,GLP,48.169,34.253511
2,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,GLP,31.42,31.42
61,2013-01-12,2013,1,NORDESTE,PIAUI,ÓLEO DIESEL S10,1.945,1.945
22,2013-01-12,2013,1,NORDESTE,ALAGOAS,ETANOL HIDRATADO,2.042,2.042
106,2013-01-12,2013,1,NORTE,TOCANTINS,ÓLEO DIESEL S10,0.0,0.0
146,2013-01-12,2013,1,SUL,SANTA CATARINA,GNV,1.599,1.599
51,2013-01-12,2013,1,NORDESTE,PERNAMBUCO,ETANOL HIDRATADO,1.913,1.913
73,2013-01-12,2013,1,NORDESTE,SERGIPE,ÓLEO DIESEL S10,2.087,2.087
66,2013-01-12,2013,1,NORDESTE,RIO GRANDE DO NORTE,ÓLEO DIESEL,1.899,1.899


In [12]:
cost_df[(cost_df['Macro Region'] == 'NORDESTE') & (cost_df['State'] == 'PIAUI')]

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Adjusted Mean Distribution Price
57,2013-01-12,2013,1,NORDESTE,PIAUI,ETANOL HIDRATADO,2.141,2.141
58,2013-01-12,2013,1,NORDESTE,PIAUI,GASOLINA COMUM,2.305,2.305
59,2013-01-12,2013,1,NORDESTE,PIAUI,GLP,33.212,33.212
60,2013-01-12,2013,1,NORDESTE,PIAUI,ÓLEO DIESEL,1.98,1.98
61,2013-01-12,2013,1,NORDESTE,PIAUI,ÓLEO DIESEL S10,1.945,1.945
205,2019-06-29,2019,6,NORDESTE,PIAUI,ETANOL HIDRATADO,3.029,2.153956
206,2019-06-29,2019,6,NORDESTE,PIAUI,GASOLINA COMUM,4.031,2.866489
207,2019-06-29,2019,6,NORDESTE,PIAUI,GLP,55.769,39.657956
208,2019-06-29,2019,6,NORDESTE,PIAUI,ÓLEO DIESEL,3.222,2.2912
209,2019-06-29,2019,6,NORDESTE,PIAUI,ÓLEO DIESEL S10,3.243,2.306133


In [13]:
# Checking if there are any zeros in our cost dataframe.

cost_df[cost_df['Mean Distribution Price'] == 0]

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Adjusted Mean Distribution Price
7,2013-01-12,2013,1,CENTRO OESTE,GOIAS,GNV,0.0,0.0
9,2013-01-12,2013,1,CENTRO OESTE,GOIAS,ÓLEO DIESEL S10,0.0,0.0
19,2013-01-12,2013,1,CENTRO OESTE,MATO GROSSO DO SUL,GNV,0.0,0.0
36,2013-01-12,2013,1,NORDESTE,CEARA,GLP,0.0,0.0
50,2013-01-12,2013,1,NORDESTE,PARAIBA,ÓLEO DIESEL S10,0.0,0.0
64,2013-01-12,2013,1,NORDESTE,RIO GRANDE DO NORTE,GLP,0.0,0.0
78,2013-01-12,2013,1,NORTE,ACRE,ÓLEO DIESEL S10,0.0,0.0
79,2013-01-12,2013,1,NORTE,AMAPA,ETANOL HIDRATADO,0.0,0.0
106,2013-01-12,2013,1,NORTE,TOCANTINS,ÓLEO DIESEL S10,0.0,0.0
112,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,ÓLEO DIESEL S10,0.0,0.0


Because there are 13 missing values, using Excel to change these values would not take too much time, and would likely be the best way to proceed in a working environment. However, if there were significantly more zero values, replacing them with Python would be necessary, and doing so will be left to the programmer as an exercise.

In [14]:
cost_df[(cost_df['Macro Region'] == 'SUDESTE') & (cost_df['State'] == 'ESPIRITO SANTO')]

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Adjusted Mean Distribution Price
107,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,ETANOL HIDRATADO,2.113,2.113
108,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,GASOLINA COMUM,2.479,2.479
109,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,GLP,28.394,28.394
110,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,GNV,1.374,1.374
111,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,ÓLEO DIESEL,1.92,1.92
112,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,ÓLEO DIESEL S10,0.0,0.0
256,2019-06-29,2019,6,SUDESTE,ESPIRITO SANTO,ETANOL HIDRATADO,3.243,2.306133
257,2019-06-29,2019,6,SUDESTE,ESPIRITO SANTO,GASOLINA COMUM,4.02,2.858667
258,2019-06-29,2019,6,SUDESTE,ESPIRITO SANTO,GLP,48.437,34.444089
259,2019-06-29,2019,6,SUDESTE,ESPIRITO SANTO,GNV,0.0,0.0


In [15]:
# We need the same_cols_df for a function that will be able to replace zero values in cost_df.

same_cols_df = df.copy()
same_cols_df.drop(df.columns.difference(list(cost_df.columns)), axis = 1, inplace = True)
same_cols_df.head()

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Adjusted Mean Distribution Price
0,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,ETANOL HIDRATADO,0.825,0.825
1,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,GASOLINA COMUM,1.711,1.711
2,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,GLP,27.165,27.165001
3,2004-05-15,2004,5,CENTRO OESTE,DISTRITO FEDERAL,ÓLEO DIESEL,1.249,1.249
4,2004-05-15,2004,5,CENTRO OESTE,GOIAS,ETANOL HIDRATADO,0.763,0.763


In [16]:
def cost_query_not_zero(tup):
    # The tuple that is passed to this function needs to be of the following form.
    # (Macro Region, State, Type of Product)
    return same_cols_df[
        (same_cols_df['Macro Region'] == tup[0]) & 
        (same_cols_df['State'] == tup[1]) & 
        (same_cols_df['Type of Product'] == tup[2]) &
        (same_cols_df['Mean Distribution Price'] != 0) &
        (same_cols_df['Year'] >= 2013)
    ]

In [17]:
# This is how we will obtain the 2013 values that need to be replaced.

cost_query_not_zero(('CENTRO OESTE', 'GOIAS', 'GNV')).iloc[0]

Last Day of Week                      2013-01-19
Year                                        2013
Month                                          1
Macro Region                        CENTRO OESTE
State                                      GOIAS
Type of Product                              GNV
Mean Distribution Price                    1.909
Adjusted Mean Distribution Price        1.193125
Name: 55916, dtype: object

In [18]:
cost_query_not_zero(('SUDESTE', 'ESPIRITO SANTO', 'GNV'))

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Adjusted Mean Distribution Price
55869,2013-01-12,2013,1,SUDESTE,ESPIRITO SANTO,GNV,1.374,0.858750
56020,2013-01-19,2013,1,SUDESTE,ESPIRITO SANTO,GNV,1.600,1.000000
56173,2013-01-26,2013,1,SUDESTE,ESPIRITO SANTO,GNV,1.374,0.858750
56478,2013-02-09,2013,2,SUDESTE,ESPIRITO SANTO,GNV,1.225,0.765625
56630,2013-02-16,2013,2,SUDESTE,ESPIRITO SANTO,GNV,1.375,0.859375
...,...,...,...,...,...,...,...,...
105885,2019-05-18,2019,5,SUDESTE,ESPIRITO SANTO,GNV,2.222,0.987556
106036,2019-05-25,2019,5,SUDESTE,ESPIRITO SANTO,GNV,2.242,0.996444
106186,2019-06-01,2019,5,SUDESTE,ESPIRITO SANTO,GNV,2.242,0.996444
106336,2019-06-08,2019,6,SUDESTE,ESPIRITO SANTO,GNV,2.250,1.000000


In [19]:
# This is how we will obtain the 2019 values that need to be replaced.

cost_query_not_zero(('SUDESTE', 'ESPIRITO SANTO', 'GNV')).iloc[-1]

Last Day of Week                        2019-06-15
Year                                          2019
Month                                            6
Macro Region                               SUDESTE
State                               ESPIRITO SANTO
Type of Product                                GNV
Mean Distribution Price                      2.242
Adjusted Mean Distribution Price          0.996444
Name: 106486, dtype: object

In [20]:
list(cost_df[cost_df['Mean Distribution Price'] == 0].index)

[7, 9, 19, 36, 50, 64, 78, 79, 106, 112, 259, 265, 283]

In [21]:
cost_df.iloc[7]

Last Day of Week                      2013-01-12
Year                                        2013
Month                                          1
Macro Region                        CENTRO OESTE
State                                      GOIAS
Type of Product                              GNV
Mean Distribution Price                      0.0
Adjusted Mean Distribution Price             0.0
Name: 7, dtype: object

In [22]:
cost_df.iloc[7]['Year']

2013

In [23]:
cost_query_not_zero(('CENTRO OESTE', 'GOIAS', 'GNV')).iloc[0]

Last Day of Week                      2013-01-19
Year                                        2013
Month                                          1
Macro Region                        CENTRO OESTE
State                                      GOIAS
Type of Product                              GNV
Mean Distribution Price                    1.909
Adjusted Mean Distribution Price        1.193125
Name: 55916, dtype: object

In [24]:
# Iterate over the indicies with zero values, and replace the values appropriately.

for ind in list(cost_df[cost_df['Mean Distribution Price'] == 0].index):
    if cost_df.iloc[ind]['Year'] == 2013:
        cost_df.iloc[ind] = cost_query_not_zero((cost_df.iloc[ind]['Macro Region'], cost_df.iloc[ind]['State'], cost_df.iloc[ind]['Type of Product'])).iloc[0]
    else:
        cost_df.iloc[ind] = cost_query_not_zero((cost_df.iloc[ind]['Macro Region'], cost_df.iloc[ind]['State'], cost_df.iloc[ind]['Type of Product'])).iloc[-1]

In [25]:
cost_df

Unnamed: 0,Last Day of Week,Year,Month,Macro Region,State,Type of Product,Mean Distribution Price,Adjusted Mean Distribution Price
0,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,ETANOL HIDRATADO,1.874,1.874000
1,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,GASOLINA COMUM,2.384,2.384000
2,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,GLP,31.420,31.420000
3,2013-01-12,2013,1,CENTRO OESTE,DISTRITO FEDERAL,ÓLEO DIESEL,1.954,1.954000
4,2013-01-12,2013,1,CENTRO OESTE,GOIAS,ETANOL HIDRATADO,1.653,1.653000
...,...,...,...,...,...,...,...,...
293,2019-06-29,2019,6,SUL,SANTA CATARINA,GASOLINA COMUM,3.784,2.690844
294,2019-06-29,2019,6,SUL,SANTA CATARINA,GLP,51.702,36.765867
295,2019-06-29,2019,6,SUL,SANTA CATARINA,GNV,1.998,1.420800
296,2019-06-29,2019,6,SUL,SANTA CATARINA,ÓLEO DIESEL,3.023,2.149689


In [26]:
# Checking that the replacement worked as intended

for i in [7, 9, 19, 36, 50, 64, 78, 79, 106, 112, 259, 265, 283]:
    print (cost_df.iloc[i], '\n')

Last Day of Week                      2013-01-19
Year                                        2013
Month                                          1
Macro Region                        CENTRO OESTE
State                                      GOIAS
Type of Product                              GNV
Mean Distribution Price                    1.909
Adjusted Mean Distribution Price        1.193125
Name: 7, dtype: object 

Last Day of Week                         2013-01-19
Year                                           2013
Month                                             1
Macro Region                           CENTRO OESTE
State                                         GOIAS
Type of Product                     ÓLEO DIESEL S10
Mean Distribution Price                       2.058
Adjusted Mean Distribution Price            1.28625
Name: 9, dtype: object 

Last Day of Week                            2013-03-02
Year                                              2013
Month                           