# INFLATION
## There are 29 different types of inflation measured by the Bureau of Labor Statistics (BLS)
### ***NOTE***:  The December 1999 score of 100.0 is the *baseline*, or starting level, for each inflation score type.
#### The monthly data for years 2010-2019 for each inflation type is separated into different dataframes below, using the inflation type as the dataframe name (*e.g. All_items, Housing, Shelter*).

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import config

pd.set_option('display.max_columns', 100, 'display.max_rows', 10)

#Load google.cloud.bigquery
%load_ext google.cloud.bigquery

#Select path to credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=config.GOOGLE_APPLICATION_CREDENTIALS

In [None]:
%%bigquery --use_rest_api inflation
-- Fetch every column and every row of the public BLS table named below.
-- The cell magic stores the query result in the notebook variable inflation.
-- NOTE(review): there is no ORDER BY, so the row order of the result should
-- not be relied on by later cells.
SELECT *
FROM `bigquery-public-data.bls.c_cpi_u`

In [None]:
# Keep only observations from 2010 through 2019. The filtered rows are copied
# so the raw query result in `inflation` is preserved and the column
# assignment below never writes into a slice of it.
in_2010_2019 = (inflation['year'] > 2009) & (inflation['year'] < 2020)
all_inflation2010_2019 = inflation.loc[in_2010_2019].copy()

# Replace ' ' with '_' in the item_name column (type of inflation) so that
# every inflation type can double as a Python variable name.
all_inflation2010_2019['item_name'] = (
    all_inflation2010_2019['item_name'].str.replace(' ', '_')
)

# List of all the different types of inflation present in the filtered data
item_names = list(all_inflation2010_2019['item_name'].unique())

# One dataframe per inflation type, keyed by its item_name. The groupby is
# built once instead of once per inflation type.
grouped_by_item = all_inflation2010_2019.groupby('item_name')
inflation_by_type = {
    item_name: grouped_by_item.get_group(item_name)
    for item_name in item_names
}

# Bind each dataframe to a variable BY NAME. Unpacking the groups
# positionally would depend on the order in which item names first appear in
# the query result, which is not guaranteed, and could silently attach the
# wrong data to a variable.
# Assumes every variable name below equals an item_name with spaces replaced
# by underscores; a KeyError here names any inflation type that is missing.
All_items = inflation_by_type['All_items']
Energy = inflation_by_type['Energy']
All_items_less_food_and_energy = inflation_by_type['All_items_less_food_and_energy']
Apparel = inflation_by_type['Apparel']
Commodities = inflation_by_type['Commodities']
Durables = inflation_by_type['Durables']
Education_and_communication = inflation_by_type['Education_and_communication']
Education = inflation_by_type['Education']
Communication = inflation_by_type['Communication']
Food_and_beverages = inflation_by_type['Food_and_beverages']
Food = inflation_by_type['Food']
Food_at_home = inflation_by_type['Food_at_home']
Alcoholic_beverages = inflation_by_type['Alcoholic_beverages']
Other_goods_and_services = inflation_by_type['Other_goods_and_services']
Housing = inflation_by_type['Housing']
Shelter = inflation_by_type['Shelter']
Fuels_and_utilities = inflation_by_type['Fuels_and_utilities']
Household_furnishings_and_operations = inflation_by_type['Household_furnishings_and_operations']
Medical_care = inflation_by_type['Medical_care']
Medical_care_commodities = inflation_by_type['Medical_care_commodities']
Medical_care_services = inflation_by_type['Medical_care_services']
Nondurables = inflation_by_type['Nondurables']
Recreation = inflation_by_type['Recreation']
Services = inflation_by_type['Services']
Transportation = inflation_by_type['Transportation']
Private_transportation = inflation_by_type['Private_transportation']
Food_away_from_home = inflation_by_type['Food_away_from_home']
New_vehicles = inflation_by_type['New_vehicles']
Public_transportation = inflation_by_type['Public_transportation']

In [None]:
# Preview cell: each bare name below is an expression statement with no side
# effects.
# NOTE(review): a notebook normally renders only the last expression of a
# cell, so presumably only `Shelter` is displayed here - confirm the
# interactivity setting if all three previews are wanted.

# Inflation dataframe for all items
All_items

#Inflation dataframe for housing
Housing

#Inflation dataframe for shelter
Shelter

In [None]:
#ZRI-SCORES ADJUSTED USING ALL-ITEMS INFLATION

# Remove the M13 (month 13) rows, which hold yearly averages rather than a
# monthly reading (only present for 2010-2012).
All_items = All_items[All_items['period'] != 'M13']

# Put the rows in chronological order BEFORE any positional slicing. The
# frame comes from a query with no ORDER BY, so "first 8 rows" and "last row"
# only mean "first 8 months" and "December 2019" once the rows are sorted.
# Assumes the `date` values sort chronologically as stored (true for ISO
# 'YYYY-MM-DD' strings and for date objects). The stable sort leaves an
# already-ordered frame unchanged.
All_items = All_items.sort_values('date', kind='mergesort')

# Remove first 8 months which are not in the original ZRI Multi-Family table
All_items = All_items.iloc[8:, :]

# Chronologically last 'All_items' index value (expected: December 2019)
compare_to_value = All_items['value'].iloc[-1]

# Dataframe with only date as index and inflation values
all_items = All_items[['value', 'date']].set_index('date')
all_items.index = pd.to_datetime(all_items.index, format="%Y-%m-%d")
all_items = all_items.sort_index()

# Multipliers that restate each month in terms of the last month's
# 'All_items' index level
all_items_adjusted = compare_to_value / all_items

In [None]:
#ZRI-SCORES ADJUSTED USING HOUSING INFLATION

# Remove the M13 (month 13) rows, which hold yearly averages rather than a
# monthly reading (only present for 2010-2012).
Housing = Housing[Housing['period'] != 'M13']

# Put the rows in chronological order BEFORE any positional slicing. The
# frame comes from a query with no ORDER BY, so "first 8 rows" and "last row"
# only mean "first 8 months" and "December 2019" once the rows are sorted.
# Assumes the `date` values sort chronologically as stored (true for ISO
# 'YYYY-MM-DD' strings and for date objects). The stable sort leaves an
# already-ordered frame unchanged.
Housing = Housing.sort_values('date', kind='mergesort')

# Remove first 8 months which are not in the original ZRI Multi-Family table
Housing = Housing.iloc[8:, :]

# Chronologically last 'Housing' index value (expected: December 2019)
compare_to_value = Housing['value'].iloc[-1]

# Dataframe with only date as index and inflation values
housing = Housing[['value', 'date']].set_index('date')
housing.index = pd.to_datetime(housing.index, format="%Y-%m-%d")
housing = housing.sort_index()

# Multipliers that restate each month in terms of the last month's
# 'Housing' index level
housing_adjusted = compare_to_value / housing

In [None]:
#ZRI-SCORES ADJUSTED USING SHELTER

# Remove the M13 (month 13) rows, which hold yearly averages rather than a
# monthly reading (only present for 2010-2012).
Shelter = Shelter[Shelter['period'] != 'M13']

# Put the rows in chronological order BEFORE any positional slicing. The
# frame comes from a query with no ORDER BY, so "first 8 rows" and "last row"
# only mean "first 8 months" and "December 2019" once the rows are sorted.
# Assumes the `date` values sort chronologically as stored (true for ISO
# 'YYYY-MM-DD' strings and for date objects). The stable sort leaves an
# already-ordered frame unchanged.
Shelter = Shelter.sort_values('date', kind='mergesort')

# Remove first 8 months which are not in the original ZRI Multi-Family table
Shelter = Shelter.iloc[8:, :]

# Chronologically last 'Shelter' index value (expected: December 2019)
compare_to_value = Shelter['value'].iloc[-1]

# Dataframe with only date as index and inflation values
shelter = Shelter[['value', 'date']].set_index('date')
shelter.index = pd.to_datetime(shelter.index, format="%Y-%m-%d")
shelter = shelter.sort_index()

# Multipliers that restate each month in terms of the last month's
# 'Shelter' index level
shelter_adjusted = compare_to_value / shelter

ADJUST ZRI SCORES USING:
- all_items_adjusted
- housing_adjusted 
- shelter_adjusted 

IMPORT ZRI Multi-Family DataFrame

In [None]:
%%bigquery --use_rest_api ZRI_MF
-- Fetch every column and every row of the ZRI Multi_Family table named below.
-- The cell magic stores the query result in the notebook variable ZRI_MF.
-- NOTE(review): there is no ORDER BY, so the row order of the result should
-- not be relied on by later cells.
SELECT *
FROM `high-empire-220313.ZRI.Multi_Family`

# Index the regions by RegionID while also keeping RegionID as a column
ZRI_MF = ZRI_MF.set_index('RegionID', drop=False)

# Keep only the ZRI month/year columns: skip the first 7 columns and drop the
# last one (January 2020), which is not included in the inflation tables
months_only = ZRI_MF.iloc[:, 7:-1]

# One multiplier per month column, taken positionally from each multiplier
# table
all_items_multipliers = list(all_items_adjusted.value)
housing_multipliers = list(housing_adjusted.value)
shelter_multipliers = list(shelter_adjusted.value)

# Scale every month column by the matching multiplier
all_items_adjusted_ZRI = months_only.mul(all_items_multipliers, axis=1)
housing_adjusted_ZRI = months_only.mul(housing_multipliers, axis=1)
shelter_adjusted_ZRI = months_only.mul(shelter_multipliers, axis=1)

# Names of the ZRI_MF columns that contain '20'
year_columns = [column for column in ZRI_MF.columns if '20' in column]

In [None]:
from sklearn.linear_model import LinearRegression
# Names of the ZRI_MF columns that contain '20'; lregress reads these columns
# from each row
year_columns = [column for column in ZRI_MF.columns if '20' in column]

# Single estimator instance that lregress refits for every row
lr = LinearRegression()
def lregress(row):
    '''
    Fit a straight-line trend to one region's (row) ZRI values over time.

    The columns listed in the module-level ``year_columns`` (all but its last
    entry) are read from ``row`` and missing values are dropped. The most
    recent remaining observation is held out; the line is fitted on the
    earlier ones with the shared module-level estimator ``lr``.

    Returns a tuple ``(slope, intercept, score, error)`` where:
      - slope, intercept: coefficients of the fitted line
      - score: ``lr.score`` on the fitted points, i.e. the R^2 of the fit
        (not an MSE)
      - error: prediction minus actual value for the held-out observation
    Returns ``(None, None, None, None)`` when fewer than 3 observations
    remain after dropping missing values.
    '''
    observed = row[year_columns[:-1]].reset_index().dropna()
    if len(observed) < 3:
        return (None, None, None, None)

    # The index still holds each value's position within the selected columns
    # (gaps left by dropped values are preserved), and serves as the x-axis.
    positions = np.array(observed.index)
    values = observed.iloc[:, 1].values

    # Everything but the last observation is used for fitting...
    X_train = positions[:-1].reshape(-1, 1)
    y_train = values[:-1]
    # ...and the last observation is the held-out test point.
    X_test = positions[-1:].reshape(-1, 1)
    y_test = values[-1]

    lr.fit(X_train, y_train)
    test_residual = lr.predict(X_test) - y_test
    return (lr.coef_[0], lr.intercept_, lr.score(X_train, y_train), test_residual[0])

In [None]:
#WITHOUT INFLATION

# Fit the trend line for every region (row) of the unadjusted ZRI table
lr_data = ZRI_MF.apply(lregress, axis=1)

# Spread the elements of each lregress result tuple into their own columns
for position, column in enumerate(['slope', 'intercept', 'score', 'error']):
    ZRI_MF[column] = [result[position] for result in lr_data]

# Summary statistics of the four regression outputs
no_inflation_scores = ZRI_MF[['slope', 'intercept', 'score', 'error']].describe()

In [None]:
# REGRESSION FOR ALL_ITEMS INFLATION

# Fit the trend line for every region (row) of the All_items-adjusted ZRI table
lr_data_all = all_items_adjusted_ZRI.apply(lregress, axis=1)

# Spread the elements of each lregress result tuple into their own columns
for position, column in enumerate(['slope', 'intercept', 'score', 'error']):
    all_items_adjusted_ZRI[column] = [result[position] for result in lr_data_all]

# Summary statistics of the four regression outputs
all_items_inflation_scores = all_items_adjusted_ZRI[
    ['slope', 'intercept', 'score', 'error']
].describe()

In [None]:
# REGRESSION FOR SHELTER INFLATION

# Fit the trend line for every region (row) of the Shelter-adjusted ZRI table
lr_data_shelter = shelter_adjusted_ZRI.apply(lregress, axis=1)

# Spread the elements of each lregress result tuple into their own columns
for position, column in enumerate(['slope', 'intercept', 'score', 'error']):
    shelter_adjusted_ZRI[column] = [result[position] for result in lr_data_shelter]

# Summary statistics of the four regression outputs
shelter_inflation_scores = shelter_adjusted_ZRI[
    ['slope', 'intercept', 'score', 'error']
].describe()

In [None]:
# REGRESSION FOR HOUSING INFLATION

# Fit the trend line for every region (row) of the Housing-adjusted ZRI table
lr_data_housing = housing_adjusted_ZRI.apply(lregress, axis=1)

# Spread the elements of each lregress result tuple into their own columns
for position, column in enumerate(['slope', 'intercept', 'score', 'error']):
    housing_adjusted_ZRI[column] = [result[position] for result in lr_data_housing]

# Summary statistics of the four regression outputs
housing_inflation_scores = housing_adjusted_ZRI[
    ['slope', 'intercept', 'score', 'error']
].describe()

In [None]:
# Element-wise difference between the unadjusted summary statistics and each
# inflation-adjusted summary (unadjusted minus adjusted)

# ...versus the All_items adjustment
no_all_diff = no_inflation_scores.sub(all_items_inflation_scores)

# ...versus the Shelter adjustment
no_shelter_diff = no_inflation_scores.sub(shelter_inflation_scores)

# ...versus the Housing adjustment
no_housing_diff = no_inflation_scores.sub(housing_inflation_scores)

In [None]:
# Tag every column with a suffix naming the regression it came from, so the
# columns stay distinguishable once the four summaries are combined.
no_inflation_scores = no_inflation_scores.add_suffix('_Orig')
all_items_inflation_scores = all_items_inflation_scores.add_suffix('_All')
housing_inflation_scores = housing_inflation_scores.add_suffix('_Hous')
shelter_inflation_scores = shelter_inflation_scores.add_suffix('_Shel')

# Side-by-side summaries of the 4 different regressions
four_score_comparison = pd.concat(
    [
        no_inflation_scores,
        all_items_inflation_scores,
        housing_inflation_scores,
        shelter_inflation_scores,
    ],
    axis=1,
    sort=True,
)

# One dataframe per measure, picked by column name, with the original
# (no inflation) values listed first
suffix_order = ['_Orig', '_All', '_Hous', '_Shel']
errors = four_score_comparison[['error' + suffix for suffix in suffix_order]]
intercepts = four_score_comparison[['intercept' + suffix for suffix in suffix_order]]
scores = four_score_comparison[['score' + suffix for suffix in suffix_order]]
slopes = four_score_comparison[['slope' + suffix for suffix in suffix_order]]

# Recombined table: measures grouped together, no-inflation column first in
# each group
four_score_comparison = pd.concat([errors, intercepts, scores, slopes], axis=1)