## Adjusting for Inflation

In [2]:
import cpi
import pandas as pd
from siuba import *

pd.options.display.float_format = '{:.2f}'.format

In [1]:
!pip install cpi



In [32]:
df = pd.read_parquet("dla_df.parquet")

In [11]:

# Inflation table
def inflation_table(base_year):
    #cpi.update()
    series_df = cpi.series.get(area="U.S. city average").to_dataframe()
    inflation_df = (series_df[series_df.year >= 2008]
           .pivot_table(index='year', values='value', aggfunc='mean')
           .reset_index()
          )
    denominator = inflation_df.value.loc[inflation_df.year==base_year].iloc[0]

    inflation_df = inflation_df.assign(
        multiplier = inflation_df.apply(lambda x: denominator / x.value, axis=1)
    )
    
    return inflation_df

In [12]:
cpi_table = inflation_table(2021)
cpi_table

Unnamed: 0,year,value,multiplier
0,2008,215.3,1.26
1,2009,214.54,1.26
2,2010,218.06,1.24
3,2011,224.94,1.2
4,2012,229.59,1.18
5,2013,232.96,1.16
6,2014,236.74,1.14
7,2015,237.02,1.14
8,2016,240.01,1.13
9,2017,245.12,1.11


In [33]:


keep_cols = [
    "primary_agency_name", "project_no", "prefix", "prepared_y",
]

orig = ["total_requested", 
        # "fed_requested", 
        # "ac_requested",  
]

adj = ["adjusted_total_requested", 
       # "adjusted_fed_requested", 
       # "adjusted_ac_requested"
]

df1 = df[keep_cols + orig + adj]

# Pick ones that have values so that we can check
df1 = df1[(df.total_requested > 0) & (df1.total_requested.notna())]
df1[orig] = df1[orig].astype(int)
# df = df[(df.fed_requested > 0) & (df.ac_requested > 0)]

In [34]:
df2 = pd.merge(df1, 
         cpi_table[["year", "multiplier"]],
         left_on = "prepared_y",
         right_on = "year",
         how = "left",
         validate = "m:1",
        )

In [35]:
for c in orig:
    new_col = f"{c}2"
    df2[new_col] = df2.apply(lambda x: x[c] * x.multiplier, axis=1)

In [36]:
for c in orig:
    new_col = f"compare_{c}"
    natalie_col = f"adjusted_{c}"
    tiff_col = f"{c}2"
    df2[new_col] = df2.apply(lambda x: 1 if x[natalie_col] == x[tiff_col]
                             else 0, axis=1)

In [37]:


# If everything is in 2021 dollars
# unadjusted and adjusted amts should be equal
(df2[df2.prepared_y ==2021])>>arrange(_.compare_total_requested)

Unnamed: 0,primary_agency_name,project_no,prefix,prepared_y,total_requested,adjusted_total_requested,year,multiplier,total_requested2,compare_total_requested
11068,Sacramento County,5924(252),CML,2021.00,405260,405260.35,2021.00,1.00,405260.00,0
11075,Folsom,5288(039),BRLS,2021.00,15456,15456.98,2021.00,1.00,15456.00,0
11082,El Cerrito,5239(029),CML,2021.00,965000,965000.82,2021.00,1.00,965000.00,0
11085,San Jose,5005(129),CML,2021.00,1049114,1049115.75,2021.00,1.00,1049114.00,0
11086,Marin County,5927(114),HSIPL,2021.00,711540,711540.69,2021.00,1.00,711540.00,0
...,...,...,...,...,...,...,...,...,...,...
12363,San Luis Obispo County,5949(156),BRLO,2021.00,194000,194000.17,2021.00,1.00,194000.00,0
12380,San Bernardino,5033(057),HSIPL,2021.00,4853917,4853921.13,2021.00,1.00,4853917.00,0
12381,San Bernardino County,5954(083),STPLER,2021.00,4203285,4203288.58,2021.00,1.00,4203285.00,0
12393,Santa Clara Valley Transportation Authority,6264(091),FTASTPL,2021.00,1987000,1987001.69,2021.00,1.00,1987000.00,0


In [38]:
for c in ["total_requested", "adjusted_total_requested", "total_requested2"]:
    num = df2[df2.prepared_y ==2021][c].iloc[0]
    print(num)

405260
405260.3451364289
405260.0


In [39]:
# If everything is in 2021 dollars, years prior to 2021 have multiplier < 1
# So adjusted values should be smaller than unadjusted
df2[df2.prepared_y == 2014][orig + adj]

Unnamed: 0,total_requested,adjusted_total_requested
5154,1968000,2252587.75
5161,1130683,1294188.35
5242,1130041,1293453.51
5281,1057741,1210698.38
5345,2008167,2298563.20
...,...,...
8301,5631,6445.72
8302,7575292,8670736.77
8307,28508,32631.17
8315,20987,24022.49


In [40]:
cpi.inflate(1968000.0, 2014)

2252589.2133008926

In [41]:
df2[df2.prepared_y == 2014][orig + ["total_requested2"]]

Unnamed: 0,total_requested,total_requested2
5154,1968000,2252585.83
5161,1130683,1294187.25
5242,1130041,1293452.41
5281,1057741,1210697.35
5345,2008167,2298561.25
...,...,...
8301,5631,6445.28
8302,7575292,8670729.38
8307,28508,32630.45
8315,20987,24021.86


In [42]:
test = pd.read_parquet('dla_df.parquet')

In [43]:
test

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_fed_requested,adjusted_ac_requested,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0.00,...,0.00,0.00,0,0,1,0,0,0,0,1
1,Obligated,ER,32D0(008),Mendocino County,2018-12-17,2018-12-19,2018-12-20,2018-12-20,2018-12-27,11508.00,...,12418.31,0.00,0,0,0,0,0,1,0,1
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499.64,...,49098.77,0.00,0,0,0,0,0,1,0,1
3,Obligated,CML,5924(244),Sacramento County,2018-12-11,2018-12-11,2018-12-21,2018-12-27,2018-12-27,207002.00,...,223376.35,0.00,1,0,0,0,0,1,0,2
4,Obligated,CML,5924(214),Sacramento County,2018-12-05,2018-12-11,2018-12-21,2018-12-27,2018-12-27,0.00,...,0.00,6130295.48,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21588,DISTRICT,HSIPL,7504(001),Caltrans,NaT,NaT,NaT,NaT,NaT,351200.00,...,,,0,1,0,1,0,0,0,2
21590,FTA Transferred,FTASTPL,6264(091),Santa Clara Valley Transportation Authority,2021-11-22,2021-11-22,2021-11-22,2022-02-15,2022-02-25,1987000.00,...,1987001.69,0.00,0,0,0,0,0,1,0,1
21591,FTA Transferred,FTASTPL,6002(030),Alameda - Contra Costa Transit District,2021-11-22,2021-11-22,2021-11-22,2022-02-15,2022-02-25,1254000.00,...,1254001.07,0.00,0,0,0,0,0,1,0,1
21592,FTA Transferred,FTACML,6292(016),Mountain Area Regional Transit Authority,2022-02-18,2022-02-18,2022-02-18,2022-03-01,2022-03-02,438168.00,...,422305.63,0.00,0,0,0,0,0,1,0,1


In [44]:

test2 = pd.merge(test, 
         cpi_table[["year", "multiplier"]],
         left_on = "prepared_y",
         right_on = "year",
         how = "left",
         validate = "m:1",
        )

In [45]:
orig = ["total_requested", 
        "fed_requested", 
        "ac_requested"]

for c in orig:
    new_col = f"{c}2"
    test2[new_col] = test2.apply(lambda x: x[c] * x.multiplier, axis=1)

In [46]:
test2

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,street,freeway,infra_resiliency_er,congestion_relief,work_categories,year,multiplier,total_requested2,fed_requested2,ac_requested2
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0.00,...,0,0,0,0,1,2018.00,1.08,0.00,0.00,0.00
1,Obligated,ER,32D0(008),Mendocino County,2018-12-17,2018-12-19,2018-12-20,2018-12-20,2018-12-27,11508.00,...,0,0,1,0,1,2018.00,1.08,14028.32,12418.30,0.00
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499.64,...,0,0,1,0,1,2018.00,1.08,55459.97,49098.73,0.00
3,Obligated,CML,5924(244),Sacramento County,2018-12-11,2018-12-11,2018-12-21,2018-12-27,2018-12-27,207002.00,...,0,0,1,0,2,2018.00,1.08,266540.22,223376.16,0.00
4,Obligated,CML,5924(214),Sacramento County,2018-12-05,2018-12-11,2018-12-21,2018-12-27,2018-12-27,0.00,...,1,0,0,0,1,2018.00,1.08,6153080.88,0.00,6130290.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20983,DISTRICT,HSIPL,7504(001),Caltrans,NaT,NaT,NaT,NaT,NaT,351200.00,...,1,0,0,0,2,,,,,
20984,FTA Transferred,FTASTPL,6264(091),Santa Clara Valley Transportation Authority,2021-11-22,2021-11-22,2021-11-22,2022-02-15,2022-02-25,1987000.00,...,0,0,1,0,1,2021.00,1.00,1987000.00,1987000.00,0.00
20985,FTA Transferred,FTASTPL,6002(030),Alameda - Contra Costa Transit District,2021-11-22,2021-11-22,2021-11-22,2022-02-15,2022-02-25,1254000.00,...,0,0,1,0,1,2021.00,1.00,1254000.00,1254000.00,0.00
20986,FTA Transferred,FTACML,6292(016),Mountain Area Regional Transit Authority,2022-02-18,2022-02-18,2022-02-18,2022-03-01,2022-03-02,438168.00,...,0,0,1,0,1,2022.00,0.96,420385.37,420385.37,0.00


## Another Way

In [27]:
# Inflation table
def inflation_table2(base_year):
    cpi.update()
    series_df = cpi.series.get(area="U.S. city average").to_dataframe()
    inflation_df = (series_df[series_df.year >= 2008]
           .pivot_table(index='year', values='value', aggfunc='mean')
           .reset_index()
          )
    denominator = inflation_df.value.loc[inflation_df.year==base_year].iloc[0]

    inflation_df = inflation_df.assign(
        inflation = inflation_df.value.divide(denominator)
    )
    
    return inflation_df

In [28]:
inflation_table2(2021)

Unnamed: 0,year,value,inflation
0,2008,215.3,0.79
1,2009,214.54,0.79
2,2010,218.06,0.8
3,2011,224.94,0.83
4,2012,229.59,0.85
5,2013,232.96,0.86
6,2014,236.74,0.87
7,2015,237.02,0.87
8,2016,240.01,0.89
9,2017,245.12,0.9


In [48]:
df.sample()

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_fed_requested,adjusted_ac_requested,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
13702,Obligated,CMLNI,5044(129),Visalia,2019-04-29,2019-04-30,2019-05-01,2019-05-02,2019-05-07,2305000.0,...,2443058.12,0.0,0,0,0,0,0,0,0,0


In [29]:
def adjust_prices(df):
    
    cols =  ["total_requested",
           "fed_requested",
           "ac_requested"]
    
    ##get cpi table 
    cpi = inflation_table2(2021)
    cpi.update
    cpi = (cpi>>select(_.year, _.value))
    cpi_dict = dict(zip(cpi['year'], cpi['value']))
    
    
    for col in cols:
        multiplier = df["prepared_y"].map(cpi_dict)  
    
        ##using 270.97 for 2021 dollars
        df[f"adjusted_{col}"] = ((df[col] * 270.97) / multiplier)
    return df


In [49]:
df3 = adjust_prices(df)
df3

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_fed_requested,adjusted_ac_requested,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0.00,...,0.00,0.00,0,0,1,0,0,0,0,1
1,Obligated,ER,32D0(008),Mendocino County,2018-12-17,2018-12-19,2018-12-20,2018-12-20,2018-12-27,11508.00,...,12418.31,0.00,0,0,0,0,0,1,0,1
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499.64,...,49098.77,0.00,0,0,0,0,0,1,0,1
3,Obligated,CML,5924(244),Sacramento County,2018-12-11,2018-12-11,2018-12-21,2018-12-27,2018-12-27,207002.00,...,223376.35,0.00,1,0,0,0,0,1,0,2
4,Obligated,CML,5924(214),Sacramento County,2018-12-05,2018-12-11,2018-12-21,2018-12-27,2018-12-27,0.00,...,0.00,6130295.48,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21588,DISTRICT,HSIPL,7504(001),Caltrans,NaT,NaT,NaT,NaT,NaT,351200.00,...,,,0,1,0,1,0,0,0,2
21590,FTA Transferred,FTASTPL,6264(091),Santa Clara Valley Transportation Authority,2021-11-22,2021-11-22,2021-11-22,2022-02-15,2022-02-25,1987000.00,...,1987001.69,0.00,0,0,0,0,0,1,0,1
21591,FTA Transferred,FTASTPL,6002(030),Alameda - Contra Costa Transit District,2021-11-22,2021-11-22,2021-11-22,2022-02-15,2022-02-25,1254000.00,...,1254001.07,0.00,0,0,0,0,0,1,0,1
21592,FTA Transferred,FTACML,6292(016),Mountain Area Regional Transit Authority,2022-02-18,2022-02-18,2022-02-18,2022-03-01,2022-03-02,438168.00,...,420385.73,0.00,0,0,0,0,0,1,0,1


## Another Way

In [64]:
df4 = pd.read_parquet("dla_df.parquet")

In [65]:
df4 = (df4>>filter(_.prepared_y<=2021)>>select(_.primary_agency_name,
                                              _.dist, 
                                              _.prepared_y, 
                                              _.prepared_date,
                                              _.total_requested,
                                              _.work_categories)
      )

In [66]:
df4["real_total"] = df4.apply(lambda x: cpi.inflate(x["total_requested"], x["prepared_date"]) if pd.notnull(x['prepared_date']) else None, axis=1)

In [67]:
df4

Unnamed: 0,primary_agency_name,dist,prepared_y,prepared_date,total_requested,work_categories,real_total
0,Humboldt County,1,2018.00,2018-12-18,0.00,1,0.00
1,Mendocino County,1,2018.00,2018-12-17,13000.00,1,14680.83
2,Humboldt County,1,2018.00,2018-12-07,51394.58,1,58039.61
3,Sacramento County,3,2018.00,2018-12-11,247002.00,2,278937.96
4,Sacramento County,3,2018.00,2018-12-05,5702041.00,1,6439282.52
...,...,...,...,...,...,...,...
21507,San Bernardino,8,2021.00,2021-08-19,4853917.00,0,5033991.36
21523,San Bernardino County,8,2021.00,2021-06-28,4203285.00,1,4389240.94
21554,Amador County,10,2021.00,2021-10-05,0.00,1,0.00
21590,Santa Clara Valley Transportation Authority,4,2021.00,2021-11-22,1987000.00,1,2028234.39
