## Adjusting for Inflation

In [1]:
import cpi
import pandas as pd
from siuba import *

# Render floats in displayed frames with exactly two decimal places.
pd.options.display.float_format = '{:.2f}'.format

In [2]:
# Load dla_df.parquet from the gs:// analytics bucket (the path suggests
# DLA E-76 obligated projects -- confirm against the data owner).
df = pd.read_parquet("gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/dla_df.parquet")



In [3]:

# Inflation table
def inflation_table(base_year, start_year=2008):
    """Build a per-year CPI table with multipliers into ``base_year`` dollars.

    All "U.S. city average" observations returned by ``cpi.series.get`` are
    averaged per year. ``multiplier`` is ``CPI[base_year] / CPI[year]``, so
    multiplying an amount from ``year`` by it expresses that amount in
    ``base_year`` dollars (the base year itself gets a multiplier of 1).

    Parameters
    ----------
    base_year : int
        Year whose average CPI is used as the numerator. It must be present
        in the table, i.e. be ``>= start_year`` and covered by the CPI data.
    start_year : int, default 2008
        Earliest year kept in the table.

    Returns
    -------
    pandas.DataFrame
        One row per year with columns ``year``, ``value`` (mean CPI) and
        ``multiplier``.

    Raises
    ------
    IndexError
        If ``base_year`` has no row in the table (raised by ``.iloc[0]``).
    """
    # cpi.update() is intentionally left disabled here, as in the original;
    # inflation_table2() further down does call it before reading the series.
    series_df = cpi.series.get(area="U.S. city average").to_dataframe()
    inflation_df = (
        series_df[series_df.year >= start_year]
        .pivot_table(index="year", values="value", aggfunc="mean")
        .reset_index()
    )
    denominator = inflation_df.loc[inflation_df.year == base_year, "value"].iloc[0]

    # One vectorized division over the column replaces the row-wise apply.
    return inflation_df.assign(multiplier=denominator / inflation_df["value"])

In [4]:
# CPI table with 2021 as the base year: multiplier = CPI_2021 / CPI_year,
# so the 2021 row has a multiplier of 1.
cpi_table = inflation_table(2021)
cpi_table

Unnamed: 0,year,value,multiplier
0,2008,215.3,1.26
1,2009,214.54,1.26
2,2010,218.06,1.24
3,2011,224.94,1.2
4,2012,229.59,1.18
5,2013,232.96,1.16
6,2014,236.74,1.14
7,2015,237.02,1.14
8,2016,240.01,1.13
9,2017,245.12,1.11


In [5]:


# Identifier columns carried along so individual rows can be inspected.
keep_cols = [
    "primary_agency_name", "project_no", "prefix", "prepared_y",
]

# Nominal amounts to re-adjust, and the stored adjusted columns they are
# checked against. Only total_requested is compared here; fed_requested /
# ac_requested (with adjusted_fed_requested / adjusted_ac_requested) can be
# appended to both lists to check them as well.
orig = ["total_requested"]
adj = ["adjusted_total_requested"]

# Pick ones that have values so that we can check.
# `> 0` is already False for NaN, so no separate notna() test is needed, and
# the mask is built from the same frame that is being filtered.
has_total = df["total_requested"] > 0

# .loc + .copy() yields an independent frame, so the dtype cast below is a
# plain column assignment rather than a write into a slice of `df`.
df1 = df.loc[has_total, keep_cols + orig + adj].copy()

# Casting to int drops any fractional part of the nominal amounts.
df1[orig] = df1[orig].astype(int)

In [6]:
# Attach each row's CPI multiplier by matching prepared_y to the table's year.
# Left join keeps every df1 row; validate="m:1" asserts one table row per year.
year_multipliers = cpi_table[["year", "multiplier"]]
df2 = df1.merge(
    year_multipliers,
    how="left",
    left_on="prepared_y",
    right_on="year",
    validate="m:1",
)

In [7]:
# Recompute each adjusted amount as "<col>2" = nominal amount * multiplier.
# Column-wise multiplication gives the same values as the former row-wise
# apply without invoking a Python lambda once per row.
for c in orig:
    new_col = f"{c}2"
    df2[new_col] = df2[c] * df2["multiplier"]

In [8]:
# Flag (1/0) whether the stored adjusted_<col> equals the recomputed <col>2.
# NOTE: this is an exact float comparison, kept on purpose so that any
# difference at all shows up as 0; rows with NaN on either side also get 0.
# The comparison is vectorized instead of evaluated row by row.
for c in orig:
    new_col = f"compare_{c}"
    natalie_col = f"adjusted_{c}"
    tiff_col = f"{c}2"
    df2[new_col] = (df2[natalie_col] == df2[tiff_col]).astype(int)

In [9]:


# If everything is in 2021 dollars, rows prepared in 2021 have multiplier 1,
# so unadjusted and adjusted amounts should be equal (compare flag == 1).
# Rows are arranged by the compare flag to make mismatches easy to spot.
(df2[df2.prepared_y ==2021])>>arrange(_.compare_total_requested)

Unnamed: 0,primary_agency_name,project_no,prefix,prepared_y,total_requested,adjusted_total_requested,year,multiplier,total_requested2,compare_total_requested
11068,Sacramento County,5924(252),CML,2021.00,405260,405260.35,2021.00,1.00,405260.00,0
11075,Folsom,5288(039),BRLS,2021.00,15456,15456.98,2021.00,1.00,15456.00,0
11082,El Cerrito,5239(029),CML,2021.00,965000,965000.82,2021.00,1.00,965000.00,0
11085,San Jose,5005(129),CML,2021.00,1049114,1049115.75,2021.00,1.00,1049114.00,0
11086,Marin County,5927(114),HSIPL,2021.00,711540,711540.69,2021.00,1.00,711540.00,0
...,...,...,...,...,...,...,...,...,...,...
12453,San Joaquin County,5929(288),HRRRL,2021.00,2245950,2245951.91,2021.00,1.00,2245950.00,0
12467,Fairfield,5132(049),CML,2021.00,3093815,3093817.98,2021.00,1.00,3093815.00,0
12481,Jackson,5141(015),BRLO,2021.00,100000,100000.09,2021.00,1.00,100000.00,0
12482,Riverbank,5255(052),CML,2021.00,674238,674238.57,2021.00,1.00,674238.00,0


In [10]:
# Print the unformatted values of the first 2021 row: nominal amount, stored
# adjusted amount, and recomputed amount (the display option rounds them).
rows_2021 = df2[df2.prepared_y == 2021]
for col_name in ("total_requested", "adjusted_total_requested", "total_requested2"):
    print(rows_2021[col_name].iloc[0])

405260
405260.3451364289
405260.0


In [11]:
# If everything is in 2021 dollars, years prior to 2021 have multiplier > 1
# (multiplier = CPI_2021 / CPI_year, e.g. 1.14 for 2014 in cpi_table),
# so adjusted values should be larger than unadjusted.
df2[df2.prepared_y == 2014][orig + adj]

Unnamed: 0,total_requested,adjusted_total_requested
5154,1968000,2252587.75
5161,1130683,1294188.35
5242,1130041,1293453.51
5281,1057741,1210698.38
5345,2008167,2298563.20
...,...,...
8301,5631,6445.72
8302,7575292,8670736.77
8307,28508,32631.17
8315,20987,24022.49


In [12]:
# Cross-check with the cpi package's own inflate(): same nominal amount as the
# first 2014 row above (1,968,000), to compare against both adjusted columns.
cpi.inflate(1968000.0, 2014)

2252589.2133008926

In [13]:
# Same 2014 rows, now showing the amount recomputed from cpi_table's multiplier.
df2[df2.prepared_y == 2014][orig + ["total_requested2"]]

Unnamed: 0,total_requested,total_requested2
5154,1968000,2252585.83
5161,1130683,1294187.25
5242,1130041,1293452.41
5281,1057741,1210697.35
5345,2008167,2298561.25
...,...,...
8301,5631,6445.28
8302,7575292,8670729.38
8307,28508,32630.45
8315,20987,24021.86


In [18]:
# Second, unfiltered read of the same parquet file that `df` was loaded from.
test = pd.read_parquet('gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/dla_df.parquet')

In [19]:
# Preview the full frame (pandas truncates rows/columns in the display).
test

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0.00,...,0.00,Large,0,0,1,0,0,0,0,1
1,Obligated,ER,32D0(008),Mendocino County,2018-12-17,2018-12-19,2018-12-20,2018-12-20,2018-12-27,11508.00,...,0.00,Large,0,0,0,0,0,1,0,1
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499.64,...,0.00,Large,0,0,0,0,0,1,0,1
3,Obligated,CML,5924(244),Sacramento County,2018-12-11,2018-12-11,2018-12-21,2018-12-27,2018-12-27,207002.00,...,0.00,Large,1,0,0,0,0,1,0,2
4,Obligated,CML,5924(214),Sacramento County,2018-12-05,2018-12-11,2018-12-21,2018-12-27,2018-12-27,0.00,...,6130295.48,Large,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21786,FHWA,FTACML,5044(143),Visalia,2022-04-07,2022-04-07,2022-04-13,2022-04-13,NaT,1750000.00,...,0.00,Large,0,1,0,0,0,1,0,2
21788,HQ,FTACML,6065(255),La Co M T A,2022-03-28,2022-03-28,NaT,NaT,NaT,164010000.00,...,0.00,Large,0,0,0,1,0,0,0,1
21789,DISTRICT,HSIPL,7504(001),Caltrans,NaT,NaT,NaT,NaT,NaT,351200.00,...,,Large,0,0,0,1,0,0,0,1
21790,DISTRICT,FTACML,6071(166),Ora Co Trans Au,NaT,NaT,NaT,NaT,NaT,0.00,...,,Large,0,0,0,1,0,0,0,1


In [20]:

# Same year -> multiplier join as before, this time on the unfiltered frame.
# Rows whose prepared_y has no match in cpi_table keep NaN year/multiplier.
multiplier_by_year = cpi_table[["year", "multiplier"]]
test2 = test.merge(
    multiplier_by_year,
    how="left",
    left_on="prepared_y",
    right_on="year",
    validate="m:1",
)

In [21]:
# All three requested-amount columns are adjusted here (this rebinds `orig`,
# which earlier held only total_requested).
orig = ["total_requested",
        "fed_requested",
        "ac_requested"]

# "<col>2" = nominal amount * multiplier, computed column-wise rather than
# with a row-wise apply; rows without a multiplier stay NaN.
for c in orig:
    new_col = f"{c}2"
    test2[new_col] = test2[c] * test2["multiplier"]

In [22]:
# Preview the merged frame with the year, multiplier and recomputed "*2" columns.
test2

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,street,freeway,infra_resiliency_er,congestion_relief,work_categories,year,multiplier,total_requested2,fed_requested2,ac_requested2
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0.00,...,0,0,0,0,1,2018.00,1.08,0.00,0.00,0.00
1,Obligated,ER,32D0(008),Mendocino County,2018-12-17,2018-12-19,2018-12-20,2018-12-20,2018-12-27,11508.00,...,0,0,1,0,1,2018.00,1.08,14028.32,12418.30,0.00
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499.64,...,0,0,1,0,1,2018.00,1.08,55459.97,49098.73,0.00
3,Obligated,CML,5924(244),Sacramento County,2018-12-11,2018-12-11,2018-12-21,2018-12-27,2018-12-27,207002.00,...,0,0,1,0,2,2018.00,1.08,266540.22,223376.16,0.00
4,Obligated,CML,5924(214),Sacramento County,2018-12-05,2018-12-11,2018-12-21,2018-12-27,2018-12-27,0.00,...,1,0,0,0,1,2018.00,1.08,6153080.88,0.00,6130290.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21201,FHWA,FTACML,5044(143),Visalia,2022-04-07,2022-04-07,2022-04-13,2022-04-13,NaT,1750000.00,...,0,0,1,0,2,2022.00,0.95,1668987.21,1668987.21,0.00
21202,HQ,FTACML,6065(255),La Co M T A,2022-03-28,2022-03-28,NaT,NaT,NaT,164010000.00,...,1,0,0,0,1,2022.00,0.95,156417481.13,156417481.13,0.00
21203,DISTRICT,HSIPL,7504(001),Caltrans,NaT,NaT,NaT,NaT,NaT,351200.00,...,1,0,0,0,1,,,,,
21204,DISTRICT,FTACML,6071(166),Ora Co Trans Au,NaT,NaT,NaT,NaT,NaT,0.00,...,1,0,0,0,1,,,,,


## Another Way: Map Yearly CPI Values onto `prepared_y`

In [23]:
# Inflation table
def inflation_table2(base_year):
    """Return yearly mean CPI (2008 onward) with an index relative to ``base_year``.

    Calls ``cpi.update()`` first, then averages the "U.S. city average"
    series per year. The ``inflation`` column is ``CPI[year] / CPI[base_year]``,
    so the base year itself is 1.

    Returns a DataFrame with columns ``year``, ``value`` and ``inflation``.
    Raises ``IndexError`` (from ``.iloc[0]``) when ``base_year`` has no row.
    """
    cpi.update()
    raw = cpi.series.get(area="U.S. city average").to_dataframe()

    since_2008 = raw[raw["year"] >= 2008]
    yearly = since_2008.pivot_table(index="year", values="value", aggfunc="mean")
    yearly = yearly.reset_index()

    base_value = yearly.loc[yearly["year"] == base_year, "value"].iloc[0]

    return yearly.assign(inflation=yearly["value"] / base_value)

In [24]:
# Preview for base year 2021: `inflation` is CPI_year / CPI_2021, i.e. the
# reciprocal of the `multiplier` column produced by inflation_table().
inflation_table2(2021)

Unnamed: 0,year,value,inflation
0,2008,215.3,0.79
1,2009,214.54,0.79
2,2010,218.06,0.8
3,2011,224.94,0.83
4,2012,229.59,0.85
5,2013,232.96,0.86
6,2014,236.74,0.87
7,2015,237.02,0.87
8,2016,240.01,0.89
9,2017,245.12,0.9


In [25]:
# Spot-check one randomly chosen row (no random_state is passed, so the row
# shown differs between runs).
df.sample()

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
20103,Obligated,STPL,5109(255),Bakersfield,2021-10-18,2021-10-18,2021-10-18,2021-10-18,2021-10-25,-284387.59,...,0.0,Large,1,0,0,1,0,1,0,3


In [26]:
def adjust_prices(df, base_year=2021):
    """Add inflation-adjusted versions of the requested-amount columns.

    For each of ``total_requested``, ``fed_requested`` and ``ac_requested``
    an ``adjusted_<col>`` column is written holding
    ``<col> * CPI[base_year] / CPI[prepared_y]``, using the yearly mean CPI
    values from ``inflation_table2``.

    Differences from the previous implementation:

    * The base-year CPI is read from the table instead of the hard-coded,
      rounded ``270.97``. Amounts prepared in the base year therefore map
      exactly onto themselves; other years change by roughly one part in a
      million.
    * The stray ``cpi.update`` attribute access (never called, no effect) is
      gone, and the table is no longer bound to a local named ``cpi``, which
      shadowed the imported package inside this function.
    * The year -> CPI lookup is computed once instead of once per column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``prepared_y`` and the three requested-amount columns.
    base_year : int, default 2021
        Year whose dollars the amounts are expressed in.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; the ``adjusted_*`` columns are added (or
        overwritten) in place. Rows whose ``prepared_y`` is missing or not in
        the CPI table get NaN.
    """
    cols = ["total_requested",
            "fed_requested",
            "ac_requested"]

    # Yearly mean CPI keyed by year.
    cpi_table = inflation_table2(base_year)
    cpi_by_year = dict(zip(cpi_table["year"], cpi_table["value"]))
    base_value = cpi_by_year[base_year]

    # CPI of the year each row was prepared in; identical for every column.
    year_value = df["prepared_y"].map(cpi_by_year)

    for col in cols:
        df[f"adjusted_{col}"] = (df[col] * base_value) / year_value
    return df


In [27]:
# adjust_prices() writes the adjusted_* columns into `df` itself and returns
# that same object, so `df3` and `df` refer to the same frame after this cell.
df3 = adjust_prices(df)
df3

Unnamed: 0,location,prefix,project_no,agency,prepared_date,submit__to_hq_date,hq_review_date,submit_to_fhwa_date,to_fmis_date,fed_requested,...,adjusted_ac_requested,obligation_cat,active_transp,transit,bridge,street,freeway,infra_resiliency_er,congestion_relief,work_categories
0,Obligated,BPMP,5904(121),Humboldt County,2018-12-18,2018-12-18,2018-12-18,2018-12-18,2018-12-27,0.00,...,0.00,Large,0,0,1,0,0,0,0,1
1,Obligated,ER,32D0(008),Mendocino County,2018-12-17,2018-12-19,2018-12-20,2018-12-20,2018-12-27,11508.00,...,0.00,Large,0,0,0,0,0,1,0,1
2,Obligated,ER,4820(004),Humboldt County,2018-12-07,2018-12-21,2018-12-21,2018-12-21,2018-12-27,45499.64,...,0.00,Large,0,0,0,0,0,1,0,1
3,Obligated,CML,5924(244),Sacramento County,2018-12-11,2018-12-11,2018-12-21,2018-12-27,2018-12-27,207002.00,...,0.00,Large,1,0,0,0,0,1,0,2
4,Obligated,CML,5924(214),Sacramento County,2018-12-05,2018-12-11,2018-12-21,2018-12-27,2018-12-27,0.00,...,6130295.48,Large,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21786,FHWA,FTACML,5044(143),Visalia,2022-04-07,2022-04-07,2022-04-13,2022-04-13,NaT,1750000.00,...,0.00,Large,0,1,0,0,0,1,0,2
21788,HQ,FTACML,6065(255),La Co M T A,2022-03-28,2022-03-28,NaT,NaT,NaT,164010000.00,...,0.00,Large,0,0,0,1,0,0,0,1
21789,DISTRICT,HSIPL,7504(001),Caltrans,NaT,NaT,NaT,NaT,NaT,351200.00,...,,Large,0,0,0,1,0,0,0,1
21790,DISTRICT,FTACML,6071(166),Ora Co Trans Au,NaT,NaT,NaT,NaT,NaT,0.00,...,,Large,0,0,0,1,0,0,0,1


## Another Way: `cpi.inflate` on Each Row's `prepared_date`

In [28]:
# Fresh read of the same parquet file; unlike `df`, this frame has not been
# passed through adjust_prices().
df4 = pd.read_parquet("gs://calitp-analytics-data/data-analyses/dla/e-76Obligated/dla_df.parquet")

In [29]:
# Keep rows prepared in 2021 or earlier (rows with a missing prepared_y fail
# the comparison and are dropped) and only the columns needed below.
prepared_by_2021 = df4["prepared_y"] <= 2021
df4 = df4.loc[
    prepared_by_2021,
    [
        "primary_agency_name",
        "dist",
        "prepared_y",
        "prepared_date",
        "total_requested",
        "work_categories",
    ],
]

In [30]:
def _real_total(row):
    """Inflate one row's total_requested from its prepared_date via cpi.inflate.

    Returns None when prepared_date is missing.
    """
    if pd.isna(row["prepared_date"]):
        return None
    return cpi.inflate(row["total_requested"], row["prepared_date"])


df4["real_total"] = df4.apply(_real_total, axis=1)

In [31]:
# Preview nominal total_requested next to the cpi.inflate() result (real_total).
df4

Unnamed: 0,primary_agency_name,dist,prepared_y,prepared_date,total_requested,work_categories,real_total
0,Humboldt County,1,2018.00,2018-12-18,0.00,1,0.00
1,Mendocino County,1,2018.00,2018-12-17,13000.00,1,14876.84
2,Humboldt County,1,2018.00,2018-12-07,51394.58,1,58814.52
3,Sacramento County,3,2018.00,2018-12-11,247002.00,2,282662.16
4,Sacramento County,3,2018.00,2018-12-05,5702041.00,1,6525255.82
...,...,...,...,...,...,...,...
21581,Sonoma County,4,2021.00,2021-12-08,0.00,1,0.00
21694,Long Beach,7,2020.00,2020-11-06,0.00,2,0.00
21745,Jackson,10,2021.00,2021-12-03,100000.00,2,103121.21
21746,Riverbank,10,2021.00,2021-06-08,674238.00,1,713466.97
