In [1]:
import pandas as pd
from pathlib import Path
import getpass

# Login name of the person running the notebook; it is interpolated into the
# per-user `C:\Users\{user}\Box\...` path of the PBA50 scenario further down.
user = getpass.getuser()

In [2]:
# Load the Census-based TAZ land use tables (one CSV per year) and reshape
# each of them to long format: one row per (ZONE, variable).

census_dir = Path(r'X:\petrale\applications\travel_model_lu_inputs')

# Variables that are carried through to the long-format table
melt_vars = [
    'TOTHH', 'TOTEMP', 'RES_UNITS',
    'HHINCQ1', 'HHINCQ2', 'HHINCQ3', 'HHINCQ4',
    'AGREMPN', 'FPSEMPN', 'HEREMPN', 'MWTEMPN', 'OTHEMPN', 'RETEMPN',
]


def _census_long(census_year):
    """Read one year's Census land use CSV and return it melted to long format.

    The result has the columns ZONE, variable, value, source ('Census') and
    year (the `census_year` string that was passed in).
    """
    table = pd.read_csv(census_dir / census_year / f'TAZ1454 {census_year} Land Use.csv')

    # Derive RES_UNITS from single- plus multi-family units when the file does
    # not ship it (2010 already has the column, the other years do not)
    if 'RES_UNITS' not in table.columns:
        table['RES_UNITS'] = table['SFDU'] + table['MFDU']

    # Not every year carries every variable, so melt only what is present
    available = [name for name in melt_vars if name in table.columns]
    melted = table.melt(id_vars='ZONE', value_vars=available)

    melted['source'] = 'Census'
    melted['year'] = census_year
    return melted


# One long-format frame per Census year; later cells append to this list
dfs = [_census_long(census_year) for census_year in ['2010', '2020', '2023']]


In [3]:
# Next ingest model run summaries from a variety of file locations.
#
# Each scenario maps a display name to the folder that holds its TAZ summary
# CSVs ('path') and the glob pattern that selects them ('pattern'). The last
# four characters of a file's stem are taken as the model year.

scenarios = {
    'PBA50': {
        'path': Path(rf"C:\Users\{user}\Box\Modeling and Surveys\Urban Modeling\Bay Area UrbanSim\PBA50\Final Blueprint runs\Final Blueprint (s24)\BAUS v2.25 - FINAL VERSION"),
        'pattern': '*_taz_summaries_*'
    },
    'v0: PBA50 equivalent inputs': {
        'path': Path(r"\\lumodel3\LUModel3Share\baus_main_current_PBA50_inputs\outputs\pba50_fbp_pr319_v0\travel_model_summaries"),
        'pattern': 'taz1_summary_*'
    },
    'v1: BASIS buildings in dev pipeline': {
        'path': Path(r"\\lumodel3\LUModel3Share\baus_main_current_PBA50_inputs\outputs\pba50_fbp_pr319_v1\travel_model_summaries"),
        'pattern': 'taz1_summary_*'
    },
    'v2: BASIS buildings, updated control totals': {
        'path': Path(r"\\lumodel3\LUModel3Share\baus_main_current_PBA50_inputs\outputs\pba50_fbp_pr319_v2\travel_model_summaries"),
        'pattern': 'taz1_summary_*'
    },
}

# Model years to ingest (kept as strings, like the `year` column)
model_years = ['2010', '2020', '2025', '2035', '2050']

# Make the cell safe to re-run: this cell appends to the shared `dfs` list, so
# first drop every frame that already belongs to one of the scenarios. Frames
# from other sources (e.g. 'Census') are kept untouched.
scenario_names = list(scenarios)
dfs = [frame for frame in dfs if not frame['source'].isin(scenario_names).any()]

for scenario, params in scenarios.items():
    # glob() yields files in arbitrary, filesystem-dependent order; sorting
    # keeps the row order of the combined table reproducible.
    matched_files = sorted(
        file for file in params['path'].glob(params['pattern'])
        if file.stem[-4:] in model_years
    )

    # A wrong/unreachable folder would otherwise vanish from the results
    # without any hint, so say so explicitly (but keep going).
    if not matched_files:
        print(f'No summary files found for {scenario!r} in {params["path"]}')

    for file in matched_files:
        wide = pd.read_csv(file)
        long = wide.melt(
            id_vars='ZONE',
            value_vars=melt_vars
        )

        long['source'] = scenario
        long['year'] = file.stem[-4:]

        dfs.append(long)

# NOTE: each frame keeps its own 0..n-1 row labels here (no ignore_index), so
# labels repeat across sources/years; code that selects a single source/year
# subset of `df` may depend on those per-file labels.
df = pd.concat(dfs)

df


Unnamed: 0,ZONE,variable,value,source,year
0,1,TOTHH,25.0,Census,2010
1,2,TOTHH,135.0,Census,2010
2,3,TOTHH,270.0,Census,2010
3,4,TOTHH,58.0,Census,2010
4,5,TOTHH,524.0,Census,2010
...,...,...,...,...,...
18897,1450,RETEMPN,451.0,"v2: BASIS buildings, updated control totals",2050
18898,1451,RETEMPN,107.0,"v2: BASIS buildings, updated control totals",2050
18899,1452,RETEMPN,133.0,"v2: BASIS buildings, updated control totals",2050
18900,1453,RETEMPN,0.0,"v2: BASIS buildings, updated control totals",2050


In [4]:
# Linearly interpolate between the 2020 and 2025 model outputs to generate
# 2023 estimates for every scenario, and append them to `dfs`.

# Year labels are strings in the `year` column; convert only for the weight.
start_year, end_year, target_year = '2020', '2025', '2023'
weight = (int(target_year) - int(start_year)) / (int(end_year) - int(start_year))

scenario_names = list(scenarios)

# Make the cell safe to re-run: discard interpolated records that an earlier
# run of this cell already appended (the loaded scenario files never carry
# the target year, so nothing else is removed).
dfs = [
    frame for frame in dfs
    if not (frame['source'].isin(scenario_names) & (frame['year'] == target_year)).any()
]

for scenario in scenario_names:
    in_scenario = df['source'] == scenario
    start = df.loc[in_scenario & (df['year'] == start_year), ['ZONE', 'variable', 'value']]
    end = df.loc[in_scenario & (df['year'] == end_year), ['ZONE', 'variable', 'value']]

    # Nothing was loaded for this scenario at all: there is nothing to
    # interpolate, so skip it instead of appending an empty frame.
    if start.empty and end.empty:
        print(f'Skipping {scenario!r}: no {start_year}/{end_year} records loaded')
        continue

    # Pair the two years on their keys rather than on row position, so the
    # result stays correct even if the files list zones in a different order.
    # validate= raises if a (ZONE, variable) key occurs more than once.
    paired = start.merge(
        end,
        on=['ZONE', 'variable'],
        how='inner',
        suffixes=('_start', '_end'),
        validate='one_to_one',
    )
    assert len(paired) == len(start) == len(end), (
        f'{scenario}: {start_year} and {end_year} do not cover the same '
        f'(ZONE, variable) keys ({len(start)} vs {len(end)} rows, {len(paired)} matched)'
    )

    # Construct the interpolated records with the same columns as the others
    df_2023 = paired[['ZONE', 'variable']].copy()
    df_2023['value'] = (
        paired['value_start'] + (paired['value_end'] - paired['value_start']) * weight
    )
    df_2023['source'] = scenario
    df_2023['year'] = target_year

    dfs.append(df_2023)

In [5]:
# Stack every frame collected in `dfs` into one long table and write it to a
# CSV in the current working directory, leaving out the row index.
out_df = pd.concat(dfs)

out_path = Path('taz_data_long.csv')
out_df.to_csv(out_path, index=False)