In [None]:
import pandas as pd

from util import Pipeline

# Project pipeline handle; later cells read tables via `p.get_table(...)` and
# settings via `p.settings[...]`.
# NOTE(review): 'configs' is presumably the settings directory — confirm in util.Pipeline.
p = Pipeline('configs')

In [48]:
# Peek inside the pipeline's HDF5 file: record the keys it holds and pull out
# the 'adjusted_emp_change_targets' table for inspection.
with pd.HDFStore('data/pipeline.h5') as store:
    tables = [key for key in store.keys()]
    test = store.get('adjusted_emp_change_targets')

In [49]:
# Display the 'adjusted_emp_change_targets' table read from the HDF5 store.
test

Unnamed: 0,target_id,start,emp_chg,emp_chg_adj
0,1,2019,0,0
1,2,2019,16292,0
2,3,2019,0,0
3,4,2019,0,0
4,5,2019,0,0
...,...,...,...,...
133,162,2019,0,0
134,169,2019,279,0
135,163,2019,0,0
136,168,2019,280,0


In [None]:
# Load the table through the pipeline so this cell works on a fresh kernel;
# the bare name was never assigned anywhere in the notebook.
# NOTE(review): confirm this is the table the saved output below was rendered from.
ofm_estimates_2019_by_control_area = p.get_table('ofm_estimates_2019_by_control_area')
ofm_estimates_2019_by_control_area

Unnamed: 0,block_geoid,housing_units,occupied_housing_units,group_quarters_population,household_population
0,530330001001000,0.000000,0.000000,0.0,0.000000
1,530330001001027,22.389000,22.137257,0.0,54.708000
2,530330001002003,19.365000,19.070932,0.0,56.066002
3,530330002001002,21.997999,21.879662,0.0,50.729000
4,530330002001013,10.000000,9.367275,0.0,23.754000
...,...,...,...,...,...
66229,530330226042007,0.000000,0.000000,0.0,0.000000
66230,530330226051011,71.999001,71.725670,0.0,164.225998
66231,530330226051012,0.000000,0.000000,0.0,0.000000
66232,530330226051017,23.978001,23.608957,0.0,60.000999


In [21]:
# Targets table for the 'emp' target type; the next cell reads its 'start' column.
# NOTE(review): combine_targets is neither defined nor imported in the visible
# cells — confirm where it comes from (presumably util) and import it at the top
# so the notebook survives Restart & Run All.
df = combine_targets(p, 'emp')

In [23]:
# Distinct values of the 'start' column, in order of first appearance,
# converted to a plain Python list.
distinct_starts = df['start'].drop_duplicates()
start_years = distinct_starts.tolist()
start_years

[2019, 2020]

In [None]:
year = 2019
emp_col = f'emp_{year}'

# get control to target lookup
control_target_lookup = p.get_table('control_target_lookup')

# Sum the year's employment from control areas up to target areas.
emp = (
    p.get_table(f'employment_{year}')
    # add year suffix to the employment column
    .rename(columns={'TotEmpNoMil-ResCon': emp_col})
    # Join the lookup exactly once, restricted to the key and the target id.
    # Merging it twice duplicates 'target_id' into target_id_x / target_id_y,
    # which makes the groupby below fail with a KeyError.
    .merge(control_target_lookup[['control_id', 'target_id']], on='control_id', how='left')
    # Aggregate only the employment column so unrelated (possibly non-numeric)
    # columns never reach sum(); rows without a target id are dropped by groupby.
    .groupby('target_id')[[emp_col]].sum().reset_index()
)

In [None]:
def sum_emp_to_target_area(pipeline, year):
    """Sum one year's employment from control areas up to target areas.

    Reads the ``employment_{year}`` and ``control_target_lookup`` tables from
    the pipeline, renames the ``TotEmpNoMil-ResCon`` column to ``emp_{year}``,
    attaches each control area's ``target_id`` and totals employment per target.

    Parameters
    ----------
    pipeline : object
        Must expose ``get_table(name)`` returning a DataFrame.
    year : int
        Year used in the table name and in the output column suffix.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``target_id`` and ``emp_{year}``, one row per target id.
        Control areas with no match in the lookup are excluded from the totals.
    """
    emp_col = f'emp_{year}'

    # get control to target lookup
    control_target_lookup = pipeline.get_table('control_target_lookup')

    emp = (
        pipeline.get_table(f'employment_{year}')
        # add year suffix to the employment column
        .rename(columns={'TotEmpNoMil-ResCon': emp_col})
        # join to target ids (key + target id only, to avoid column clashes)
        .merge(control_target_lookup[['control_id', 'target_id']], on='control_id', how='left')
        # sum only the employment column so other columns never reach sum()
        .groupby('target_id')[[emp_col]].sum().reset_index()
    )
    return emp[['target_id', emp_col]]

In [None]:
def get_emp_all_years(pipeline, start_years):
    """Collect per-target employment for the base year and every start year.

    Parameters
    ----------
    pipeline : object
        Must expose ``settings['base_year']``; it is passed through to
        ``sum_emp_to_target_area``.
    start_years : iterable of int
        Years to include besides the base year. Duplicates are ignored, and
        any iterable is accepted (list, tuple, NumPy array, ...).

    Returns
    -------
    pandas.DataFrame
        One row per ``target_id`` with one column per year, as returned by
        ``sum_emp_to_target_area``, ordered by ascending year.
    """
    base_year = pipeline.settings['base_year']

    # Unpack into a set rather than concatenating lists: `[base_year] + array`
    # would add element-wise for NumPy input. Sorting makes the column order
    # deterministic.
    years = sorted({base_year, *start_years})

    emp_all_years = None
    for year in years:
        emp_df = sum_emp_to_target_area(pipeline, year)

        # A None sentinel (instead of checking `.empty`) keeps an accumulated
        # frame that has columns but no rows from being silently discarded.
        if emp_all_years is None:
            emp_all_years = emp_df
        else:
            emp_all_years = emp_all_years.merge(emp_df, on='target_id', how='outer')

    return emp_all_years