In [1]:
from pathlib import Path

import pandas as pd, numpy as np
import seaborn as sns
from numpy.random import default_rng
import scipy.stats as stats
import yaml

# Set pandas' `display.min_rows` option to 20 for the frames/series displayed
# later in this notebook.
pd.set_option('display.min_rows', 20)

# Provenance: record which user ran the notebook and when (IPython shell escapes;
# their output is captured directly below this cell).
! whoami
! date

zmbc
Mon Jan  1 12:48:15 PM PST 2024


In [2]:
# Directory that contains the ACS input file used by this notebook.
data_dir = '/ihme/scratch/users/zmbc/vivarium_research_prl/migration/data'

# Read the object stored under key 'acs' in the 2020 5-year household HDF file.
acs_household_path = f'{data_dir}/acs_2020_5yr_household.hdf'
acs = pd.read_hdf(acs_household_path, key='acs')

In [3]:
# The index as loaded contains duplicate values, so it is replaced with a fresh
# 0..n-1 index here. (In the future this should probably be handled in
# download_acs instead.)
# Only the columns used below are kept, to save memory.
relevant_columns = ['SERIALNO', 'TYPEHUGQ', 'ST', 'WGTP']
acs = acs.loc[:, relevant_columns].reset_index(drop=True)

In [4]:
# Goal: the share of *households* located in each state according to ACS PUMS.
# Households are the unit used because only household locations are
# independent of one another.
# The GQ population is deliberately left out: its state distribution is known
# to be way off, and that is a separate issue not addressed here.
is_household = acs['TYPEHUGQ'] == 1

# Total household weight (WGTP) per state, over household rows only.
household_weight_by_state = acs.loc[is_household].groupby('ST')['WGTP'].sum()

# Normalize so that the values across all states sum to 1.
state_proportions = household_weight_by_state / household_weight_by_state.sum()
state_proportions

ST
1     0.016401
2     0.002300
4     0.021964
5     0.009967
6     0.102656
8     0.017058
9     0.010989
10    0.003167
11    0.002306
12    0.069076
13    0.031276
15    0.003948
16    0.005327
17    0.038816
18    0.020976
19    0.010170
20    0.009249
21    0.014408
22    0.014987
23    0.005395
24    0.017768
25    0.021043
26    0.033315
27    0.017756
28    0.009614
29    0.020260
30    0.003720
31    0.006099
32    0.009164
33    0.004613
34    0.026213
35    0.006816
36    0.060412
37    0.033858
38    0.002720
39    0.037687
40    0.012576
41    0.012922
42    0.041272
44    0.003390
45    0.016753
46    0.002866
47    0.021643
48    0.080277
49    0.008021
50    0.002436
51    0.025556
53    0.022756
54    0.006455
55    0.019572
56    0.002012
Name: WGTP, dtype: float64

In [5]:
# Sanity check: how many distinct ST values ended up with a proportion.
state_proportions.shape[0]

51

In [6]:
# Export the household-based state proportions, together with the tolerated
# per-year drift, as a YAML file.
# `yaml` and `Path` come from the import cell at the top of the notebook.
output_path = Path('v_and_v_inputs/household_structure.yaml')

# Create the target directory if it is missing (e.g. on a fresh checkout);
# otherwise open(..., 'w') below raises FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)

household_structure_inputs = {
    'state_proportions': {
        # Cast each proportion to a built-in float so it is written as a
        # plain YAML number.
        'states': {k: float(v) for k, v in state_proportions.items()},
        # As in domestic migration, we specify a range of multiplicative
        # factors by which we would expect the proportion of the US population
        # in any state to drift for each year of population dynamics applied.
        # This is pretty conservative for big states, but there are some
        # small states that could change rather rapidly.
        'multiplicative_drift_per_year': {
            'lower_bound': 0.75,
            'upper_bound': 1.25,
        },
    },
}

with open(output_path, 'w', encoding='utf-8') as f:
    yaml.dump(household_structure_inputs, f, default_flow_style=False)