# Immigration automated V&V inputs

This notebook calculates *only* the V&V inputs, not any simulation inputs: the calculation of the simulation inputs is simple enough that we chose to do it inside the simulation itself.

In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from numpy.random import default_rng

pd.set_option('display.min_rows', 20)

! whoami
! date

zmbc
Mon Mar 20 11:19:32 PDT 2023


In [2]:
# Load the person-level ACS records (stored under the 'acs' key).
acs = pd.read_hdf(
    '../data/acs_2020_5yr_person.hdf',
    key='acs',
)

In [3]:
# The loaded frame has duplicate index values, so swap the index for a fresh
# 0..n-1 range. (In the future this should probably be handled in download_acs.)
# Only the columns used below are kept, to save memory.
needed_columns = ['SERIALNO', 'MIG', 'RELSHIPP', 'PWGTP']
acs = acs.loc[:, needed_columns].reset_index(drop=True)

In [4]:
# Data dictionary:
# https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2016-2020.pdf
# MIG is mobility status (lived here 1 year ago);
# MIG == 2 means "No, outside US and Puerto Rico".
is_recent_immigrant = acs['MIG'] == 2
recent_immigrants = acs.loc[is_recent_immigrant].copy()
assert recent_immigrants['SERIALNO'].notnull().all()
assert recent_immigrants['RELSHIPP'].notnull().all()

# RELSHIPP 37/38 are flagged as group-quarters persons, 20 as the reference person.
recent_immigrants['gq_person'] = recent_immigrants['RELSHIPP'].isin([37, 38])
recent_immigrants['ref_person'] = recent_immigrants['RELSHIPP'] == 20

# SERIALNOs of households whose reference person is a recent immigrant.
immigrant_ref_serialnos = recent_immigrants.loc[recent_immigrants['ref_person'], 'SERIALNO']
in_immigrant_ref_household = recent_immigrants['SERIALNO'].isin(immigrant_ref_serialnos)

# Group-quarters status takes precedence. Everyone else is a 'household' mover
# when their household's reference person also immigrated, and otherwise a
# 'non_reference_person' mover.
non_gq_move_type = np.where(in_immigrant_ref_household, 'household', 'non_reference_person')
recent_immigrants['move_type'] = np.where(
    recent_immigrants['gq_person'],
    'gq_person',
    non_gq_move_type,
)

In [5]:
# Total person weight of recent immigrants, i.e. the number of people
# entering the US per year.
recent_immigrants['PWGTP'].sum()

1842835

In [6]:
# Person-weighted count of recent immigrants in each move type.
recent_immigrants.groupby('move_type')['PWGTP'].sum()

move_type
gq_person               125443
household               970554
non_reference_person    746838
Name: PWGTP, dtype: int64

In [7]:
# Load the household-level ACS records (stored under the 'acs' key).
acs_households = pd.read_hdf(
    '../data/acs_2020_5yr_household.hdf',
    key='acs',
)

In [8]:
# Every recent immigrant's SERIALNO must be present in the household file.
household_serialnos = acs_households['SERIALNO']
assert recent_immigrants['SERIALNO'].isin(household_serialnos).all()

In [9]:
# Attach each person's household weight (WGTP), looked up by SERIALNO.
wgtp_by_serialno = acs_households.set_index('SERIALNO')['WGTP']
recent_immigrants['household_weight'] = recent_immigrants['SERIALNO'].map(wgtp_by_serialno)

In [10]:
# Movers not classified as 'household' are counted one event per person, so
# sum person weights (PWGTP) within each remaining move type.
is_individual_move = recent_immigrants['move_type'] != 'household'
individual_immigration_events = (
    recent_immigrants.loc[is_individual_move]
    .groupby('move_type')['PWGTP']
    .sum()
)
individual_immigration_events

move_type
gq_person               125443
non_reference_person    746838
Name: PWGTP, dtype: int64

In [11]:
# Household moves are counted once per household: take only the reference
# persons (all of whom must be 'household' movers) and sum household weights.
immigrant_ref_persons = recent_immigrants.loc[recent_immigrants['ref_person']]
assert immigrant_ref_persons['move_type'].eq('household').all()
household_immigration_events = (
    immigrant_ref_persons
    .groupby('move_type')['household_weight']
    .sum()
)
household_immigration_events

move_type
household    468080
Name: household_weight, dtype: int64

In [14]:
# Stack the per-person and per-household counts into one series, label each
# entry '<move_type>_immigration_events', and order the labels alphabetically.
immigration_events = (
    pd.concat([individual_immigration_events, household_immigration_events])
    .add_suffix('_immigration_events')
    .sort_index()
)
immigration_events

move_type
gq_person_immigration_events               125443
household_immigration_events               468080
non_reference_person_immigration_events    746838
dtype: int64

In [17]:
# Write the V&V targets to JSON. `json` and `Path` are imported in the
# notebook's top import cell rather than here, so all dependencies are visible
# in one place.
output_path = Path('v_and_v_inputs/immigration.json')
# Create the output directory if it is missing, so the notebook still runs end
# to end on a fresh checkout instead of failing with FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open('w', encoding='utf-8') as f:
    # Cast to built-in int: the series holds numpy integers, which the json
    # module cannot serialize.
    json.dump({k: int(v) for k, v in immigration_events.items()}, f, ensure_ascii=False, indent=4)