In [None]:
# This file is part of the Minnesota Population Center's NHGISXWALK.
# For copyright and licensing information, see the NOTICE and LICENSE files
# in this project's top-level directory, and also on-line at:
#   https://github.com/ipums/nhgisxwalk

# Generate national and state-level crosswalks
## 1990 block group parts to 2010 counties

### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

**James D. Gaboardi, 06/2020**

In [1]:
# Load the watermark extension, then print a timestamp together with the
# interpreter and system details so the run environment is recorded.
%load_ext watermark
%watermark

2020-10-01T17:40:39-04:00

CPython 3.8.5
IPython 7.18.1

compiler   : Clang 10.0.1 
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 8
interpreter: 64bit


In [2]:
import nhgisxwalk
import inspect
import numpy
import pandas

# Enable autoreload in mode 2 so imported modules are reloaded before code runs.
%load_ext autoreload
%autoreload 2
# Report the watermark version (-w) and the versions of the imported packages (-iv).
%watermark -w
%watermark -iv

watermark 2.0.2
numpy      1.19.1
nhgisxwalk 0.0.9post1
pandas     1.1.1



### Source and target years for the crosswalk

In [3]:
# Census years at each end of the crosswalk, kept as strings because they are
# only ever used to build file and column names.
source_year = "1990"
target_year = "2010"

# Labels of the source and target ID columns in the base crosswalk
# (e.g. GJOIN1990 / GJOIN2010).
gj_src = f"GJOIN{source_year}"
gj_trg = f"GJOIN{target_year}"

In [4]:
# Set these to a local directory before running the notebook.
# Keep the trailing "/": `data_tab` is prepended directly to file names when
# the summary-table paths are built further down.
data_in = "path/to/data/"
# Tabular summary data root; defaults to the same location as `data_in`.
# Assign a different directory here if the extracts live elsewhere.
data_tab = data_in

In [5]:
# Stems of the source-year summary extracts. Each stem is used below as both
# the directory name and the CSV file name.
block_file = f"{source_year}_block"
# Supplementary table for the source year (the 1990 special case).
supp_file = f"{source_year}_blck_grp_598"

### Source-target base crosswalk (block to block)

In [6]:
# Name of the block-to-block base crosswalk for the chosen pair of years.
base_xwalk_name = f"nhgis_blk{source_year}_blk{target_year}_gj"

# Column dtypes for the two ID columns, forwarded to the CSV reader.
# NOTE(review): `str_types` presumably maps each column to `str` so the
# GISJOIN identifiers are not parsed as numbers -- confirm in nhgisxwalk.
data_types = nhgisxwalk.str_types([gj_src, gj_trg])
read_csv_kws = dict(dtype=data_types)

# Options describing where and how the crosswalk file is stored.
# NOTE(review): `archived` / `remove_unpacked` presumably mean "read from a
# zipped download and delete the unpacked copy afterwards" -- confirm.
from_csv_kws = dict(path=data_in, archived=True, remove_unpacked=True)

base_xwalk = nhgisxwalk.xwalk_df_from_csv(
    base_xwalk_name,
    **from_csv_kws,
    **read_csv_kws,
)

# Preview the first rows only; the table itself is not dumped.
base_xwalk.head()

Unnamed: 0,GJOIN1990,GJOIN2010,WEIGHT,PAREA_VIA_BLK00
0,G01000100201101A,G01000100201002004,0.000753,0.014284
1,G01000100201101A,G01000100201002005,0.04202,0.109618
2,G01000100201101A,G01000100201002006,0.262146,0.498133
3,G01000100201101A,G01000100201002016,0.237187,0.218109
4,G01000100201101A,G01000100201002023,0.099097,0.012864


### Source summary data

In [7]:
# Relative location of the source-year block summary table: <stem>/<stem>.csv
base_source_name = f"{block_file}/{block_file}.csv"
# Full path; `data_tab` is prepended as-is, so it must end with a separator.
base_source_file = f"{data_tab}{base_source_name}"

### Source supplementary summary data (special case for 1990)

In [8]:
# Relative location of the supplementary summary table: <stem>/<stem>.csv
supp_source_name = f"{supp_file}/{supp_file}.csv"
# Full path; `data_tab` is prepended as-is, so it must end with a separator.
supp_source_file = f"{data_tab}{supp_source_name}"

### Convenience code shorthand/lookup

In [9]:
# Display the lookup from full geography names to the shorthand codes
# (e.g. "bgp", "co") that are used for the crosswalk parameters below.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)

{'block': 'blk',
 'block group part': 'bgp',
 'block group': 'bg',
 'tract': 'tr',
 'county': 'co'}

### Set the `nhgisxwalk.GeoCrossWalk` parameters
##### See [nhgisxwalk.GeoCrossWalk](https://github.com/ipums/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details

In [10]:
# Display the 1990 variable lookup: for each universe it lists the source code,
# the NHGIS code, and the column code stored under "Total".
nhgisxwalk.desc_code_1990

{'Persons': {'Persons': 'Universe',
  'NP1': 'Source code',
  'ET1': 'NHGIS code',
  'Total': 'ET1001'},
 'Families': {'Families': 'Universe',
  'NP2': 'Source code',
  'EUD': 'NHGIS code',
  'Total': 'EUD001'},
 'Households': {'Households': 'Universe',
  'NP3': 'Source code',
  'EUO': 'NHGIS code',
  'Total': 'EUO001'},
 'Housing Units': {'Housing Units': 'Universe',
  'NH1': 'Source code',
  'ESA': 'NHGIS code',
  'Total': 'ESA001'}}

In [11]:
# Collect the "Total" column code for each universe, in the order
# persons, families, households, housing units.
input_vars = [
    nhgisxwalk.desc_code_1990[universe]["Total"]
    for universe in ("Persons", "Families", "Households", "Housing Units")
]
input_vars

['ET1001', 'EUD001', 'EUO001', 'ESA001']

In [12]:
# Short tags passed as `weight_var` for the crosswalk.
# NOTE(review): presumably matched by position with `input_vars`
# (persons, families, households, housing units) -- confirm.
input_var_tags = "pop fam hh hu".split()

In [13]:
# Parameters handed to the data-product generator for building the crosswalk.
xwalk_args = dict(
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",  # shorthand for "block group part"
    target_geo="co",  # shorthand for "county"
    base_source_table=base_source_file,
    supp_source_table=supp_source_file,
    input_var=input_vars,
    weight_var=input_var_tags,
    keep_base=False,
    add_geoid=True,
)

### Generate data product
1. Create a national crosswalk, then split it by state
2. Write out all products with `README.txt` files

In [14]:
# Build the crosswalk from the base block crosswalk using the parameters
# in `xwalk_args`, then write out the resulting products.
# NOTE(review): `data_in` is presumably used as the output location -- confirm.
nhgisxwalk.generate_data_product(base_xwalk, xwalk_args, data_in)

-----------------------------------------------