# Sample workflow of `nhgisxwalk`

## Starting from a subset of 2010 Wyoming blocks

For further background information see:
* **Schroeder, J. P.** 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3): 311–335.

#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

In [1]:
%load_ext watermark
%watermark

2020-05-07T22:27:52-04:00

CPython 3.7.6
IPython 7.13.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.4.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit


In [2]:
import nhgisxwalk
import glob
import inspect
import numpy
import pandas

%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv

watermark 2.0.2
pandas     1.0.3
nhgisxwalk 0.0.1
numpy      1.18.1



### Source and target years for the crosswalk

In [3]:
# Census years bounding the crosswalk. Kept as strings because they are
# spliced into file names and column names further down.
source_year = "1990"
target_year = "2010"

### Base (block-level) source-target crosswalk

In [4]:
# Directory holding the testing subsets, relative to this notebook
subset_data_dir = "../testing_data_subsets"

# Block-to-block base crosswalk file for the chosen source/target years
base_xwalk_name = "/nhgis_blk%s_blk%s_gj.csv.zip" % (source_year, target_year)
base_xwalk_file = subset_data_dir + base_xwalk_name

# The source and target GJOIN ID columns get the dtype mapping returned by
# `nhgisxwalk.str_types`; it is handed straight to `read_csv(dtype=...)`
id_columns = ["GJOIN%s" % year for year in (source_year, target_year)]
data_types = nhgisxwalk.str_types(id_columns)

# The first column of the file is used as the index
base_xwalk = pandas.read_csv(base_xwalk_file, index_col=0, dtype=data_types)
base_xwalk.head()

Unnamed: 0,GJOIN1990,GJOIN2010,WEIGHT,PAREA_VIA_BLK00
0,G08006900025701,G56002100019021461,0.0,6.228556e-06
1,G08012300024101,G56002100020004064,0.0,9.068587e-07
2,G08012300024102,G56002100020004064,0.0,1.449911e-07
3,G08012300024113,G56002100020004064,0.0,1.604626e-07
4,G08012300024114,G56002100020004064,0.0,9.604924e-07


### Convenience shorthand lookup for census geographies

In [5]:
# Print the source of the library's shorthand lookup helper for reference
print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))

def valid_geo_shorthand(shorthand_name=True):
    """Shorthand lookups for census geographies."""
    lookup = {
        "blk": "block",
        "bgp": "block group part",
        "bkg": "block group",
        "trt": "tract",
        "cty": "county",
    }
    if not shorthand_name:
        lookup = {v: k for k, v in lookup.items()}
    return lookup



In [6]:
# Inverted lookup: full geography name -> shorthand code
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)

{'block': 'blk',
 'block group part': 'bgp',
 'block group': 'bkg',
 'tract': 'trt',
 'county': 'cty'}

### Instantiate an `nhgisxwalk.GeoCrossWalk` object
##### see [nhgisxwalk.GeoCrossWalk](https://github.com/jGaboardi/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details

In [7]:
# 1990 lookup: for each universe, its source code, NHGIS code, and the
# variable code holding the universe total
nhgisxwalk.desc_code_1990

{'Persons': {'Persons': 'Universe',
  'NP1': 'Source code',
  'ET1': 'NHGIS code',
  'Total': 'ET1001'},
 'Families': {'Families': 'Universe',
  'NP2': 'Source code',
  'EUD': 'NHGIS code',
  'Total': 'EUD001'},
 'Households': {'Households': 'Universe',
  'NP3': 'Source code',
  'EUO': 'NHGIS code',
  'Total': 'EUO001'},
 'Housing Units': {'Housing Units': 'Universe',
  'NH1': 'Source code',
  'ESA': 'NHGIS code',
  'Total': 'ESA001'}}

In [8]:
# Universes whose 1990 "Total" variable codes are used for the crosswalk
universes = ["Persons", "Families", "Households", "Housing Units"]

# Pull the "Total" variable code of each universe, preserving the order above
input_vars = [
    nhgisxwalk.desc_code_1990[universe]["Total"] for universe in universes
]
input_vars

['ET1001', 'EUD001', 'EUO001', 'ESA001']

In [9]:
# Short tags passed as `weight_var` when building the crosswalk; the resulting
# weight columns are displayed as wt_pop, wt_fam, wt_hh, wt_hu.
# NOTE(review): presumably matched positionally with `input_vars` — confirm.
input_var_tags = ["pop", "fam", "hh", "hu"]

In [10]:
# 1990 block-level source table shipped with the testing subsets
base_source_table = subset_data_dir + "/1990_block.csv.zip"

# Build the 1990 block group part -> 2010 tract crosswalk on top of the
# block-level base crosswalk. `stfips="56"` matches the `G56...` IDs of the
# Wyoming subset used throughout this notebook.
bgp1990_to_trt2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",
    target_geo="trt",
    base_source_table=base_source_table,
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips="56",
)

# Inspect the last rows of the resulting crosswalk table
bgp1990_to_trt2010.xwalk.tail(15)

Unnamed: 0,bgp1990,trt2010,wt_pop,wt_fam,wt_hh,wt_hu
1304,G560045092255999999513009999999999921,G5600450951300,1.0,1.0,1.0,1.0
1305,G560045092255999999513009999999999922,G5600450951300,1.0,1.0,1.0,1.0
1306,G560045092255999999513009999999999923,G5600450951300,1.0,1.0,1.0,1.0
1307,G560045092255999999513009999999999924,G5600450951300,1.0,1.0,1.0,1.0
1308,G560045092255999999513009999999999925,G5600450951100,0.107631,0.103158,0.089892,0.082977
1309,G560045092255999999513009999999999925,G5600450951300,0.892369,0.896842,0.910108,0.917023
1310,G560045093520791259512009999999999926,G5600450951100,1.0,1.0,1.0,1.0
1311,G560045093520791259512009999999999927,G5600450951100,1.0,1.0,1.0,1.0
1312,G560045093520999999512009999999999921,G5600450951100,1.0,1.0,1.0,1.0
1313,G560045093520999999512009999999999922,G5600450951100,1.0,1.0,1.0,1.0


### Write crosswalk to a `.csv`

In [11]:
# Output location, relative to this notebook.
# NOTE(review): presumably the directory must already exist — confirm.
res_dir = "../../results/"
bgp1990_to_trt2010.xwalk_to_csv(loc=res_dir)

### Read crosswalk from a `.csv`

In [12]:
# Path stem of the crosswalk file (no extension is given here).
# NOTE(review): presumably this is the file written by `xwalk_to_csv` above and
# `xwalk_from_csv` appends the extension itself — confirm.
fname = res_dir + "bgp1990_to_trt2010_56"

# Bind the re-read table to its own name rather than overwriting the
# `GeoCrossWalk` instance `bgp1990_to_trt2010`. The value returned here is used
# directly as a table (`.tail`), whereas the instance exposes its table via
# `.xwalk`; reusing the name made the earlier cells fail when re-run.
xwalk_from_file = nhgisxwalk.GeoCrossWalk.xwalk_from_csv(fname)

# NOTE(review): the recorded output of this round trip shows extra
# `Unnamed: 0` / `Unnamed: 0.1` columns — presumably the index is written to the
# file and not consumed as the index on read; confirm in the library's
# reader/writer.
xwalk_from_file.tail(15)

Unnamed: 0.1,Unnamed: 0,bgp1990,trt2010,wt_pop,wt_fam,wt_hh,wt_hu
1304,1304,G560045092255999999513009999999999921,G5600450951300,1.0,1.0,1.0,1.0
1305,1305,G560045092255999999513009999999999922,G5600450951300,1.0,1.0,1.0,1.0
1306,1306,G560045092255999999513009999999999923,G5600450951300,1.0,1.0,1.0,1.0
1307,1307,G560045092255999999513009999999999924,G5600450951300,1.0,1.0,1.0,1.0
1308,1308,G560045092255999999513009999999999925,G5600450951100,0.107631,0.103158,0.089892,0.082977
1309,1309,G560045092255999999513009999999999925,G5600450951300,0.892369,0.896842,0.910108,0.917023
1310,1310,G560045093520791259512009999999999926,G5600450951100,1.0,1.0,1.0,1.0
1311,1311,G560045093520791259512009999999999927,G5600450951100,1.0,1.0,1.0,1.0
1312,1312,G560045093520999999512009999999999921,G5600450951100,1.0,1.0,1.0,1.0
1313,1313,G560045093520999999512009999999999922,G5600450951100,1.0,1.0,1.0,1.0


-----------------------------------------------