# Sample workflow of `nhgisxwalk`

## Starting from a subset of 2010 District of Columbia blocks

For further background information see:
* **Schroeder, J. P**. 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3):311–335.

#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

In [1]:
# Load the `watermark` extension and report the timestamp, interpreter and
# system details of the session that produced this notebook.
%load_ext watermark
%watermark

2020-05-08T15:55:44-04:00

CPython 3.7.6
IPython 7.13.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.4.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit


In [2]:
import nhgisxwalk
import inspect
import numpy
import pandas

%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv

watermark 2.0.2
numpy      1.18.1
nhgisxwalk 0.0.1
pandas     1.0.3



### Source and target years for the crosswalk

In [3]:
# Years of the source and target geographies, kept as strings because they
# are interpolated into file and column names further down.
source_year = "1990"
target_year = "2010"

### Base source-target (block-to-block) crosswalk used for building

In [4]:
# Directory holding the zipped testing subsets, relative to this notebook.
subset_data_dir = "../testing_data_subsets"

# Name and full path of the block-to-block base crosswalk for the chosen years.
base_xwalk_name = "/nhgis_blk%s_blk%s_gj.csv.zip" % (source_year, target_year)
base_xwalk_file = subset_data_dir + base_xwalk_name

# dtype mapping for the two GJOIN ID columns, as built by `nhgisxwalk.str_types`
# (presumably forces them to be read as strings -- confirm against the library).
gjoin_columns = ["GJOIN%s" % year for year in (source_year, target_year)]
data_types = nhgisxwalk.str_types(gjoin_columns)

# The first CSV column is used as the index.
base_xwalk = pandas.read_csv(
    base_xwalk_file,
    index_col=0,
    dtype=data_types,
)
base_xwalk.head()

Unnamed: 0,GJOIN1990,GJOIN2010,WEIGHT,PAREA_VIA_BLK00,st2010
8149,,G11000100001004046,0.0,0.0,11
8150,,G11000100001004047,0.0,0.0,11
8151,,G11000100001004049,0.0,0.0,11
8152,,G11000100002024018,0.0,0.0,11
8153,,G11000100002024019,0.0,0.0,11


### Convenience code shorthand/lookup

In [5]:
# Fetch the library source of the geography shorthand lookup, then print it
# so that newlines render as text rather than as an escaped string repr.
shorthand_source = inspect.getsource(nhgisxwalk.valid_geo_shorthand)
print(shorthand_source)

def valid_geo_shorthand(shorthand_name=True):
    """Shorthand lookups for census geographies."""
    lookup = {
        "blk": "block",
        "bgp": "block group part",
        "bkg": "block group",
        "trt": "tract",
        "cty": "county",
    }
    if not shorthand_name:
        lookup = {v: k for k, v in lookup.items()}
    return lookup



In [6]:
# With `shorthand_name=False` the lookup is inverted: full geography name -> shorthand.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)

{'block': 'blk',
 'block group part': 'bgp',
 'block group': 'bkg',
 'tract': 'trt',
 'county': 'cty'}

### Instantiate an `nhgisxwalk.GeoCrossWalk` object
##### see [nhgisxwalk.GeoCrossWalk](https://github.com/jGaboardi/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details

In [7]:
# Display the 1990 variable lookup; each universe's "Total" key holds a
# variable code (e.g. 'ET1001') that is used as crosswalk input below.
nhgisxwalk.desc_code_1990

{'Persons': {'Persons': 'Universe',
  'NP1': 'Source code',
  'ET1': 'NHGIS code',
  'Total': 'ET1001'},
 'Families': {'Families': 'Universe',
  'NP2': 'Source code',
  'EUD': 'NHGIS code',
  'Total': 'EUD001'},
 'Households': {'Households': 'Universe',
  'NP3': 'Source code',
  'EUO': 'NHGIS code',
  'Total': 'EUO001'},
 'Housing Units': {'Housing Units': 'Universe',
  'NH1': 'Source code',
  'ESA': 'NHGIS code',
  'Total': 'ESA001'}}

In [8]:
# Universes whose "Total" variable code is used as input, in order.
universes = ["Persons", "Families", "Households", "Housing Units"]

# Look up the "Total" code for each universe in the 1990 lookup.
input_vars = [
    nhgisxwalk.desc_code_1990[universe]["Total"] for universe in universes
]
input_vars

['ET1001', 'EUD001', 'EUO001', 'ESA001']

In [9]:
# Short tags, one per entry of `input_vars` (presumably in the same order --
# confirm); passed to the crosswalk as `weight_var`.
input_var_tags = [
    "pop",
    "fam",
    "hh",
    "hu",
]

In [10]:
# State FIPS code handed to the crosswalk as `stfips`.
subset_state = "11"

# Keyword arguments for the 1990 block group part -> 2010 tract crosswalk.
xwalk_kwargs = dict(
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",
    target_geo="trt",
    base_source_table=subset_data_dir + "/1990_block.csv.zip",
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips=subset_state,
)
bgp1990_to_trt2010 = nhgisxwalk.GeoCrossWalk(base_xwalk, **xwalk_kwargs)

# Show a slice of the resulting crosswalk table.
# NOTE(review): in the saved output, row 695 has `wt_pop` but no
# `wt_fam`/`wt_hh`/`wt_hu` values -- confirm this is expected.
bgp1990_to_trt2010.xwalk[688:697]

Unnamed: 0,bgp1990,trt2010,wt_pop,wt_fam,wt_hh,wt_hu
688,G11000105000050000009806989999999884011,G1100010009811,1.0,1.0,1.0,1.0
689,G11000105000050000009806989999999884012,G1100010009810,0.414771,0.415454,0.396873,0.39507
690,G11000105000050000009806989999999884012,G1100010009811,0.585229,0.584546,0.603127,0.60493
691,G11000105000050000009807989999999884011,G1100010009807,1.0,1.0,1.0,1.0
692,G11000105000050000009807989999999884012,G1100010009807,1.0,1.0,1.0,1.0
693,G11000105000050000009807989999999884013,G1100010009807,1.0,1.0,1.0,1.0
694,G11000105000050000009808989999999884014,G1100010010900,1.0,1.0,1.0,1.0
695,G11000105000050000009809989999999884011,G1100010010400,1.0,,,
696,G11000105000050000009810989999999884011,G1100010009801,1.0,1.0,1.0,1.0


### Write crosswalk to a `.csv`

In [11]:
# Directory that receives the crosswalk file, relative to this notebook.
res_dir = "../../results/"
# Only the location is passed; the output file name is chosen by `xwalk_to_csv`
# (presumably "bgp1990_to_trt2010_11", the name read back in the next cell -- confirm).
bgp1990_to_trt2010.xwalk_to_csv(loc=res_dir)

### Read crosswalk from a `.csv`

In [12]:
# Path of the crosswalk written above; no file extension is given here.
fname = res_dir + "bgp1990_to_trt2010_%s" % subset_state

# Bind the table read from disk to its own name instead of overwriting the
# `GeoCrossWalk` object. That keeps the write cell re-runnable and avoids
# one name referring to two different kinds of object.
bgp1990_to_trt2010_from_csv = nhgisxwalk.GeoCrossWalk.xwalk_from_csv(fname)

# NOTE(review): the saved output shows an extra "Unnamed: 0" column that
# duplicates the index, which suggests the index written by `xwalk_to_csv`
# is read back as data -- confirm and fix in the library's CSV round trip.
bgp1990_to_trt2010_from_csv[688:697]

Unnamed: 0.1,Unnamed: 0,bgp1990,trt2010,wt_pop,wt_fam,wt_hh,wt_hu
688,688,G11000105000050000009806989999999884011,G1100010009811,1.0,1.0,1.0,1.0
689,689,G11000105000050000009806989999999884012,G1100010009810,0.414771,0.415454,0.396873,0.39507
690,690,G11000105000050000009806989999999884012,G1100010009811,0.585229,0.584546,0.603127,0.60493
691,691,G11000105000050000009807989999999884011,G1100010009807,1.0,1.0,1.0,1.0
692,692,G11000105000050000009807989999999884012,G1100010009807,1.0,1.0,1.0,1.0
693,693,G11000105000050000009807989999999884013,G1100010009807,1.0,1.0,1.0,1.0
694,694,G11000105000050000009808989999999884014,G1100010010900,1.0,1.0,1.0,1.0
695,695,G11000105000050000009809989999999884011,G1100010010400,1.0,,,
696,696,G11000105000050000009810989999999884011,G1100010009801,1.0,1.0,1.0,1.0


-----------------------------------------------