# Sample workflow: 2000 block group parts to 2010 tracts

## Starting from a subset of 2010 District of Columbia blocks

For further background information see:
* **Schroeder, J. P**. 2007. *Target-density weighting interpolation and uncertainty evaluation for temporal analysis of census data*. Geographical Analysis 39 (3):311–335.

#### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

In [1]:
%load_ext watermark
%watermark

2020-05-12T21:13:31-04:00

CPython 3.7.6
IPython 7.13.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.4.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit


In [2]:
import nhgisxwalk
import inspect
import numpy
import pandas

%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv

watermark 2.0.2
numpy      1.18.1
pandas     1.0.3
nhgisxwalk 0.0.1



### Source and target years for the crosswalk

In [3]:
# Census vintages of the source and target geographies; kept as strings
# because they are interpolated into file and column names below.
source_year = "2000"
target_year = "2010"

### Source-target building base

In [4]:
# Location of the testing subsets and of the block-to-block base crosswalk.
subset_data_dir = "../testing_data_subsets"
base_xwalk_name = "/nhgis_blk%s_blk%s_gj.csv.zip" % (source_year, target_year)
base_xwalk_file = subset_data_dir + base_xwalk_name

# Column dtypes for the source/target GJOIN identifier columns, as produced
# by `nhgisxwalk.str_types`, are handed to pandas when reading the file.
gjoin_columns = ["GJOIN%s" % year for year in (source_year, target_year)]
data_types = nhgisxwalk.str_types(gjoin_columns)
base_xwalk = pandas.read_csv(base_xwalk_file, index_col=0, dtype=data_types)
base_xwalk.head()

Unnamed: 0,GJOIN2000,GJOIN2010,WEIGHT,PAREA
2028146,G11000100001001000,G11000100001001000,1.0,1.0
2028147,G11000100001001001,G11000100001001001,1.0,1.0
2028148,G11000100001001002,G11000100001001005,1.0,1.0
2028149,G11000100001001003,G11000100001001004,1.0,1.0
2028150,G11000100001001004,G11000100001001002,1.0,1.0


### Convenience code shorthand/lookup

In [5]:
# Display the implementation of the shorthand lookup helper.
print(inspect.getsource(nhgisxwalk.valid_geo_shorthand))

def valid_geo_shorthand(shorthand_name=True):
    """Shorthand lookups for census geographies."""
    lookup = {
        "blk": "block",
        "bgp": "block group part",
        "bkg": "block group",
        "trt": "tract",
        "cty": "county",
    }
    if not shorthand_name:
        lookup = {v: k for k, v in lookup.items()}
    return lookup



In [6]:
# With shorthand_name=False the lookup is inverted: full geography name -> shorthand code.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)

{'block': 'blk',
 'block group part': 'bgp',
 'block group': 'bkg',
 'tract': 'trt',
 'county': 'cty'}

### Instantiate an `nhgisxwalk.GeoCrossWalk` object
##### see [nhgisxwalk.GeoCrossWalk](https://github.com/jGaboardi/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details

In [7]:
# Nested lookup keyed by table name; each table's "Total" entry holds the
# NHGIS variable code that is selected in the next cell.
nhgisxwalk.desc_code_2000_SF1b

{'Persons': {'Persons': 'Universe',
  'NP001A': 'Source code',
  'FXS': 'NHGIS code',
  'Total': 'FXS001'},
 'Families': {'Families': 'Universe',
  'NP031A': 'Source code',
  'F2V': 'NHGIS code',
  'Total': 'F2V001'},
 'Households': {'Households': 'Universe',
  'NP010A': 'Source code',
  'FY4': 'NHGIS code',
  'Total': 'FY4001'},
 'Housing Units': {'Housing Units': 'Universe',
  'NH001A': 'Source code',
  'FV5': 'NHGIS code',
  'Total': 'FV5001'},
 'Population by Urban and Rural': {'Persons': 'Universe',
  'NP002A': 'Source code',
  'FXT': 'NHGIS code',
  'Urban': 'FXT001',
  'Rural': 'FXT002',
  'Not defined for this file': 'FXT003'}}

In [8]:
# Collect the NHGIS code of the "Total" count for each universe of interest.
universes = ("Persons", "Families", "Households", "Housing Units")
input_vars = [
    nhgisxwalk.desc_code_2000_SF1b[universe]["Total"] for universe in universes
]
input_vars

['FXS001', 'F2V001', 'FY4001', 'FV5001']

In [9]:
# Short tags for the weight columns, one per entry of `input_vars` and in the
# same order; they show up as the `wt_<tag>` columns of the crosswalk.
input_var_tags = [
    "pop",  # persons
    "fam",  # families
    "hh",  # households
    "hu",  # housing units
]

In [10]:
# State FIPS code passed as `stfips` (this notebook works on the DC subset).
subset_state = "11"

# 2000 block-level table from the testing subsets, used as the base source table.
base_source_table = subset_data_dir + "/2000_block.csv.zip"

# Build the 2000 block group part -> 2010 tract crosswalk from the block base.
bgp2000_to_trt2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    source_year=source_year,
    target_year=target_year,
    source_geo="bgp",
    target_geo="trt",
    base_source_table=base_source_table,
    input_var=input_vars,
    weight_var=input_var_tags,
    stfips=subset_state,
)

# This slice includes a block group part that is split across two tracts.
bgp2000_to_trt2010.xwalk[674:685]

Unnamed: 0,bgp2000,trt2010,wt_pop,wt_fam,wt_hh,wt_hu
674,G1101050000500009803U3,G1100010009803,1.0,1.0,1.0,1.0
675,G1101050000500009803U4,G1100010009803,1.0,1.0,1.0,1.0
676,G1101050000500009804U1,G1100010009804,1.0,1.0,1.0,1.0
677,G1101050000500009806R2,G1100010009810,0.0,0.0,0.0,0.0
678,G1101050000500009806U1,G1100010009811,1.0,1.0,1.0,1.0
679,G1101050000500009806U2,G1100010009810,0.423448,0.431075,0.404344,0.404372
680,G1101050000500009806U2,G1100010009811,0.576552,0.568925,0.595656,0.595628
681,G1101050000500009806U3,G1100010009810,1.0,1.0,1.0,1.0
682,G1101050000500009807R1,G1100010009807,0.0,0.0,0.0,0.0
683,G1101050000500009807R3,G1100010009807,0.0,0.0,0.0,0.0


### Write crosswalk to a `.csv`

In [11]:
# Output directory for the crosswalk; only the location is passed here.
# NOTE(review): the file name appears to be chosen by `xwalk_to_csv` itself
# (the read-back cell looks for "bgp2000_to_trt2010_<stfips>") — confirm.
res_dir = "../../results/"
bgp2000_to_trt2010.xwalk_to_csv(loc=res_dir)

### Read crosswalk from a `.csv`

In [12]:
# Path of the written crosswalk; note that `fname` carries no file extension.
fname = res_dir + "bgp2000_to_trt2010_%s" % subset_state

# Bind the re-read table to its own name. Reusing `bgp2000_to_trt2010` would
# replace the GeoCrossWalk object with the table read from disk, so re-running
# the write cell (which calls `.xwalk_to_csv` on that name) would no longer
# operate on the GeoCrossWalk object.
bgp2000_to_trt2010_from_csv = nhgisxwalk.GeoCrossWalk.xwalk_from_csv(fname)

# Same slice as shown before writing, for a visual round-trip comparison.
bgp2000_to_trt2010_from_csv[674:685]

Unnamed: 0,bgp2000,trt2010,wt_pop,wt_fam,wt_hh,wt_hu
674,G1101050000500009803U3,G1100010009803,1.0,1.0,1.0,1.0
675,G1101050000500009803U4,G1100010009803,1.0,1.0,1.0,1.0
676,G1101050000500009804U1,G1100010009804,1.0,1.0,1.0,1.0
677,G1101050000500009806R2,G1100010009810,0.0,0.0,0.0,0.0
678,G1101050000500009806U1,G1100010009811,1.0,1.0,1.0,1.0
679,G1101050000500009806U2,G1100010009810,0.423448,0.431075,0.404344,0.404372
680,G1101050000500009806U2,G1100010009811,0.576552,0.568925,0.595656,0.595628
681,G1101050000500009806U3,G1100010009810,1.0,1.0,1.0,1.0
682,G1101050000500009807R1,G1100010009807,0.0,0.0,0.0,0.0
683,G1101050000500009807R3,G1100010009807,0.0,0.0,0.0,0.0


-----------------------------------------------