# Generate national and state-level crosswalks
## 2000 block group parts to 2010 block groups

### NHGIS [block crosswalks](https://www.nhgis.org/user-resources/geographic-crosswalks)

**James D. Gaboardi, 06/2020**

In [1]:
%load_ext watermark
%watermark

2020-07-28T22:12:21-04:00

CPython 3.7.6
IPython 7.15.0

compiler   : Clang 9.0.1 
system     : Darwin
release    : 19.6.0
machine    : x86_64
processor  : i386
CPU cores  : 8
interpreter: 64bit


In [2]:
import nhgisxwalk
import inspect
import numpy
import pandas

%load_ext autoreload
%autoreload 2
%watermark -w
%watermark -iv

watermark 2.0.2
pandas     1.0.4
numpy      1.18.5
nhgisxwalk 0.0.6



### Source and target years for the crosswalk

In [3]:
# Census years bounding the crosswalk; kept as strings because they are
# interpolated into column and file names.
source_year = "2000"
target_year = "2010"

# Names of the source/target ID columns in the base (block-level) crosswalk.
gj_src = "GJOIN%s" % source_year
gj_trg = "GJOIN%s" % target_year

### Base (block-to-block) source-target crosswalk

In [4]:
# dtype mapping for the two ID columns (presumably all `str`, per the
# helper's name -- confirm against nhgisxwalk.str_types).
data_types = nhgisxwalk.str_types([gj_src, gj_trg])

# Zipped block-to-block crosswalk that every coarser crosswalk is built from.
base_xwalk_name = "nhgis_blk%s_blk%s_gj.zip" % (source_year, target_year)
base_xwalk_file = "../../crosswalks/%s" % base_xwalk_name

base_xwalk = pandas.read_csv(base_xwalk_file, dtype=data_types)

# Preview only the first rows rather than the whole table.
base_xwalk.head()

Unnamed: 0,GJOIN2000,GJOIN2010,WEIGHT,PAREA
0,G01000100201001000,G01000100201002000,0.035897,0.008988
1,G01000100201001000,G01000100201002001,0.25333,0.263725
2,G01000100201001000,G01000100201002002,0.0,0.000385
3,G01000100201001000,G01000100201002003,0.076297,0.05543
4,G01000100201001000,G01000100201002004,0.032441,0.007543


### Source summary data

In [5]:
# Block-level summary table for the source year; the year appears in both
# the directory name and the file name, hence the repeated tuple.
base_source_name = "%s_block/%s_block.csv" % ((source_year,) * 2)
base_source_file = "../../tabular_data/%s" % base_source_name

### Convenience code shorthand/lookup

In [6]:
# Show the geography-name -> shorthand lookup (e.g. "block group part" -> "bgp");
# these shorthand codes are what `source_geo`/`target_geo` take when the
# crosswalk object is built.
nhgisxwalk.valid_geo_shorthand(shorthand_name=False)

{'block': 'blk',
 'block group part': 'bgp',
 'block group': 'bg',
 'tract': 'tr',
 'county': 'co'}

### Instantiate an `nhgisxwalk.GeoCrossWalk` object
See [nhgisxwalk.GeoCrossWalk](https://github.com/jGaboardi/nhgisxwalk/blob/92b4fe55de0a9c53d0315dcda8ec121faaf20aef/nhgisxwalk/geocrosswalk.py#L19) for full details.

In [7]:
# Show the 2000 SF1b variable lookup: for each universe it lists the source
# code, the NHGIS table code, and the NHGIS code of the "Total" variable.
nhgisxwalk.desc_code_2000_SF1b

{'Persons': {'Persons': 'Universe',
  'NP001A': 'Source code',
  'FXS': 'NHGIS code',
  'Total': 'FXS001'},
 'Families': {'Families': 'Universe',
  'NP031A': 'Source code',
  'F2V': 'NHGIS code',
  'Total': 'F2V001'},
 'Households': {'Households': 'Universe',
  'NP010A': 'Source code',
  'FY4': 'NHGIS code',
  'Total': 'FY4001'},
 'Housing Units': {'Housing Units': 'Universe',
  'NH001A': 'Source code',
  'FV5': 'NHGIS code',
  'Total': 'FV5001'}}

In [8]:
# NHGIS code of the "Total" variable for each universe, in a fixed order
# (persons, families, households, housing units).
input_vars = [
    nhgisxwalk.desc_code_2000_SF1b[universe]["Total"]
    for universe in ("Persons", "Families", "Households", "Housing Units")
]
input_vars

['FXS001', 'F2V001', 'FY4001', 'FV5001']

In [9]:
# Short tags for the input variables; they show up as the `wt_*` column
# suffixes in the resulting crosswalk. Presumably paired with `input_vars`
# by position -- keep both lists in the same order.
input_var_tags = [
    "pop",  # persons
    "fam",  # families
    "hh",  # households
    "hu",  # housing units
]

In [10]:
# Build the 2000 block group part -> 2010 block group crosswalk from the
# block-level base crosswalk.
bgp2000_to_bg2010 = nhgisxwalk.GeoCrossWalk(
    base_xwalk,
    # source/target geographies (shorthand codes) and their census years
    source_geo="bgp",
    source_year=source_year,
    target_geo="bg",
    target_year=target_year,
    # source-year block summary table, the variables read from it, and the
    # tags used to label the derived weights
    base_source_table=base_source_file,
    input_var=input_vars,
    weight_var=input_var_tags,
    # NOTE(review): presumably `keep_base` controls whether the block-level
    # table is retained and `add_geoid` adds the `bg2010ge` column -- confirm
    # against the GeoCrossWalk documentation.
    keep_base=False,
    add_geoid=True,
)

# Release the large block-level table. After this, re-running this cell
# requires re-running the cell that loads `base_xwalk` first.
del base_xwalk

bgp2000_to_bg2010.xwalk

Unnamed: 0,bgp2000gj,bg2010gj,bg2010ge,wt_pop,wt_fam,wt_hh,wt_hu
0,G01000109017103220021100R2,G01000100211001,010010211001,1.0,1.0,1.0,1.0
1,G01000109017103220021100R3,G01000100211003,010010211003,1.0,1.0,1.0,1.0
2,G01000109017199999021100R1,G01000100211002,010010211002,1.0,1.0,1.0,1.0
3,G01000109017199999021100R2,G01000100211001,010010211001,1.0,1.0,1.0,1.0
4,G01000109017199999021100R3,G01000100208011,010010208011,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
501579,G56004509225599999951300U3,G56004509513003,560459513003,1.0,0.0,1.0,1.0
501580,G56004509352079125951100R1,G56004509511001,560459511001,1.0,1.0,1.0,1.0
501581,G56004509352099999951100R1,G56001109502002,560119502002,0.0,0.0,0.0,0.0
501582,G56004509352099999951100R1,G56004509511001,560459511001,1.0,1.0,1.0,1.0


### Write crosswalk to a `.csv`

In [11]:
# Write the national crosswalk into the shared crosswalks directory.
nat_dir = "../../crosswalks/"
nhgisxwalk.xwalk_df_to_csv(
    path=nat_dir,
    dfkwds=dict(
        df=bgp2000_to_bg2010.xwalk,
        xwalk_name=bgp2000_to_bg2010.xwalk_name,
    ),
)

### Split the crosswalk by target state and write each subset to a `.csv`

In [12]:
# Unique state FIPS codes derived from the crosswalk's target endpoint,
# in ascending order.
stfips_codes = sorted(
    nhgisxwalk.extract_unique_stfips(
        endpoint=bgp2000_to_bg2010.target, df=bgp2000_to_bg2010.xwalk
    )
)
stfips_codes

['01',
 '02',
 '04',
 '05',
 '06',
 '08',
 '09',
 '10',
 '11',
 '12',
 '13',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '50',
 '51',
 '53',
 '54',
 '55',
 '56']

In [13]:
# Directory that receives one crosswalk file per target state.
state_dir = nat_dir + "nhgis_bgp2000_bg2010_state/"

# These do not change between states, so look them up once.
# `source` previously read `.target` as well, which made
# `sort_by=[source, target]` sort on the target column twice and ignore
# the source column entirely.
xwalk_name = bgp2000_to_bg2010.xwalk_name
source, target = bgp2000_to_bg2010.source, bgp2000_to_bg2010.target

for stfips in stfips_codes:
    # Subset the national crosswalk to the rows belonging to this target
    # state, ordered by source ID and then target ID.
    _stxwalk = nhgisxwalk.extract_state(
        bgp2000_to_bg2010.xwalk,
        stfips,
        xwalk_name,
        target,
        sort_by=[source, target],
    )
    # Write the state subset into the per-state directory.
    dfkwds = {"df": _stxwalk, "stfips": stfips, "xwalk_name": xwalk_name}
    nhgisxwalk.xwalk_df_to_csv(dfkwds=dfkwds, path=state_dir)

-----------------------------------------------