In [1]:
from pathlib import Path

import pandas as pd

## Load in the "rosetta stone" file

I made this file using QGIS, the open-source mapping software. I loaded in the US Census 2010 block-level shapefile for Hennepin County. I then used the block centroids, provided by the census, to collect the blocks within each zone. Since the centroids, by nature, are "half a block" from the nearest street, this is more reliable than a polygon-in-polygon calculation. I then inspected the map visually for outliers.

I'll write up my steps for that soonest.

In [2]:
# Read the block-to-zone crosswalk ("rosetta stone") that was built in QGIS.
rosetta_csv = '../data/minneapolis/rosetta_nabes.csv'
rosetta_df = pd.read_csv(rosetta_csv)

In [3]:
# Preview the crosswalk: each row pairs a block GEOID10 with a zone name/number.
rosetta_df

Unnamed: 0,GEOID10,FID,BDNAME,BDNUM,TEXT_NBR
0,270531260003016,1,Phillips West,90,90
1,270531260003001,1,Phillips West,90,90
2,270531260002001,1,Phillips West,90,90
3,270531260001000,1,Phillips West,90,90
4,270531260002006,1,Phillips West,90,90
...,...,...,...,...,...
5909,270530083002009,87,Lyndale,53,53
5910,270530082003000,87,Lyndale,53,53
5911,270530082004000,87,Lyndale,53,53
5912,270530083002008,87,Lyndale,53,53


## Load in the population data

I downloaded the population files from [data.census.gov](https://data.census.gov).

Here are the [P3 and P5 census table files for Cook County](https://s3.amazonaws.com/media.johnkeefe.net/census-by-precinct/17031_Cook_County.zip). And here is the ["productDownload_2020-06-07T173132" zip file](https://s3.amazonaws.com/media.johnkeefe.net/census-by-precinct/productDownload_2020-06-07T173132.zip). It's a little messy, and the census doesn't label the files well, but I'm providing them as I got them. The CSVs you need are in there! Adjust your paths accordingly.

In [11]:
# census P3 for county by block
# The path is split over several literals only for readability; the pieces
# concatenate to the same single string.
p3_csv = (
    '/Volumes/JK_Smarts_Data/precinct_project/MN/'
    'productDownload_2020-06-19T224000/'
    'DECENNIALSF12010.P3_data_with_overlays_2020-06-19T223910.csv'
)
p3_df = pd.read_csv(p3_csv)

In [12]:
# Preview the raw P3 table; note that row 0 holds column labels, not block data.
p3_df

Unnamed: 0,GEO_ID,NAME,P003001,P003002,P003003,P003004,P003005,P003006,P003007,P003008
0,id,Geographic Area Name,Total,Total!!White alone,Total!!Black or African American alone,Total!!American Indian and Alaska Native alone,Total!!Asian alone,Total!!Native Hawaiian and Other Pacific Islan...,Total!!Some Other Race alone,Total!!Two or More Races
1,1000000US270530203021004,"Block 1004, Block Group 1, Census Tract 203.02...",0,0,0,0,0,0,0,0
2,1000000US270530203031003,"Block 1003, Block Group 1, Census Tract 203.03...",34,22,10,0,0,0,0,2
3,1000000US270530259033011,"Block 3011, Block Group 3, Census Tract 259.03...",109,102,0,3,0,0,3,1
4,1000000US270530265083010,"Block 3010, Block Group 3, Census Tract 265.08...",138,125,1,0,4,0,0,8
...,...,...,...,...,...,...,...,...,...,...
19171,1000000US270530267022035,"Block 2035, Block Group 2, Census Tract 267.02...",0,0,0,0,0,0,0,0
19172,1000000US270530238013013,"Block 3013, Block Group 3, Census Tract 238.01...",37,30,0,0,7,0,0,0
19173,1000000US270530252055008,"Block 5008, Block Group 5, Census Tract 252.05...",61,53,2,0,2,0,3,1
19174,1000000US270530262053021,"Block 3021, Block Group 3, Census Tract 262.05...",0,0,0,0,0,0,0,0


In [13]:
# Row 0 of the census export is a second, human-readable header (GEO_ID == 'id'),
# not a block record, so remove it.
# - Reassigning instead of dropping in place, together with errors='ignore',
#   makes this cell safe to re-run (the in-place drop raised KeyError the
#   second time because label 0 was already gone).
# - The previous bare `p3_df.reset_index()` discarded its result and therefore
#   did nothing; it is omitted. The index keeps its original labels (1..n).
p3_df = p3_df.drop(index=0, errors='ignore')

In [14]:
# Census P5 table from the same download folder as P3.
# The path is split over several literals only for readability; the pieces
# concatenate to the same single string.
p5_csv = (
    '/Volumes/JK_Smarts_Data/precinct_project/MN/'
    'productDownload_2020-06-19T224000/'
    'DECENNIALSF12010.P5_data_with_overlays_2020-06-19T223910.csv'
)
p5_df = pd.read_csv(p5_csv)

In [15]:
# Remove the row labelled 0 from the P5 table (presumably the same
# human-readable label row that the P3 export carries -- confirm by previewing).
# - Reassigning instead of dropping in place, together with errors='ignore',
#   makes this cell safe to re-run (the in-place drop raised KeyError the
#   second time because label 0 was already gone).
# - The previous bare `p5_df.reset_index()` discarded its result and therefore
#   did nothing; it is omitted. The index keeps its original labels.
p5_df = p5_df.drop(index=0, errors='ignore')

In [16]:
# Compare the (rows, columns) of the two census tables before merging them.
p3_df.shape, p5_df.shape

((19175, 10), (19175, 19))

In [17]:
# Combine the P3 and P5 tables on their shared GEO_ID key. This is an inner
# join (the pandas default, spelled out here); columns that exist in both
# tables, such as NAME, receive the _x / _y suffixes.
population_df = pd.merge(p3_df, p5_df, how='inner', on='GEO_ID')

In [18]:
# Check the merged shape; the row count should match the inputs if every GEO_ID paired up.
population_df.shape

(19175, 28)

In [19]:
# Preview the combined P3 + P5 table.
population_df

Unnamed: 0,GEO_ID,NAME_x,P003001,P003002,P003003,P003004,P003005,P003006,P003007,P003008,...,P005008,P005009,P005010,P005011,P005012,P005013,P005014,P005015,P005016,P005017
0,1000000US270530203021004,"Block 1004, Block Group 1, Census Tract 203.02...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1000000US270530203031003,"Block 1003, Block Group 1, Census Tract 203.03...",34,22,10,0,0,0,0,2,...,0,2,0,0,0,0,0,0,0,0
2,1000000US270530259033011,"Block 3011, Block Group 3, Census Tract 259.03...",109,102,0,3,0,0,3,1,...,0,1,3,0,0,0,0,0,3,0
3,1000000US270530265083010,"Block 3010, Block Group 3, Census Tract 265.08...",138,125,1,0,4,0,0,8,...,0,7,1,0,0,0,0,0,0,1
4,1000000US270530260182005,"Block 2005, Block Group 2, Census Tract 260.18...",39,37,0,0,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19170,1000000US270530267022035,"Block 2035, Block Group 2, Census Tract 267.02...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19171,1000000US270530238013013,"Block 3013, Block Group 3, Census Tract 238.01...",37,30,0,0,7,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19172,1000000US270530252055008,"Block 5008, Block Group 5, Census Tract 252.05...",61,53,2,0,2,0,3,1,...,0,1,5,2,0,0,0,0,3,0
19173,1000000US270530262053021,"Block 3021, Block Group 3, Census Tract 262.05...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# Row/column count of the crosswalk, for comparison with the merged result.
rosetta_df.shape

(5914, 5)

In [22]:
# Check the dtype of the join key (GEOID10) on the crosswalk side.
rosetta_df.dtypes

GEOID10      int64
FID          int64
BDNAME      object
BDNUM        int64
TEXT_NBR     int64
dtype: object

In [23]:
# Check dtypes on the population side before joining it to rosetta_df.
population_df.dtypes

GEO_ID     object
NAME_x     object
P003001    object
P003002    object
P003003    object
P003004    object
P003005    object
P003006    object
P003007    object
P003008    object
NAME_y     object
P005001    object
P005002    object
P005003    object
P005004    object
P005005    object
P005006    object
P005007    object
P005008    object
P005009    object
P005010    object
P005011    object
P005012    object
P005013    object
P005014    object
P005015    object
P005016    object
P005017    object
dtype: object

In [24]:
# GEO_ID values look like '1000000US270530203021004'. Everything after the
# '1000000US' prefix is the 15-digit block id, which is what rosetta_df stores
# in its integer GEOID10 column.
# Cast to int64 explicitly: plain `int` is only 32 bits on some platforms
# (e.g. Windows with NumPy < 2), which cannot hold a 15-digit value.
population_df['GEOID10'] = (
    population_df['GEO_ID'].str[len('1000000US'):].astype('int64')
)

In [25]:
# NAME_y is the P5 table's copy of the block name column (NAME_x comes from P3),
# so only one of the two is kept.
# Reassigning with errors='ignore' keeps this cell safe to re-run; the
# in-place drop raised KeyError once the column was already gone.
population_df = population_df.drop(columns=['NAME_y'], errors='ignore')

In [26]:
# Add demographic data to each block listed in the Minneapolis crosswalk.
# how="left" keeps every crosswalk row, even one with no census match.
# validate="many_to_one" makes pandas raise a MergeError if GEOID10 is not
# unique in population_df, instead of silently duplicating blocks (which would
# inflate any population totals computed from this table).
block_data = rosetta_df.merge(
    population_df,
    on="GEOID10",
    how="left",
    validate="many_to_one",
)

In [27]:
# A left merge should leave the row count equal to rosetta_df's.
block_data.shape

(5914, 32)

In [28]:
# Preview the merged block-level table (crosswalk columns + census counts).
block_data

Unnamed: 0,GEOID10,FID,BDNAME,BDNUM,TEXT_NBR,GEO_ID,NAME_x,P003001,P003002,P003003,...,P005008,P005009,P005010,P005011,P005012,P005013,P005014,P005015,P005016,P005017
0,270531260003016,1,Phillips West,90,90,1000000US270531260003016,"Block 3016, Block Group 3, Census Tract 1260, ...",8,1,6,...,0,0,1,0,0,0,0,0,1,0
1,270531260003001,1,Phillips West,90,90,1000000US270531260003001,"Block 3001, Block Group 3, Census Tract 1260, ...",0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,270531260002001,1,Phillips West,90,90,1000000US270531260002001,"Block 2001, Block Group 2, Census Tract 1260, ...",47,6,11,...,1,0,3,0,1,2,0,0,0,0
3,270531260001000,1,Phillips West,90,90,1000000US270531260001000,"Block 1000, Block Group 1, Census Tract 1260, ...",256,116,84,...,0,10,58,32,0,1,0,0,20,5
4,270531260002006,1,Phillips West,90,90,1000000US270531260002006,"Block 2006, Block Group 2, Census Tract 1260, ...",25,16,7,...,0,0,1,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5909,270530083002009,87,Lyndale,53,53,1000000US270530083002009,"Block 2009, Block Group 2, Census Tract 83, He...",94,69,6,...,0,5,21,7,0,0,0,0,13,1
5910,270530082003000,87,Lyndale,53,53,1000000US270530082003000,"Block 3000, Block Group 3, Census Tract 82, He...",108,72,13,...,0,5,21,3,2,0,0,0,16,0
5911,270530082004000,87,Lyndale,53,53,1000000US270530082004000,"Block 4000, Block Group 4, Census Tract 82, He...",257,86,36,...,0,10,155,35,2,0,0,0,110,8
5912,270530083002008,87,Lyndale,53,53,1000000US270530083002008,"Block 2008, Block Group 2, Census Tract 83, He...",123,86,19,...,0,7,14,9,0,0,0,0,4,1


In [30]:
# need to make all those columns numeric
# The census count columns were read as strings (dtype object), so convert
# them before doing any arithmetic. The columns are selected once, by their
# P003 / P005 table prefix, instead of spelling out the 25 names twice.
count_cols = [
    col for col in block_data.columns if col.startswith(('P003', 'P005'))
]
block_data[count_cols] = block_data[count_cols].apply(pd.to_numeric)

In [32]:
# Save the merged block-level table as CSV (without the DataFrame index).
# Create the output folder first so the cell also works on a fresh checkout,
# where ./temp_data may not exist yet.
output_path = Path('./temp_data/mpls_2010blocks_2020nabes_population.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
block_data.to_csv(output_path, index=False)

-----------------------

**Note**: I stopped here because I'm going to publish the rest using Datasette

Done!