In [38]:
import geopandas
import pandas as pd
import maup
import sys

import shapely.geometry

sys.path.insert(0, '..')
from read_config import read_yaml

In [55]:
# Read the preprocessing configuration and build the paths to Nevada's raw inputs.
config = read_yaml('../preprocessing_config.yaml')
main_path = '../../'

# Every raw input lives under the same config subtree; look it up once.
nevada_raw = config['PREPROCESSING']['DATA']['NEVADA']['UNPROCESSED']
raw_geometry = nevada_raw['GEOMETRY']
raw_information = nevada_raw['INFORMATION']

# Geometry files
districts_geo_path = main_path + raw_geometry['DISTRICTS']
precincts_geo_path = main_path + raw_geometry['PRECINCTS']
cb_geo_path = main_path + raw_geometry['CENSUS_BLOCKS']

# Tabular files
cb_info_path = main_path + raw_information['CENSUS_BLOCKS']
county_election_path = main_path + raw_information['COUNTY_ELECTION']
county_name_fips_path = main_path + raw_information['COUNTY_NAME_FIPS']

In [66]:
# Geometry layers (districts, precincts, census blocks), read in that order.
districts_geo, precincts_geo, cb_geo = (
    geopandas.read_file(geo_path)
    for geo_path in (districts_geo_path, precincts_geo_path, cb_geo_path)
)

# Tabular inputs: block-level information, county election results (Excel),
# and the county-name/FIPS lookup.
cb_info = pd.read_csv(cb_info_path)
county_election = pd.read_excel(county_election_path)
county_name_fips = pd.read_csv(county_name_fips_path)

In [67]:
# Reproject districts and precincts into the census blocks' coordinate
# reference system so all three layers share one CRS.
block_crs = cb_geo.crs
districts_geo = districts_geo.to_crs(block_crs)
precincts_geo = precincts_geo.to_crs(block_crs)

### Clean and prepare dataframes for merges

In [68]:
# Show the raw column names so the selections and renames below can be checked
# against what was actually loaded.
print(cb_geo.columns)
print(cb_info.columns)

# Census-block geometry: keep only the identifier and the shape.
# rename() without inplace returns a new frame, so nothing is written into a
# slice of the originally loaded GeoDataFrame (avoids SettingWithCopyWarning).
cb_geo = cb_geo[['GEOID20', 'geometry']].rename(columns={'GEOID20': 'blockId'})

# Both sides of the later blockId merge must use the same key dtype.
# FIX: cb_info's key is now derived from cb_info's OWN blockId column. The
# previous code copied cb_geo['blockId'] into cb_info, which paired each
# demographic row with whatever geometry happened to share its row position
# and made the later merge match trivially.
# NOTE(review): assumes cb_info['blockId'] was parsed as an integer or string
# (not a float) by read_csv - confirm, otherwise str() will not reproduce GEOID20.
cb_geo['blockId'] = cb_geo['blockId'].astype(str)
cb_info['blockId'] = cb_info['blockId'].astype(str)

# District geometry: keep the district identifier and the shape.
districts_geo = districts_geo[['District', 'geometry']].rename(columns={'District': 'districtId'})
# NOTE(review): precincts_geo is used as loaded; a later cell reads a
# 'precinctId' column from it, so that column is presumably already present
# in the source file - confirm.

# County election totals with lower-case, model-facing column names.
county_election = county_election[['county', 'Democrat', 'Republican', 'Total']].rename(
    columns={'Democrat': 'democrat', 'Republican': 'republican', 'Total': 'voters'})

# County name -> FIPS lookup:
#   county: drop the final space-separated word of the name
#   fips:   take the first space-separated token, strip its first two
#           characters and convert the rest to int
#           (presumably removing a state prefix - TODO confirm)
county_name_fips = county_name_fips.assign(
    county=county_name_fips['county'].apply(lambda name: name.rsplit(' ', 1)[0]),
    fips=county_name_fips['fips'].apply(lambda code: int(code.split(' ')[0][2:])),
).drop(columns=['state'])

# Attach the FIPS code to every county's election totals.
county_election = county_election.merge(county_name_fips, on='county')

Index(['STATEFP20', 'COUNTYFP20', 'TRACTCE20', 'BLOCKCE20', 'GEOID20',
       'NAME20', 'MTFCC20', 'UR20', 'UACE20', 'UATYPE20', 'FUNCSTAT20',
       'ALAND20', 'AWATER20', 'INTPTLAT20', 'INTPTLON20', 'geometry'],
      dtype='object')
Index(['Unnamed: 0', 'fips', 'blockId', 'population', 'white', 'black',
       'asian', 'hispanic'],
      dtype='object')


### Approximate democrats and republicans per census block using county data

In [69]:
# Preview the block-level table inline. This replaces a leftover debugging
# dump that wrote the entire frame to "haha.csv" in the working directory on
# every run.
cb_info.head()

In [70]:
# Number of census blocks (non-null blockId entries) recorded for each county FIPS code.
cb_per_county = (
    cb_info.groupby('fips')['blockId']
    .count()
    .rename('cbCount')
    .reset_index()
)

# Spread each county's totals evenly over its census blocks: every block in a
# county receives county_total / number_of_blocks for each tally column.
vote_columns = ['democrat', 'republican', 'voters']
county_with_counts = county_election.merge(cb_per_county, on='fips')
county_with_counts[vote_columns] = county_with_counts[vote_columns].div(
    county_with_counts['cbCount'], axis=0)

# Only the join key and the per-block estimates are needed downstream.
county_election = county_with_counts[['fips'] + vote_columns]

### Merge geometric data with demographics and election data

In [71]:
# Join block geometry with block-level information (on blockId), then attach
# the per-block election estimates of the block's county (on fips).
cb = cb_geo.merge(cb_info, on='blockId').merge(county_election, on='fips')
print(cb.isna().sum())
print(len(cb))

# Both merges are inner joins, so a block without a match is silently DROPPED
# rather than filled with NaN; the null check alone can never detect it.
# Comparing row counts catches dropped (or duplicated) blocks.
assert len(cb) == len(cb_geo), (
    f"imperfect matching in merging: {len(cb_geo)} census blocks before merge, {len(cb)} after")
assert (cb.isna().sum().sum() == 0), "imperfect matching in merging"

blockId       0
geometry      0
Unnamed: 0    0
fips          0
population    0
white         0
black         0
asian         0
hispanic      0
democrat      0
republican    0
voters        0
dtype: int64
57409


In [72]:
### Assign census blocks to districts and precincts

In [73]:
# Districts: maup.assign gives, per census block, the index label of the
# district it is assigned to. Any missing value means a block got no district.
assignment_cb_districts = maup.assign(cb, districts_geo)
assert not assignment_cb_districts.isna().any(), "Error in census block to district assignment"
cb['district'] = assignment_cb_districts

# Precincts: same procedure against the precinct layer.
assignment_cb_precincts = maup.assign(cb, precincts_geo)
assert not assignment_cb_precincts.isna().any(), "Error in census block to precinct assignment"
cb['precinct'] = assignment_cb_precincts

  geometry.index = i
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]

  geometries = geometries[geometries.area > area_cutoff]

  return assign_to_max(intersections(sources, targets, area_cutoff=0).area)
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', y

In [74]:
# The 'district' / 'precinct' columns on cb hold index labels of the target
# frames. Expose those labels as an 'index' column so they can act as join keys.
districts_geo['index'] = districts_geo.index
precincts_geo['index'] = precincts_geo.index

district_lookup = districts_geo[['districtId', 'index']]
precinct_lookup = precincts_geo[['precinctId', 'index']]

# Swap the positional district reference for the real districtId.
cb_with_district = cb.merge(district_lookup, left_on='district', right_on='index')
cb = cb_with_district.drop(columns=['index', 'district'])

# Swap the positional precinct reference for the real precinctId.
cb_with_precinct = cb.merge(precinct_lookup, left_on='precinct', right_on='index')
cb = cb_with_precinct.drop(columns=['index', 'precinct'])

# Every row produced here belongs to the enacted plan.
cb['districtingId'] = 'enacted'

# Final column order of the processed census-block table.
output_columns = [
    'blockId', 'precinctId', 'districtId', 'districtingId',
    'population', 'white', 'black', 'asian', 'hispanic',
    'democrat', 'republican', 'voters',
    'geometry',
]
cb = cb[output_columns]
cb

Unnamed: 0,blockId,precinctId,districtId,districtingId,population,white,black,asian,hispanic,democrat,republican,voters,geometry
0,320019505001019,3200116,2,enacted,0,0,0,0,0,1.779045,5.443336,9.289380,"POLYGON ((-118.64108 39.51654, -118.64108 39.5..."
1,320019501001041,3200116,2,enacted,0,0,0,0,0,1.779045,5.443336,9.289380,"POLYGON ((-118.12490 39.63694, -118.12489 39.6..."
2,320019501001193,3200116,2,enacted,0,0,0,0,0,1.779045,5.443336,9.289380,"POLYGON ((-118.57486 39.51611, -118.57486 39.5..."
3,320019501001035,3200116,2,enacted,0,0,0,0,0,1.779045,5.443336,9.289380,"POLYGON ((-117.86432 39.95896, -117.86416 39.9..."
4,320019501001115,3200116,2,enacted,84,81,0,0,0,1.779045,5.443336,9.289380,"POLYGON ((-118.44116 39.63778, -118.44111 39.6..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57404,320030034233001,320036907,1,enacted,39,36,3,3,3,3.466162,7.523598,15.046305,"POLYGON ((-115.20695 36.19303, -115.20634 36.1..."
57405,320030001012010,320036331,1,enacted,31,12,0,0,17,1.715757,3.098659,6.584291,"POLYGON ((-115.20605 36.17734, -115.20436 36.1..."
57406,320030001012011,320036331,1,enacted,21,8,5,4,5,11.371094,12.182333,32.351549,"POLYGON ((-115.20073 36.17649, -115.20073 36.1..."
57407,320030010031017,320033308,1,enacted,0,0,0,0,0,11.371094,12.182333,32.351549,"POLYGON ((-115.22559 36.15180, -115.22548 36.1..."


In [79]:
# Resolve the output location for the processed, enacted-plan census blocks
# and write the table out as GeoJSON.
nevada_processed = config['PREPROCESSING']['DATA']['NEVADA']['PROCESSED']
cb_path = main_path + nevada_processed['ENACTED']['CENSUS_BLOCKS']
cb.to_file(cb_path, driver="GeoJSON")
