In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely import centroid
import os
import shutil
from shapely import MultiPolygon, Polygon, wkt, intersection
import matplotlib.pyplot as plt
from geopy import distance
import pandas_geojson as pdg
import shapely


In [2]:
def get_raw_precinct_gpd(filename='../project_data/chc/ChicagoPrecincts2012_2022.csv'):
    """Load the precinct boundary CSV as a GeoDataFrame indexed by ``precinct_id``.

    The ``the_geom`` column is parsed from WKT text and renamed to
    ``geometry``; the ``FULL_TEXT``, ``SHAPE_AREA`` and ``SHAPE_LEN`` columns
    are dropped; a ``centroid`` column holding the centroid of every geometry
    is appended.  ``precinct_id`` is computed as ``PRECINCT * 100 + WARD``.

    Parameters
    ----------
    filename : str
        Path of the precinct CSV file.

    Returns
    -------
    geopandas.GeoDataFrame
        The precinct table, created with CRS ``epsg:4326`` and indexed by
        ``precinct_id``.
    """
    table = pd.read_csv(filename)
    table['precinct_id'] = table.PRECINCT * 100 + table.WARD
    # Turn the WKT strings into geometry objects before the column is renamed.
    table['the_geom'] = table['the_geom'].map(wkt.loads)
    table = (
        table
        .rename(columns={'the_geom': 'geometry'})
        .drop(columns=['FULL_TEXT', 'SHAPE_AREA', 'SHAPE_LEN'])
    )
    table['centroid'] = centroid(table.geometry)

    return gpd.GeoDataFrame(table, crs='epsg:4326').set_index('precinct_id')

def get_raw_simplex_gpd(filename='../project_data/chc/chc_death_simplices_by_death_in_dim_1.npy'):
    """Load the death-simplex array from a ``.npy`` file as a GeoDataFrame.

    The stored array is read as one record per row with four fields, which
    become the columns ``geometry``, ``death_filtration_value``,
    ``death_filtration_zscore`` and ``death_birth_ratio`` (in that order).
    A ``dsimp_index`` column mirroring the row index is added.

    Parameters
    ----------
    filename : str
        Path of the ``.npy`` file.

    Returns
    -------
    geopandas.GeoDataFrame
        The simplex table, created with CRS ``epsg:4326``.
    """
    field_names = [
        'geometry',
        'death_filtration_value',
        'death_filtration_zscore',
        'death_birth_ratio',
    ]
    # NOTE(security): allow_pickle=True unpickles objects stored in the file,
    # which can execute arbitrary code -- only load files from a trusted source.
    records = np.load(filename, allow_pickle=True)
    # Transposing turns the per-row records into one sequence per field.
    columns = {name: values for name, values in zip(field_names, records.T)}

    simplices = gpd.GeoDataFrame(columns, crs='epsg:4326')
    simplices['dsimp_index'] = simplices.index.values
    return simplices

def get_raw_polls_gpd(filename = '../project_data/chc/Polling_Places_Chicago_2016.csv'):
    """Load the polling-place CSV into a table indexed by ``precinct_id``.

    Source columns are copied under ``polling_*`` names; ``Accessible`` is
    recoded to 1 where it equals ``'Y'`` and 0 otherwise; ``Precinct`` is cast
    to ``int`` and becomes the ``precinct_id`` index.

    Parameters
    ----------
    filename : str
        Path of the polling-place CSV file.

    Returns
    -------
    geopandas.GeoDataFrame
        Polling-place attributes indexed by ``precinct_id``.  No geometry
        column is created; the location is kept as ``polling_lat`` /
        ``polling_lon``.
    """
    source = pd.read_csv(filename)

    # Output column -> values, listed in the order the columns must appear.
    output_columns = {
        'precinct_id': source.Precinct.astype(int),
        'polling_name': source['Polling Place Name'],
        'polling_address': source['Address Line 1'],
        'polling_description': source['Description'],
        'polling_accessible': 1 * (source['Accessible'] == 'Y'),
        'polling_zip': source['Zip'],
        'polling_lat': source['lat'],
        'polling_lon': source['lon'],
    }

    table = gpd.GeoDataFrame()
    for column_name, values in output_columns.items():
        table[column_name] = values

    return table.set_index('precinct_id')


In [3]:
def append_intersecting_simplex_data_to_precinct(precs,dsimps_filename = '../project_data/chc/chc_death_simplices_by_death_in_dim_1.npy'):
    """Attach per-precinct summaries of the death simplices that overlap it.

    A simplex counts as overlapping a precinct when the area of the
    intersection of the two geometries is non-zero.  Every precinct is tested
    against every simplex, so the cost grows with
    ``len(precs) * number of simplices``.

    Three columns are written onto ``precs`` (the frame passed in is modified
    and also returned):

    * ``dsimp_n_intersect`` -- number of overlapping simplices;
    * ``dsimp_mean_dfv`` -- mean ``death_filtration_value`` of those
      simplices, or ``0`` when there are none;
    * ``dsimp_indices`` -- their ``dsimp_index`` values joined with ``'_'``,
      or ``''`` when there are none.

    Parameters
    ----------
    precs : DataFrame-like
        Precinct table with a ``geometry`` column.
    dsimps_filename : str
        Path handed to ``get_raw_simplex_gpd``.

    Returns
    -------
    The same ``precs`` object with the three columns added.
    """
    simplices = get_raw_simplex_gpd(dsimps_filename)

    overlap_counts = []
    mean_values = []
    joined_ids = []

    for _, precinct in precs.iterrows():
        # Simplex rows whose intersection with this precinct has non-zero area.
        overlapping = [
            simplex
            for _, simplex in simplices.iterrows()
            if intersection(precinct.geometry, simplex.geometry).area != 0
        ]
        filtration_values = [simplex.death_filtration_value for simplex in overlapping]
        simplex_ids = [str(simplex.dsimp_index) for simplex in overlapping]

        mean_values.append(np.mean(filtration_values) if filtration_values else 0)
        # Joining an empty list yields '', the marker for "no overlap".
        joined_ids.append('_'.join(simplex_ids))
        overlap_counts.append(len(simplex_ids))

    precs['dsimp_n_intersect'] = overlap_counts
    precs['dsimp_mean_dfv'] = mean_values
    precs['dsimp_indices'] = joined_ids
    return precs

def append_polls_to_precinct(precs, polls_filename = '../project_data/chc/Polling_Places_Chicago_2016.csv'):
    """Join polling-place data onto precincts and impute missing locations.

    The polling table from ``get_raw_polls_gpd`` is left-joined onto ``precs``
    by index.  A row is considered to lack a polling location when its
    ``polling_lat`` is either

    * ``-999`` (the value this function has always tested for; presumably the
      data file's marker for an ungeocoded address -- confirm against the
      data), or
    * NaN, which is what the left join produces for a precinct that has no
      row in the polling table.

    Each such row receives all ``polling_*`` fields of the donor row whose
    polling location is geodesically closest to the precinct centroid.  On a
    tie the donor that appears first is used.

    Parameters
    ----------
    precs : DataFrame-like
        Precinct table indexed like the polling table.  A ``centroid`` column
        is read for rows that need imputation; each value must expose
        ``__geo_interface__['coordinates']`` as ``(lon, lat)``.
    polls_filename : str
        Path handed to ``get_raw_polls_gpd``.

    Returns
    -------
    A new joined frame (the input ``precs`` is not modified) containing the
    ``polling_*`` columns plus ``polling_imputed``: 1 where the polling fields
    were copied from the nearest donor, 0 otherwise.

    Raises
    ------
    ValueError
        If at least one row needs imputation but no row has a usable polling
        location to copy from.
    """
    polling_columns = [
        'polling_address',
        'polling_name',
        'polling_description',
        'polling_accessible',
        'polling_zip',
        'polling_lat',
        'polling_lon',
    ]

    polls = get_raw_polls_gpd(polls_filename)
    precs = precs.join(polls)

    # Treat NaN as missing too: `NaN != -999` is True, so testing only for the
    # sentinel would leave unmatched precincts unfilled and let NaN
    # coordinates into the donor pool used by the nearest-neighbour search.
    latitudes = precs['polling_lat']
    needs_imputation = (latitudes.isna() | (latitudes == -999)).to_numpy()

    # Donor rows and their coordinates are the same for every missing
    # precinct, so extract them once instead of re-walking the frame.
    precs_with = precs[~needs_imputation]
    donor_coords = list(zip(precs_with['polling_lat'], precs_with['polling_lon']))

    if needs_imputation.any() and not donor_coords:
        raise ValueError(
            'cannot impute polling places: no precinct has a usable polling location'
        )

    column_positions = [precs.columns.get_loc(name) for name in polling_columns]

    for counter in range(len(precs)):
        # Progress indicator: the distance search below is slow.
        if counter % 100 == 0: print(counter)
        if not needs_imputation[counter]:
            continue

        # GeoJSON coordinates are (lon, lat); geopy expects (lat, lon).
        origin = precs['centroid'].iloc[counter].__geo_interface__['coordinates'][::-1]
        # min() returns the first minimum, so ties go to the earliest donor.
        nearest = min(
            range(len(donor_coords)),
            key=lambda k: distance.distance(origin, donor_coords[k]).km,
        )

        # Positional writes touch only this row, even if index labels repeat.
        for name, position in zip(polling_columns, column_positions):
            precs.iloc[counter, position] = precs_with[name].iloc[nearest]

    precs['polling_imputed'] = needs_imputation.astype(int)

    return precs
    
                

In [4]:
# Build the enriched precinct table in three stages, rebinding `p` after each.
# Stage 1: load the precinct boundaries from the default CSV path.
p = get_raw_precinct_gpd()
# Stage 2: add the dsimp_n_intersect / dsimp_mean_dfv / dsimp_indices columns.
# Every precinct is tested against every simplex, so this step is slow.
p = append_intersecting_simplex_data_to_precinct(p)
# Stage 3: join the polling-place columns and add polling_imputed.
# This call prints a running row counter every 100 rows (the output below).
p = append_polls_to_precinct(p)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
