In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import descartes

In [2]:
import numpy as np

In [3]:
import shapely
import shapely.wkt
from collections import deque

In [52]:
# Locations of the raw inputs (both zip archives were unzipped by hand first):
# the school-district shapefile, the 2016 general-election precinct shapefile,
# and the per-district voter CSV.
districts_shp_path = 'districts/districts/School_Districts_1920.shp'
general_shp_path = 'general/general/2016General.shp'
school_voter_path = 'DistrictVoterData.csv'

# School districts, with buffer(0) applied to every geometry right after loading
# (presumably to repair invalid polygons -- confirm).
districts_frame = gpd.read_file(districts_shp_path)
districts_frame['geometry'] = districts_frame['geometry'].buffer(0)

# Precinct shapes and the tabular vote counts.
general_frame = gpd.read_file(general_shp_path)
voting_frame = pd.read_csv(school_voter_path)

In [53]:
# Apply buffer(0) to the geometry column of the precinct frame and the district frame.
for frame in (general_frame, districts_frame):
    frame['geometry'] = frame['geometry'].buffer(0)

In [54]:
# Region shapes from ESC_Regions.shp, with buffer(0) applied to each geometry.
super_districts_path = 'ESC_Regions.shp'
super_districts_frame = gpd.read_file(super_districts_path)
super_districts_frame['geometry'] = super_districts_frame['geometry'].buffer(0)

# Only the district number and its shape are needed from the district frame.
districts_frame = districts_frame[['DISTRICT_N', 'geometry']]

In [55]:
# Keep only the region identifier and its shape.
super_districts_frame = super_districts_frame[['REGION','geometry']]

In [56]:
#Map each school district to its super region. The two layers do not line up exactly,
#so a district is kept with a region only when more than 90% of its area overlaps it.
districts_wgs84 = districts_frame.to_crs('EPSG:4326')
try:
    #newer geopandas releases name the spatial-predicate keyword `predicate`
    output_super_join = gpd.sjoin(districts_wgs84, super_districts_frame, predicate = 'intersects')
except TypeError:
    #older geopandas releases only understand the original `op` keyword
    output_super_join = gpd.sjoin(districts_wgs84, super_districts_frame, op = 'intersects')
#index_right holds index *labels* of super_districts_frame, so the region shape is
#looked up with .loc (label based) rather than .iloc (position based)
output_super_join['percent_overlap'] = output_super_join.apply(
    lambda x: x['geometry'].intersection(super_districts_frame.loc[x['index_right'], 'geometry']).area / x['geometry'].area*100,
    axis=1)
output_super_join = output_super_join[output_super_join.percent_overlap > 90]

In [57]:
#returns the total votes grouped by newly allocated region, largest first
def recalculate_top_votes(total_votes):
    """Sum VOTES per NEWLY_ALLOCATED region and rank the regions by that total.

    Parameters
    ----------
    total_votes : DataFrame
        Must contain the columns 'NEWLY_ALLOCATED' and 'VOTES'. Any other
        columns (geometry, party labels, ...) are ignored.

    Returns
    -------
    DataFrame
        Two columns, 'NEWLY_ALLOCATED' and 'VOTES', sorted by 'VOTES' in
        descending order with a fresh 0..n-1 index.
    """
    # Select VOTES *before* summing: summing every column would also try to add
    # up non-numeric columns such as geometry or party strings.
    votes_per_region = total_votes.groupby('NEWLY_ALLOCATED')[['VOTES']].sum()
    votes_per_region = votes_per_region.sort_values('VOTES', ascending = False)
    return votes_per_region.reset_index()

In [58]:
#this takes in a geopandas frame and returns the position (0-based row number) in the
#frame of the geometry that has the northernmost point
def find_starting_geom(district_frame):
    """Return the row position of the geometry reaching the largest y coordinate.

    Parameters
    ----------
    district_frame : GeoDataFrame
        Only its ``geometry`` attribute is read; it is iterated in row order.

    Returns
    -------
    int
        0-based position of the first geometry whose exterior ring(s) contain
        the maximum y value, or -1 when no geometry contributes any point
        (for example an empty frame).
    """
    northernmost_position = -1
    # -inf rather than -1 so that geometries lying entirely at y <= -1 still count
    northernmost_y = float('-inf')
    for position, geom in enumerate(district_frame.geometry):
        if geom is None or geom.is_empty:
            continue
        if geom.geom_type == 'Polygon':
            polygons = [geom]
        else:
            # multi-part geometry: inspect every polygon part through .geoms
            polygons = [part for part in getattr(geom, 'geoms', [])
                        if part.geom_type == 'Polygon']
        for polygon in polygons:
            for point in polygon.exterior.coords:
                # strict '>' keeps the earliest row when several rows tie
                if point[1] > northernmost_y:
                    northernmost_y = point[1]
                    northernmost_position = position
    return northernmost_position

In [59]:
from math import radians, cos, sin, asin, sqrt
#used for centroid distance calculation
def haversine(lon1, lat1, lon2, lat2):
    """
    Great-circle distance in miles between two points given as
    (longitude, latitude) pairs in decimal degrees.
    """
    earth_radius = 3956  # miles; 6371 would give kilometers

    # work in radians from here on
    lam_a, phi_a, lam_b, phi_b = (radians(deg) for deg in (lon1, lat1, lon2, lat2))

    # haversine formula
    delta_lam = lam_b - lam_a
    delta_phi = phi_b - phi_a
    hav = sin(delta_phi/2)**2 + cos(phi_a) * cos(phi_b) * sin(delta_lam/2)**2
    central_angle = 2 * asin(sqrt(hav))
    return central_angle * earth_radius

In [127]:
#Attach the voting rows to every school district (left join on DISTRICT_N), then
#seed NEWLY_ALLOCATED with the district's current REGION as an integer
votes_by_district_n = voting_frame.set_index('DISTRICT_N')
join_w_data = output_super_join.join(votes_by_district_n, on='DISTRICT_N', how='left')
join_w_data['NEWLY_ALLOCATED'] = join_w_data['REGION'].astype(int)
join_w_data.head()

Unnamed: 0.1,DISTRICT_N,geometry,index_right,REGION,percent_overlap,Unnamed: 0,PTY,VOTES,NEWLY_ALLOCATED
0,73901,"POLYGON ((-96.99308 31.40669, -96.99297 31.406...",11,12,100.0,768,D,178,12
0,73901,"POLYGON ((-96.99308 31.40669, -96.99297 31.406...",11,12,100.0,769,I,17,12
0,73901,"POLYGON ((-96.99308 31.40669, -96.99297 31.406...",11,12,100.0,770,R,480,12
76,18908,"POLYGON ((-97.75760 31.85712, -97.75616 31.855...",11,12,100.0,209,D,85,12
76,18908,"POLYGON ((-97.75760 31.85712, -97.75616 31.855...",11,12,100.0,210,I,10,12


In [128]:
#check voting distribution: total VOTES per NEWLY_ALLOCATED region, largest first,
#before any region has been split
recalculate_top_votes(join_w_data)

Unnamed: 0,NEWLY_ALLOCATED,VOTES
0,4,1806946
1,10,1272735
2,11,1102963
3,13,967361
4,20,664822
5,6,347558
6,7,345728
7,1,328456
8,12,258833
9,19,196610


In [129]:
#i is the number given to each newly created region: 16 new regions, 21 to 36.
#Every pass splits the region that currently holds the most votes roughly in half.
#Start from the original REGION assignment so the cell can be re-run safely.
join_w_data['NEWLY_ALLOCATED'] = join_w_data['REGION'].astype(int)
for i in range(21,37):
    #the region with the most votes is the one that gets split
    voting_region = recalculate_top_votes(join_w_data)
    district_to_split = voting_region.iloc[0]

    #progress bar
    print(i)

    #the region we're splitting and the districts inside it that may be moved out
    region_split_on = district_to_split['NEWLY_ALLOCATED']
    total_votes_needed = district_to_split['VOTES'] /2
    districts_to_seperate = join_w_data.loc[join_w_data['NEWLY_ALLOCATED'] == region_split_on]
    districts_to_seperate = districts_to_seperate.reset_index()

    #anchor: centroid of the district that reaches furthest north
    index_starting = find_starting_geom(districts_to_seperate)
    starting_centroid = districts_to_seperate.iloc[index_starting]['geometry'].centroid
    start_lon, start_lat = starting_centroid.x, starting_centroid.y

    #distance (miles) from the anchor centroid to every district centroid
    district_centroids = [geom.centroid for geom in districts_to_seperate['geometry']]
    districts_to_seperate['DISTANCE_FROM'] = [
        haversine(start_lon, start_lat, centroid.x, centroid.y) for centroid in district_centroids
    ]

    #one entry per district, ordered farthest-first from the anchor (descending sort)
    districts_to_seperate_sorted = districts_to_seperate.sort_values('DISTANCE_FROM', ascending = False)
    dropped_distance_sorted = districts_to_seperate_sorted.drop_duplicates('DISTRICT_N')
    candidate_districts = dropped_distance_sorted['DISTRICT_N'].tolist()

    #total votes of each district, summed once over all of its rows
    votes_by_district = districts_to_seperate.groupby('DISTRICT_N')['VOTES'].sum()

    #NOTE(review): selection starts at position 1, so the single farthest district
    #(position 0) is never moved. This ordering is kept as-is because the manual
    #reassignment table later in the notebook was built against it -- confirm
    #whether skipping position 0 is intended.
    newRegion = []
    total_votes = 0
    #keep adding districts until the new region holds at least half of the votes;
    #stop quietly if the candidates run out first instead of raising IndexError
    for district_num in candidate_districts[1:]:
        if total_votes >= total_votes_needed:
            break
        newRegion.append(district_num)
        total_votes += votes_by_district.loc[district_num]

    #set the selected districts to their new region number
    join_w_data.loc[join_w_data['DISTRICT_N'].isin(newRegion), 'NEWLY_ALLOCATED'] = i

21
4
1806946
146904
[20905, 20907, 20910, 79906, 20902, 20904, 79901, 237902, 237905, 84908, 237904, 79907, 20901, 101914, 79910, 84909, 84902, 101903, 84906, 20908, 84911, 84901, 101907, 101920, 84910]

22
10
1272735
91909
[70907, 70901, 129905, 234902, 70903, 129904, 70915, 70912, 129910, 70910, 129903, 70908, 234907, 70905, 70911, 129901, 57906, 57913, 57904, 129906, 57907, 129902, 116906, 116908, 116916, 57910, 57914, 57905, 116910, 57919, 199902, 199901, 116903, 57912, 116905, 57911]

23
11
1102963
49909
[72909, 72903, 72901, 72908, 213901, 72904, 72910, 182905, 126907, 111903, 182901, 126904, 126903, 111902, 182904, 111901, 182906, 126906, 126901, 126908, 126911, 126905, 184909, 126902, 182902, 184904, 182903, 220908, 220912, 220904, 184907, 184903, 220914, 184911, 220901, 220905, 184908, 220920, 220917, 220910, 220916]

24
13
967361
246902
[86902, 89901, 94902, 75903, 94904, 75906, 94901, 130902, 86024, 89905, 75902, 75901, 75908, 46901, 94903, 28903, 46902, 28906, 86901, 144901

In [146]:
#check voting distribution after the regions have been split
#NOTE(review): this cell's execution count (146) is later than the manual
#reassignment cell (139), so the table shown below presumably reflects the
#manually reassigned data -- re-run the notebook top to bottom to confirm
recalculate_top_votes(join_w_data)

Unnamed: 0,NEWLY_ALLOCATED,VOTES
0,36,492902
1,32,475586
2,21,444118
3,4,433713
4,26,421970
5,27,396665
6,29,372912
7,13,372250
8,6,347558
9,7,345728


In [131]:
#write out the spatial file with the automatically split regions
#NOTE(review): the shapefile format limits field names to 10 characters, so long
#column names such as NEWLY_ALLOCATED are presumably truncated on write -- confirm
join_w_data.to_file('by_centroid.shp')

In [138]:
#manual clean-up table: item[0] is a region number, item[1] lists the DISTRICT_N
#values whose NEWLY_ALLOCATED must be reset to item[0]; an empty list means no
#manual change for that region
reassign = [
    (1, []),
    (2, []),
    (3, []),
    (4, [84903]),
    (5, []),
    (6, []),
    (7, []),
    (8, []),
    (9, []),
    (10, [74907]),
    (11, []),
    (12, []),
    (13, []),
    (14, []),
    (15, []),
    (16, []),
    (17, []),
    (18, []),
    (19, []),
    (20, []),
    (21, [101921]),
    (22, []),
    (23, [72902]),
    (24, []),
    (25, []),
    (26, []),
    (27, [142901]),
    (28, [234902,129904, 129910]),
    (29, []),
    (30, [75903, 75906]),
    (31, [126904,]),
    (32, [94902,89903]),
    (33, []),
    (34, [70909,70907]),
    (35, [20906,20905,84902,]),
    (36, [57911])    
]

In [139]:
#apply the manual clean-up table: every listed district is moved to the region
#number it is filed under
for region_number, district_ids in reassign:
    in_this_region = join_w_data.DISTRICT_N.isin(district_ids)
    join_w_data.loc[in_this_region, 'NEWLY_ALLOCATED'] = region_number

In [140]:
#write out the spatial file again, now including the manual reassignments
join_w_data.to_file('by_centroid_maually_reshaped.shp')

In [142]:
#preview the first rows of the joined district/vote frame
join_w_data.head()

Unnamed: 0.1,DISTRICT_N,geometry,index_right,REGION,percent_overlap,Unnamed: 0,PTY,VOTES,NEWLY_ALLOCATED
0,73901,"POLYGON ((-96.99308 31.40669, -96.99297 31.406...",11,12,100.0,768,D,178,12
0,73901,"POLYGON ((-96.99308 31.40669, -96.99297 31.406...",11,12,100.0,769,I,17,12
0,73901,"POLYGON ((-96.99308 31.40669, -96.99297 31.406...",11,12,100.0,770,R,480,12
76,18908,"POLYGON ((-97.75760 31.85712, -97.75616 31.855...",11,12,100.0,209,D,85,12
76,18908,"POLYGON ((-97.75760 31.85712, -97.75616 31.855...",11,12,100.0,210,I,10,12


In [143]:
#keep only the tabular columns (no geometry) for the CSV export
join_w_data_trimmed= join_w_data[['DISTRICT_N','REGION','PTY','VOTES','NEWLY_ALLOCATED']]

In [145]:
#write the trimmed table (district, original region, party, votes, new region) to CSV
join_w_data_trimmed.to_csv('centroid_manually_reshaped.csv')

In [141]:
#voting distribution per region after the manual reassignment
recalculate_top_votes(join_w_data)

Unnamed: 0,NEWLY_ALLOCATED,VOTES
0,36,492902
1,32,475586
2,21,444118
3,4,433713
4,26,421970
5,27,396665
6,29,372912
7,13,372250
8,6,347558
9,7,345728
