-----
This Notebook provides some sample code you can use (and extend) to check your maps
-----
-----

In [None]:
!pip install geojson
import geojson
import pandas as pd
import numpy as np
import random

### Checking Precinct Assignment for my map

In [None]:
# `assignment` holds the precinct-to-district assignments of the map under review.
# Later cells read its `GEOID20` (precinct id) and `District` columns.
assignment = pd.read_csv('badlypaintedmapexample.csv')
# Alternative input kept for quick switching (presumably an incomplete map, judging by the file name):
#assignment = pd.read_csv('incompletemap.csv')
# Show the first rows as a quick sanity check of the loaded file.
assignment.head(10)

In [None]:
# Number of precincts a complete map is expected to contain. The value is the one
# the original report line used; presumably the statewide precinct count -- confirm.
TOTAL_PRECINCTS = 6754

# How many precincts are assigned, and to how many distinct districts.
print(f"- We have {assignment.shape[0]} number of precincts assigned to {assignment.District.nunique()} districts.")
#print(f"- There are {np.count_nonzero(assignment.isnull().values.ravel())} missing values in the dataset.\n")
# True when no precinct id appears twice in the assignment file.
print(f"- All the precint ids in the dataset are unique is {assignment.shape[0] == assignment.GEOID20.nunique()}")
# The difference counts rows of `assignment`, i.e. precincts -- not districts.
print(f"- {TOTAL_PRECINCTS-assignment.shape[0]} precincts are unassigned (including 415 missing/corrupted precincts)")

In [None]:
#Number of Precincts per district
assignment.groupby(by="District").size()

### Analyzing Precinct Data per district

In [None]:
# `pre_data` holds the precinct demographics and historical election data.
# Columns read by later cells: GEOID20, District, Total_2020_TotalAdj and the
# Dem_/Rep_/Total_2016-2021_Comp vote columns.

# final_NJ_2020_State_Legislative_data.csv has the 2020 data
pre_data = pd.read_csv('final_NJ_2020_State_Legislative_data.csv')

# Show the first rows as a quick sanity check of the loaded file.
pre_data.head(5)

### Calculating the population of each district of my map

In [None]:
##This code shows you an example of how to calculate the population of a district, you can adapt it to count the partisan support.

# Walk the map district by district and add up the 2020 adjusted population
# ('Total_2020_TotalAdj' in pre_data) of every precinct assigned to it.
population=0
sum_pop=0
for district_id, district_rows in assignment.groupby(by="District"):
    for row_label, precinct_row in district_rows.iterrows():
        # The precinct id is read from the first column of `assignment`.
        # `.iloc[0]` makes the positional read explicit; a bare `row[0]` on a
        # label-indexed Series is deprecated in recent pandas.
        geoid = str(precinct_row.iloc[0])
        # Look the precinct up once and reuse the result for the test and the sum.
        matches = pre_data.loc[pre_data.GEOID20==geoid,'Total_2020_TotalAdj']
        # .any() is False when the precinct is absent from pre_data or when the
        # matching population values are all 0/NaN; such precincts add nothing.
        if matches.any():
            population += matches.values[0]
    print("The population of District "+str(district_id)+ " is "+str(population))
    sum_pop+=population
    # Start the next district from zero.
    population=0
print("Total population is "+str(sum_pop))

------
Checking the geography of the map
------
------

### Are two precincts contiguous?

In [None]:
!pip install shapely
!pip install PyShp

In [None]:
import shapefile as shp
from shapely.geometry import Polygon,shape,MultiPolygon
#import StringIO


# File names of the three shapefile components passed to the reader below.
shpfile = 'nj_vtd_2020_bound.shp'
dbffile = 'nj_vtd_2020_bound.dbf'
shxfile = 'nj_vtd_2020_bound.shx'
# NOTE: `shpfile` is rebound here from the path string to the pyshp Reader object;
# the following cells use `shpfile` as the Reader (fields, iterShapeRecords).
shpfile = shp.Reader(shp=shpfile, shx=shxfile, dbf=dbffile)
# Print the reader as a quick check that the files were opened.
print(shpfile)

In [None]:
# Attribute names of the shapefile records; the first entry of `fields` is skipped.
field_names = [field[0] for field in shpfile.fields[1:]]
print(field_names)

# Map each record's 4th attribute (used as the precinct id key throughout the
# notebook) to its geometry, counting the records along the way.
precinct_boundaries = {}
count = 0
for shape_record in shpfile.iterShapeRecords():
    precinct_boundaries[shape_record.record[3]] = shape_record.shape
    count += 1

# Print the coordinates of one precinct polygon as a sanity check.
print(Polygon(shape(precinct_boundaries['34003060003'])))

In [None]:
###Examples
# Build shapely polygons for three precincts taken from `precinct_boundaries`.
a = Polygon(shape(precinct_boundaries['34003060003']))
b = Polygon(shape(precinct_boundaries['34007043046']))
c = Polygon(shape(precinct_boundaries['34007043047']))

#print(shape(precinct_boundaries['34029102002']))
                  
#b = Polygon(precint_boundaries['34007043046'])
# `touches` is the shapely predicate used in this notebook to decide contiguity.
print(a.touches(b))
print(c.touches(b))

#### Function to check if two precincts are contiguous, i.e. their boundaries touch (needs `precinct_boundaries` to be instantiated)

In [None]:
def is_contiguous_precinct(p1,p2,precinct_boundaries):
    """Return whether precincts `p1` and `p2` touch each other.

    Parameters
    ----------
    p1, p2 : precinct ids used as keys of `precinct_boundaries`.
    precinct_boundaries : mapping of precinct id -> geometry object accepted by
        shapely's `shape()` (the notebook stores pyshp shapes in it).

    Returns
    -------
    The result of shapely's `touches` predicate for the two geometries, or
    False when either id is unknown or either geometry is not a plain
    'Polygon'.

    NOTE(review): precincts whose geometry is e.g. a MultiPolygon are always
    reported as non-contiguous; this matches the previous behaviour.
    """
    try:
        # Resolve both ids first so an unknown precinct short-circuits to False.
        raw_a = precinct_boundaries[p1]
        raw_b = precinct_boundaries[p2]

        # `geom_type` is the supported attribute; `.type` is deprecated in
        # Shapely 2.0. `shape()` already returns the geometry, so no extra
        # Polygon(...) wrapping is needed.
        a = shape(raw_a)
        if a.geom_type != 'Polygon':
            return False
        b = shape(raw_b)
        if b.geom_type != 'Polygon':
            return False
        return a.touches(b)
    except KeyError:
        return False

In [None]:
# Spot-check a handful of precinct pairs; one result is printed per pair, in order.
example_pairs = [
    ('34003060003', '34007043046'),
    ('34007043047', '34007043046'),
    ('34003060003', '34003060002'),
    ('34007043047', '34003060003'),
    ('34041080003', '34005020001'),
    ('34041080003', '34005075101'),
]
for first_id, second_id in example_pairs:
    print(is_contiguous_precinct(first_id, second_id, precinct_boundaries))



#### Function to find the precincts contiguous to a given precinct (needs `precinct_boundaries` to be instantiated)

In [None]:
## This code will tell you how many precinct are contiguous to a given precinct
def contiguous_precincts(p1,precinct_boundaries,candidates=None):
    """List the precincts that are contiguous to `p1`.

    Parameters
    ----------
    p1 : id of the precinct whose neighbours are wanted.
    precinct_boundaries : mapping of precinct id -> geometry, forwarded to
        `is_contiguous_precinct`.
    candidates : optional iterable of precinct ids to test against `p1`.
        Defaults to the `GEOID20` column of the module-level `pre_data` table,
        which is what the function always used before.

    Returns
    -------
    A two-element list `[count, neighbors]`, where `neighbors` is the list of
    candidate ids for which `is_contiguous_precinct` is truthy and `count` is
    its length.
    """
    if candidates is None:
        candidates = pre_data['GEOID20']
    # Iterate over the id column directly instead of building a full row with
    # `pre_data.iloc[p]` (twice) for every candidate.
    neighbors = [
        geoid for geoid in candidates
        if is_contiguous_precinct(p1, geoid, precinct_boundaries)
    ]
    return [len(neighbors), neighbors]


In [None]:
print(contiguous_precincts('34005070001',precinct_boundaries))
print(contiguous_precincts('34041080003',precinct_boundaries))

In [None]:
import pickle
picklename = 'FinalPrecinctContiguous.p'
# NOTE: from this cell on, the name `contiguous_precincts` refers to the object
# loaded from the pickle (later cells index it by precinct id) and no longer to
# the function of the same name defined earlier in the notebook.
# SECURITY: unpickling can execute arbitrary code -- only load files you trust.
# The `with` block makes sure the file handle is closed after loading.
with open(picklename,"rb") as pickle_file:
    contiguous_precincts= pickle.load(pickle_file)

In [None]:
contiguous_precincts
#type(contiguous_precincts)

In [None]:
# Lookup tables keyed by precinct id (GEOID20), all filled in one pass over pre_data.
prec_to_dist = {}     # precinct -> value of the 'District' column
blue_red_votes = {}   # precinct -> [Dem, Rep, Total] of the 2016-2021 Comp columns
prec_pop = {}         # precinct -> 'Total_2020_TotalAdj'
vote_columns = ['Dem_2016-2021_Comp', 'Rep_2016-2021_Comp', 'Total_2016-2021_Comp']
for row_label, record in pre_data.iterrows():
    geoid = record['GEOID20']
    prec_to_dist[geoid] = record['District']
    blue_red_votes[geoid] = [record[column] for column in vote_columns]
    prec_pop[geoid] = record['Total_2020_TotalAdj']

In [None]:
blue_red_votes

In [None]:
# district -> [Dem, Rep, Total] running totals, pre-seeded for districts 1..40.
districts_votes = {district_id: [0, 0, 0] for district_id in range(1, 41)}
# Rows containing any missing value are dropped before accumulating.
for row_label, record in pre_data.dropna().iterrows():
    tally = districts_votes[record['District']]
    tally[0] += record['Dem_2016-2021_Comp']
    tally[1] += record['Rep_2016-2021_Comp']
    tally[2] += record['Total_2016-2021_Comp']
districts_votes

In [None]:
# current district to population dictionary
district_to_pop = {}
population=0
# total population
total_pop=0
for district_id, district_rows in pre_data.groupby(by="District"):
    for row_label, precinct_row in district_rows.iterrows():
        # The precinct id is read from the first column of `pre_data`.
        # `.iloc[0]` makes the positional read explicit; a bare `row[0]` on a
        # label-indexed Series is deprecated in recent pandas.
        geoid = str(precinct_row.iloc[0])
        # Look the precinct up once and reuse the result for the test and the sum.
        matches = pre_data.loc[pre_data.GEOID20==geoid,'Total_2020_TotalAdj']
        # .any() is False when nothing matches or the values are all 0/NaN.
        if matches.any():
            population += matches.values[0]
    # Keys and values are stored as plain ints.
    district_to_pop[int(district_id)] = int(population)
    total_pop+=population
    # Start the next district from zero.
    population=0
district_to_pop

In [None]:
# Average district population over 40 districts, and the band of +/- 8% around
# it that the swap loop uses as its population limits.
avg_pop = int(total_pop / 40)
dev = 0.08 * avg_pop
max_population = int(avg_pop + dev)
min_population = int(avg_pop - dev)
# Printed in the order: average, upper bound, lower bound.
for bound in (avg_pop, max_population, min_population):
    print(bound)


In [None]:
# current district to party dictionary
# The two counters below are incremented by the cell that fills `district_to_party`.
count_dem = 0 # counter dem against metric
count_rep = 0 # counter rep against metric
# Column sums per district; the resulting frame is indexed by District.
# NOTE(review): `.sum()` is called without `numeric_only`; on recent pandas the
# string column GEOID20 is presumably concatenated rather than dropped -- confirm
# against the pandas version in use.
temp_data = pre_data.groupby(by="District").sum()
#temp_data = [tuple(x) for x in temp_data.values.tolist()]

print(temp_data)

In [None]:
# district -> flag: 1 means the district must not be changed, 0 means it may.
district_to_party = {}
# Reset the tallies here so that re-running this cell does not double count.
count_dem = 0 # districts whose Dem share is >= .85
count_rep = 0 # districts whose Rep share is >= .85
#checking what cannot be changed due to current map party leaning (swapping other precinct)
for districts, rows in temp_data.iterrows():
    # `districts` is the index label of temp_data, i.e. the district id.
    total = round(temp_data.loc[districts,'Total_2016-2021_Comp'], 2)
    rep = round(temp_data.loc[districts,'Rep_2016-2021_Comp']/total, 2)
    dem = round(temp_data.loc[districts,'Dem_2016-2021_Comp']/total , 2)

    #0 being can be changed, 1 means cannot be changed
    party = 1 if (dem >= .85 or rep >= .85) else 0
    # Key by the district id itself. The previous key, int(rows[0]), was the
    # first summed data column, which is not a district number and could
    # collide between districts.
    district_to_party[int(districts)] = party
    if dem >= .85:
        count_dem = count_dem + 1
    if rep >= .85:
        count_rep = count_rep + 1
    
#print(district_to_party)
#len(district_to_party)
#print(count_dem)
#print(count_rep)
#total

In [None]:
print(count_dem)
print(count_rep)

In [None]:
district_to_party
#type(district_to_party)

In [None]:
# Re-key the party flags with consecutive district numbers 1..N, where N is the
# number of districts present in `assignment`; the flags are taken in the order
# in which they were inserted into `district_to_party`.
total_districts_list = assignment.groupby(by="District").ngroups
ini_dict = list(range(1, total_districts_list + 1))
final_dict_to_party = dict(zip(ini_dict, district_to_party.values()))
# Display the re-keyed dictionary.
final_dict_to_party

In [None]:
#equal amounts for each party
# These two values are the budgets from which the later cell subtracts
# count_dem / count_rep.
# NOTE(review): the values and the trailing comments look swapped (14 vs 26) --
# confirm which party each number is meant for.
count_blue = 14 #currently there are 26 districts that lean blue
count_red = 26 #currently there are 14 districts that lean red
#first metric if party lean is more than 60% then leave them alone
# NOTE(review): the cell that fills district_to_party uses a .85 threshold, not 60%.

In [None]:
def get_random_p():
  """Return one value picked at random from the `GEOID20` column of `pre_data`."""
  return random.choice(pre_data['GEOID20'])
tmp = get_random_p()
contiguous_precincts[tmp]

In [None]:
#updating current count
curr_blue = count_blue - count_dem #how many more dem leaning districts we can have left
# Fixed: this used to be `count_rep - count_rep`, which is always 0 and therefore
# disabled every red-leaning swap. It now mirrors the blue line above.
curr_red = count_red - count_rep #how many more rep leaning districts we can have left
print(curr_blue)
print(curr_red)


In [None]:
def find_neighbors(precinct, precinct_to_swap, district,input_list, contiguous_precincts, prec_to_dist): 
    """Walk from `precinct` through same-district neighbours, skipping `precinct_to_swap`.

    Starting at `precinct`, repeatedly step to the first neighbour (in the
    order given by `contiguous_precincts`) that is not `precinct_to_swap`,
    belongs to `district` according to `prec_to_dist`, and has not been
    recorded yet. Every precinct stepped onto is appended to `input_list`.
    The walk stops at the first precinct that has no such neighbour.

    NOTE: this follows a single path; it does not come back to explore the
    other neighbours of precincts visited earlier, and the starting precinct
    is only recorded if the walk steps back onto it.

    Parameters
    ----------
    precinct : id of the starting precinct.
    precinct_to_swap : id that the walk must never step onto.
    district : district the walk is restricted to.
    input_list : list that collects the visited ids; it is mutated in place.
    contiguous_precincts : mapping precinct id -> iterable of neighbouring ids.
    prec_to_dist : mapping precinct id -> district.

    Returns
    -------
    `input_list` itself, after the walk.
    """
    current = precinct
    while True:
        for candidate in contiguous_precincts[current]:
            if candidate != precinct_to_swap and prec_to_dist[candidate] == district:
                if candidate not in input_list:
                    input_list.append(candidate)
                    current = candidate
                    break
        else:
            # No admissible, unvisited neighbour left: the walk ends here.
            return input_list

In [None]:
blue_red_votes

In [None]:
# Randomised precinct-swap pass.
#
# For 12000 random precincts `tmp`, look at each neighbouring precinct `p` that
# sits in a different district. If both districts are flagged as changeable (0 in
# final_dict_to_party), the move keeps both districts inside the population band
# and a swap budget is still available (curr_blue / curr_red), `tmp` is moved into
# p's district and all bookkeeping dictionaries are updated in place.
#
# Reads and mutates these notebook-level names: prec_to_dist, district_to_pop,
# districts_votes, curr_blue, curr_red. Reads: final_dict_to_party,
# contiguous_precincts (the pickled neighbour mapping), prec_pop, blue_red_votes,
# min_population, max_population.
#
# Fixes compared with the previous version of this cell:
#   * new_b / new_r are now real shares, (a + b) / (c + d); the missing
#     parentheses used to evaluate a + b / c + d.
#   * the swap decision is only taken after the population check has passed; it
#     used to run outside that check, with undefined or stale percentages and
#     out-of-range populations.
#   * processing of `tmp` stops after it has been swapped once; continuing used to
#     treat the new district as both source and target and corrupt the totals.
#   * the outer loop variable is no longer overwritten inside the loop.
considered = 0      # (precinct, neighbouring district) pairs examined
swap_counter = 0    # swaps actually applied
total_left = curr_blue + curr_red
for attempt in range(0,12000):
    tmp = get_random_p()
    old_district = prec_to_dist[tmp]

    # Only precincts whose current district is flagged 0 (changeable) may move.
    if final_dict_to_party[old_district] != 0:
        continue

    swapped = False
    for p in contiguous_precincts[tmp]:
        new_district = prec_to_dist[p]

        # The receiving district must be changeable and different from the current one.
        if final_dict_to_party[new_district] != 0:
            continue
        if new_district == old_district:
            continue
        considered += 1

        # For every neighbour of tmp that shares tmp's district, record the walk
        # through that district that avoids tmp.
        # NOTE(review): as before, the test below runs after each neighbour and
        # compares the walks as ordered lists, so it does not by itself guarantee
        # that the old district stays contiguous -- left unchanged, needs review.
        neighbors_d = {}
        for o in contiguous_precincts[tmp]:
            if prec_to_dist[o] == old_district:
                neighbors_d[o] = find_neighbors(o, tmp, old_district, [], contiguous_precincts, prec_to_dist)

            distinct_walks = []
            for walk in neighbors_d.values():
                if walk not in distinct_walks:
                    distinct_walks.append(walk)
            if len(distinct_walks) != 1:
                continue

            # Populations of both districts after the move must stay inside
            # [min_population, max_population).
            new_pop = district_to_pop[new_district] + prec_pop[tmp]
            old_pop = district_to_pop[old_district] - prec_pop[tmp]
            if not (min_population <= new_pop < max_population):
                continue
            if not (min_population <= old_pop < max_population):
                continue

            #blue_red_votes is a dictionary of key:precinct, value: [blue votes, red votes, total]
            #districts_votes is dictionary of key:district, value: [blue votes, red votes, total]
            target_votes = districts_votes[new_district]
            moved_votes = blue_red_votes[tmp]
            curr_b = target_votes[0] / target_votes[2]
            curr_r = target_votes[1] / target_votes[2]
            new_total = target_votes[2] + moved_votes[2]
            new_b = (target_votes[0] + moved_votes[0]) / new_total
            new_r = (target_votes[1] + moved_votes[1]) / new_total
            percentage_b_increase = new_b - curr_b
            percentage_r_increase = new_r - curr_r

            # Spend one unit of the matching budget; no swap when the budget is
            # exhausted or when both shares change by the same amount.
            if (percentage_b_increase > percentage_r_increase) and curr_blue != 0:
                curr_blue = curr_blue - 1
                swapped = True
            elif (percentage_b_increase < percentage_r_increase) and curr_red != 0:
                curr_red = curr_red - 1
                swapped = True

            if swapped:
                swap_counter += 1
                # Move tmp and update populations and vote totals of both districts.
                prec_to_dist[tmp] = new_district
                district_to_pop[new_district] = new_pop
                district_to_pop[old_district] = old_pop
                for k in range(3):
                    districts_votes[new_district][k] += moved_votes[k]
                    districts_votes[old_district][k] -= moved_votes[k]
                total_left = curr_blue + curr_red
                break

        if swapped:
            # tmp now belongs to another district; stop looking at its neighbours.
            break

print(swap_counter)
print(considered)

In [None]:
prec_to_dist

In [None]:
# Ids (GEOID20) of the precincts whose 'Dem_2016-2021_Comp' value is missing in pre_data.
missing_p= pre_data.loc[pre_data['Dem_2016-2021_Comp'].isnull(),'GEOID20'].tolist()

In [None]:
# Put every precinct listed in `missing_p` into district 40, overwriting whatever
# district prec_to_dist held for it.
# NOTE(review): 40 is hard-coded and these precincts bypass the population and
# vote bookkeeping of the swap loop -- confirm this is intended.
for ele in missing_p:
  prec_to_dist[ele] = 40

In [None]:
import csv
# Write the final precinct -> district assignment as a two-column CSV file.
header = ['GEOID20', 'District']
# newline='' hands line-ending control to the csv module, and lineterminator='\n'
# keeps the data rows exactly as they were written before while making the header
# row use the same ending (the header used to end in '\r\n', the rows in '\n').
# NOTE(review): districts are written as stored in prec_to_dist; if they are
# floats they appear as e.g. "12.0" -- confirm the expected output format.
with open('testfair14.csv', 'w', newline='') as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerow(header)
    for key, district in prec_to_dist.items():
        writer.writerow([key, district])

In [None]:
#OLD CODE DONT RUN

In [None]:
#here we can merge/recombine and cut population in half (first attempt)
#finding what districts are contiguous
#import csv

#conti_p = {}
#for precincts in (assignment.groupby(by="GEOID20")):
    #for precinct in districts[1].iterrows():

#    p1 = precincts[0]
    #print(p1)
#    conti_p[p1] = contiguous_precincts(p1, precinct_boundaries)[1]
#field_name = ['Precinct', 'Count', 'Neighbors']

#with open('conti_p.csv', 'w') as f:
#    writer = csv.DictWriter(f, fieldnames=conti_p.keys())
#    writer.writeheader()
#    writer.writerow(conti_p)

In [None]:
#print(list(conti_p.values()))

In [None]:
#output has to be GEOID20 and district 