Look for shapes that have the same color as their neighbors, and change them to a new random color until there are no collisions. 

`map_color_id = 13` means that this new district is made up of less than 50% of an old district by area, so it needs a new random color. 

In [1]:
import random
import pandas as pd
import geopandas as gp

In [2]:
# Let's set the random seed so we get the same colors picked every time
random.seed('DC')

In [3]:
df = gp.read_file('to-mapbox-2022-smd-data-initial.geojson')

df.sort_values(by='smd_id', inplace=True)

df['neighbors'] = None
df['num_neighbors'] = None
df['neighbor_colors'] = None
df['has_collision'] = False

possible_colors = list(range(1,13))

In [4]:
# df.loc[0].geometry[0].boundary.xy

In [5]:
# Determine the neighbors for each shape

for idx, row in df.iterrows():

    # get 'not disjoint' shapes
    neighbors = df[~df.geometry.disjoint(row.geometry)].smd_id.tolist()

    # remove own name from the list
    neighbors = [ name for name in neighbors if row.smd_id != name ]

    # add names of neighbors as NEIGHBORS value
    df.loc[idx, 'neighbors'] = ", ".join(neighbors)
    df.loc[idx, 'num_neighbors'] = len(neighbors)

In [6]:
df.groupby('num_neighbors').size()

num_neighbors
2      2
3     11
4     44
5     63
6     71
7     55
8     47
9     36
10    19
11    14
12     7
13     3
14     6
dtype: int64

In [7]:
def assess_collisions(df):
    """
    Mark True for districts with collisions, meaning the district has the same color as a neighbor
    and needs a new color assigned. Also count map_color_id = 13 as needing a new color. 
    """
    
    for idx, row in df.iterrows():
        neighbors = row['neighbors'].split(', ')
        neighbor_colors = [df.loc[df.smd_id == n, 'map_color_id'].values[0] for n in neighbors]

        df.loc[idx, 'neighbor_colors'] = ", ".join([str(n) for n in neighbor_colors])

        if row['map_color_id'] in neighbor_colors:
            df.loc[idx, 'has_collision'] = True
    
    # color ID of 13 means this district doesn't match an old district and it also needs its color changed
    df.loc[df.map_color_id == 13, 'has_collision'] = True
    
    num_collisions = df['has_collision'].sum()
    print(f'Current collisions: {num_collisions}')

    return df, num_collisions

In [8]:
def least_used_color(df, available_colors):
    """
    Return the color that is used the least on the map as it stands now
    """
    
    districts_by_color = df.groupby('map_color_id').size()
    
    return districts_by_color[available_colors].idxmin()

In [9]:
def change_one_district_color(df):
    """Change color for one district to an available color"""
    
    smd_to_change = df[df['has_collision']].sample(1)['smd_id'].values[0]

    row = df[df['smd_id'] == smd_to_change]
    
    old_color = row['map_color_id'].values[0]

    neighbor_colors_str = row['neighbor_colors'].values[0].split(', ')
    neighbor_colors = [int(n) for n in neighbor_colors_str]
    
    available_colors = [c for c in possible_colors if c not in neighbor_colors]
    new_color = least_used_color(df, available_colors)

    df.loc[row.index, 'map_color_id'] = new_color
    df.loc[row.index, 'has_collision'] = False
    
    print(f'District {smd_to_change} changed from {old_color} to color {new_color}')
    
    return df

In [10]:
df, num_collisions = assess_collisions(df)

num_iterations = 200
i = 0

while num_collisions != 0 and i < num_iterations:
    i += 1
    print()
    
    df = change_one_district_color(df)
    df, num_collisions = assess_collisions(df)

Current collisions: 107

District smd_3D07 changed from 13 to color 7
Current collisions: 106

District smd_4C07 changed from 1 to color 6
Current collisions: 105

District smd_5A05 changed from 1 to color 5
Current collisions: 104

District smd_4B10 changed from 13 to color 11
Current collisions: 103

District smd_4C06 changed from 13 to color 12
Current collisions: 102

District smd_8B07 changed from 13 to color 12
Current collisions: 101

District smd_3D03 changed from 8 to color 4
Current collisions: 100

District smd_6B08 changed from 11 to color 7
Current collisions: 99

District smd_4C02 changed from 13 to color 11
Current collisions: 98

District smd_1E03 changed from 13 to color 4
Current collisions: 97

District smd_7D10 changed from 13 to color 6
Current collisions: 96

District smd_6E09 changed from 13 to color 8
Current collisions: 95

District smd_1D01 changed from 9 to color 4
Current collisions: 94

District smd_8C02 changed from 13 to color 9
Current collisions: 93

Di

In [11]:
# Use the same field names as the 2012 geojson
previous_map = gp.read_file('../uploads/to-mapbox-smd-data.geojson')

for c in previous_map.columns:
    if c not in df.columns:
        df[c] = None

In [14]:
# df[df['smd_id'] == 'smd_1C07']

In [15]:
# df[(df['smd_id'] == 'smd_2C02') | (df['smd_id'] == 'smd_6E05') ]

In [16]:
df.groupby('map_color_id').size()

map_color_id
1     32
2     32
3     32
4     31
5     32
6     33
7     31
8     31
9     31
10    31
11    31
12    31
dtype: int64

In [22]:
# Some manual fixes
df.loc[df.smd_id == 'smd_1A07', 'map_color_id'] = 7
# df.loc[df.smd_id == 'smd_1A07', 'neighbors'].values

In [23]:
df[previous_map.columns].to_file('to-mapbox-2022-smd-data.geojson', driver='GeoJSON')

In [24]:
df[previous_map.columns].to_csv('to-mapbox-2022-smd-data.csv', index=False)