# 3) Find Neighbors 2022

Look for shapes that have the same color as their neighbors, and change them to a new random color until there are no collisions. 

`map_color_id = 13` means that this new district is made up of less than 50% of an old district by area, so it needs a new random color. 

In [1]:
import random
import pandas as pd
import geopandas as gpd

In [2]:
# Let's set the random seed so we get the same colors picked every time
random.seed('DC')

In [3]:
df = gpd.read_file('smd-2022-colors.geojson')

df.sort_values(by='smd_id', inplace=True)

df['neighbors'] = None
df['num_neighbors'] = None
df['neighbor_colors'] = None
df['has_collision'] = False

possible_colors = list(range(1,13))

# Should be 345
len(df)

345

In [4]:
# df.loc[0].geometry[0].boundary.xy

In [5]:
# Determine the neighbors for each shape

for idx, row in df.iterrows():

    # get 'not disjoint' shapes
    neighbors = df[~df.geometry.disjoint(row.geometry)].smd_id.tolist()

    # remove own name from the list
    neighbors = [ name for name in neighbors if row.smd_id != name ]

    # add names of neighbors as NEIGHBORS value
    df.loc[idx, 'neighbors'] = ", ".join(neighbors)
    df.loc[idx, 'num_neighbors'] = len(neighbors)

In [6]:
df.groupby('num_neighbors').size()

num_neighbors
2      4
3     14
4     57
5     80
6     75
7     51
8     36
9     16
10     9
11     3
dtype: int64

In [7]:
def least_used_color(df, available_colors):
    """
    Return the color that is used the least on the map as it stands now
    """
    
    districts_by_color = df.groupby('map_color_id').size()
    
    return districts_by_color[available_colors].idxmin()

In [8]:
def neighbors_of_neighbors(df, smd_id):
    """
    Return a deduped list of smd_ids, that are its neighbors-of-neighbors, not containing
    the original smd_id
    """
    
    neighbors = df.loc[df.smd_id == smd_id, 'neighbors'].values[0].split(', ')
    
    n_of_n = []
    for d in neighbors:
        n_of_n += df.loc[df.smd_id == d, 'neighbors'].values[0].split(', ')
    
    return [d for d in sorted(n_of_n) if d != smd_id]

    # todo: dedupe this list

In [9]:
def assess_collisions(df):
    """
    Mark True for districts with collisions, meaning the district has the same color as a neighbor
    and needs a new color assigned. Also count map_color_id = 13 as needing a new color. 
    """
    
    for idx, row in df.iterrows():
        neighbors = row['neighbors'].split(', ')
        neighbor_colors = [df.loc[df.smd_id == n, 'map_color_id'].values[0] for n in neighbors]

        df.loc[idx, 'neighbor_colors'] = ", ".join([str(n) for n in neighbor_colors])

        if row['map_color_id'] in neighbor_colors:
            df.loc[idx, 'has_collision'] = True
    
    # color ID of 13 means this district doesn't match an old district and it also needs its color changed
    df.loc[df.map_color_id == 13, 'has_collision'] = True
    
    num_collisions = df['has_collision'].sum()
    print(f'Current collisions: {num_collisions}')

    return df, num_collisions

In [10]:
def change_one_district_color(df):
    """Change color for one district to an available color"""
    
    # todo: make this use the seed
    smd_to_change = df[df['has_collision']].head(1)['smd_id'].values[0]

    row = df[df['smd_id'] == smd_to_change]    
    old_color = row['map_color_id'].values[0]

    neighbor_colors_str = row['neighbor_colors'].values[0].split(', ')
    neighbor_colors = [int(n) for n in neighbor_colors_str]

    n_of_n = neighbors_of_neighbors(df, smd_to_change)        
    
    n_of_n_colors = df.loc[df.smd_id.isin(n_of_n)].groupby('map_color_id').size()
    n_of_n_colors.name = 'n_of_n_colors'
    
    # We only want to evaluate actual colors, not the placeholder color
    districts_by_color = df[df.map_color_id != 13].groupby('map_color_id').size()
    districts_by_color.name = 'all_districts'
    
    color_comparison = pd.merge(
        districts_by_color
        , n_of_n_colors
        , how='left'
        , left_index=True
        , right_index=True
    )
    
    color_comparison['n_of_n_colors'] = color_comparison['n_of_n_colors'].fillna(0)
    
    # New color can't be the old color and it can't be one of the neighbor colors
    acceptable_colors = [
        c for c in possible_colors
        if c not in neighbor_colors
        and c != old_color
    ]
    color_comparison = color_comparison.loc[acceptable_colors]
    
    # Of all the neighbor colors, we should pick the color used the least, breaking ties by picking
    # the color used the least on the whole map
    least_used_neighbor_colors = color_comparison[
        color_comparison['n_of_n_colors'] == color_comparison['n_of_n_colors'].min()]
    new_color = least_used_neighbor_colors['all_districts'].idxmin()

    df.loc[row.index, 'map_color_id'] = new_color
    df.loc[row.index, 'has_collision'] = False
    
    print(f'District {smd_to_change} changed from {old_color} to color {new_color}')
    
    return df

In [11]:
df, num_collisions = assess_collisions(df)

num_iterations = 200
i = 0

while num_collisions != 0 and i < num_iterations:
    i += 1
    print()
    
    df = change_one_district_color(df)
    df, num_collisions = assess_collisions(df)

Current collisions: 100

District smd_2022_1A07 changed from 7 to color 5
Current collisions: 99

District smd_2022_1A09 changed from 13 to color 7
Current collisions: 98

District smd_2022_1B05 changed from 13 to color 11
Current collisions: 97

District smd_2022_1B07 changed from 13 to color 9
Current collisions: 96

District smd_2022_1C02 changed from 13 to color 12
Current collisions: 95

District smd_2022_1C05 changed from 13 to color 11
Current collisions: 94

District smd_2022_1D01 changed from 9 to color 7
Current collisions: 93

District smd_2022_1D06 changed from 13 to color 8
Current collisions: 92

District smd_2022_1E03 changed from 13 to color 10
Current collisions: 91

District smd_2022_1E06 changed from 13 to color 5
Current collisions: 90

District smd_2022_1E07 changed from 13 to color 10
Current collisions: 89

District smd_2022_2A01 changed from 1 to color 7
Current collisions: 88

District smd_2022_2A08 changed from 13 to color 12
Current collisions: 87

District s

In [12]:
# Use the same field names as the 2012 geojson
previous_map = gpd.read_file('../uploads/to-mapbox-smd-data.geojson')

for c in previous_map.columns:
    if c not in df.columns:
        df[c] = None

In [13]:
# df[df['smd_id'] == 'smd_1C07']

In [14]:
# df[(df['smd_id'] == 'smd_2C02') | (df['smd_id'] == 'smd_6E05') ]

In [15]:
df.groupby('map_color_id').size()

map_color_id
1     31
2     29
3     28
4     27
5     28
6     30
7     29
8     29
9     29
10    27
11    28
12    30
dtype: int64

In [30]:
# Some manual fixes
# df.loc[df.smd_id == 'smd_2022_1A07', 'map_color_id'] = 7
# df.loc[df.smd_id == 'smd_1A07', 'neighbors'].values

df.loc[df.smd_id == 'smd_2022_5A05', 'map_color_id'] = 5

In [31]:
df[previous_map.columns].to_file('smd-2022-preprocessed.geojson', driver='GeoJSON')

In [32]:
df[[c for c in previous_map.columns if c != 'geometry']].to_csv('smd-2022-preprocessed.csv', index=False)

In [19]:
# Should be 345
len(df)

345

In [23]:
# How do these colors differ from what we currently have?
districts = pd.read_csv('../data/districts.csv')

In [28]:
cols_to_merge = ['smd_id', 'map_color_id']
comp = pd.merge(districts[cols_to_merge], df[cols_to_merge], how='inner', on='smd_id', suffixes=['_current', '_new'])
comp[comp['map_color_id_current'] != comp['map_color_id_new']]

Unnamed: 0,smd_id,map_color_id_current,map_color_id_new
6,smd_2022_1A07,7,5
8,smd_2022_1A09,4,7
14,smd_2022_1B05,3,11
20,smd_2022_1C02,6,12
23,smd_2022_1C05,6,11
...,...,...,...
339,smd_2022_8E09,12,7
341,smd_2022_8F02,10,11
342,smd_2022_8F03,8,1
343,smd_2022_8F04,4,7


In [29]:
df[df.smd_id == 'smd_2022_5A05']

Unnamed: 0,smd_id,smd_name,anc_id,map_color_id,qgis_label,geometry,neighbors,num_neighbors,neighbor_colors,has_collision,...,neighbor_smds,overlap_smds,notes,description,current_person_id,current_commissioner,future_person_id,commissioner_elect,map_display_box,votes_2020
236,smd_2022_5A05,5A05,5A,12,2022_5A05,"POLYGON ((-77.00636 38.93660, -77.00629 38.936...","smd_2022_1E01, smd_2022_1E02, smd_2022_1E04, s...",10,"8, 9, 3, 9, 1, 11, 6, 4, 2, 7",False,...,,,,,,,,,,
