# Open Seats by Districts

Which ANCs and wards have the most districts without any candidates?

In [1]:
import os
os.chdir('../')

import pandas as pd
pd.set_option('display.max_rows', 500)

from scripts.data_transformations import (
    list_commissioners
    , list_candidates
)

In [2]:
candidates = list_candidates(election_year=2022)
districts = pd.read_csv('data/districts.csv')
districts = districts[districts.redistricting_year == 2022].copy()
ancs = pd.read_csv('data/ancs.csv')
wards = pd.read_csv('data/wards.csv')

In [3]:
districts = pd.merge(districts, ancs, how='inner', on='anc_id')
districts = pd.merge(districts, wards, how='inner', on='ward_id')

In [4]:
districts = pd.merge(
    districts
    , pd.DataFrame(candidates.groupby('smd_id').size(), columns=['num_candidates']).reset_index()
    , how='left'
    , on='smd_id'
)

In [5]:
districts['num_candidates'] = districts['num_candidates'].fillna(0)

districts['no_candidate_districts'] = 0
districts.loc[districts.num_candidates == 0, 'no_candidate_districts'] = 1

In [6]:
candidates_by_anc = districts.groupby('anc_id').agg(
    num_districts=('smd_id', 'size')
    , no_candidate_districts=('no_candidate_districts', 'sum')
)
candidates_by_anc['percentage_no_candidate'] = (
    candidates_by_anc.no_candidate_districts / candidates_by_anc.num_districts
)

In [7]:
candidates_by_anc.sort_values(by=['percentage_no_candidate', 'no_candidate_districts'], ascending=False)


Unnamed: 0_level_0,num_districts,no_candidate_districts,percentage_no_candidate
anc_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
anc_8D_2022,8,5,0.625
anc_6E_2022,9,5,0.555556
anc_2C_2022,4,2,0.5
anc_7E_2022,7,3,0.428571
anc_2E_2022,8,3,0.375
anc_2F_2022,8,2,0.25
anc_7F_2022,8,2,0.25
anc_1C_2022,9,2,0.222222
anc_5A_2022,9,2,0.222222
anc_5D_2022,9,2,0.222222


In [8]:
sorted(ancs[ancs.notes.fillna('').str.contains('new')].anc_name)

['ANC 1E', 'ANC 2G', 'ANC 3A', 'ANC 4E', 'ANC 5F', 'ANC 6/8F']

In [9]:
candidates_by_anc.sum()

num_districts              345.000000
no_candidate_districts      45.000000
percentage_no_candidate      5.894048
dtype: float64

In [10]:
candidates_by_ward = districts.groupby('ward_id').agg(
    num_districts=('smd_id', 'size')
    , no_candidate_districts=('no_candidate_districts', 'sum')
)
candidates_by_ward #.sort_values(by='no_candidate_districts', ascending=False)

Unnamed: 0_level_0,num_districts,no_candidate_districts
ward_id,Unnamed: 1_level_1,Unnamed: 2_level_1
ward_1_2022,42,5
ward_2_2022,46,7
ward_3_2022,43,3
ward_4_2022,42,3
ward_5_2022,45,4
ward_6_2022,42,6
ward_7_2022,43,8
ward_8_2022,42,9


In [11]:
candidates_by_ward.sum()

num_districts             345
no_candidate_districts     45
dtype: int64

In [12]:
candidate_count = pd.pivot_table(
    data=districts
    , columns='num_candidates'
    , index='ward_name'
    , aggfunc='size'
    , fill_value=0
)

candidate_count['Total'] = candidate_count.sum(axis=1)
candidate_count.loc['Total'] = candidate_count.sum(axis=0)
candidate_count.index.name = 'Ward'
candidate_count.columns.name = 'Number of Candidates'

candidate_count

Number of Candidates,0.0,1.0,2.0,3.0,4.0,Total
Ward,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ward 1,5,25,9,3,0,42
Ward 2,7,25,12,2,0,46
Ward 3,3,24,14,2,0,43
Ward 4,3,25,12,2,0,42
Ward 5,4,20,21,0,0,45
Ward 6,6,25,9,1,1,42
Ward 7,8,21,9,5,0,43
Ward 8,9,9,16,8,0,42
Total,45,174,102,23,1,345


In [13]:
candidate_count = pd.pivot_table(
    data=districts
    , columns='num_candidates'
    , index='anc_name'
    , aggfunc='size'
    , fill_value=0
)

candidate_count['Total'] = candidate_count.sum(axis=1)
candidate_count.loc['Total'] = candidate_count.sum(axis=0)
candidate_count.index.name = 'ANC'
candidate_count.columns.name = 'Number of Candidates'

candidate_count

Number of Candidates,0.0,1.0,2.0,3.0,4.0,Total
ANC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ANC 1A,0,8,1,1,0,10
ANC 1B,1,5,3,0,0,9
ANC 1C,2,5,1,1,0,9
ANC 1D,1,4,2,0,0,7
ANC 1E,1,3,2,1,0,7
ANC 2A,0,6,2,1,0,9
ANC 2B,0,7,2,0,0,9
ANC 2C,2,1,1,0,0,4
ANC 2D,0,2,0,0,0,2
ANC 2E,3,3,2,0,0,8


In [14]:
districts.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 345 entries, 0 to 344
Data columns (total 38 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   smd_id                  345 non-null    object 
 1   smd_name                345 non-null    object 
 2   sort_order_x            345 non-null    int64  
 3   redistricting_year_x    345 non-null    int64  
 4   redistricting_cycle_x   345 non-null    object 
 5   anc_id                  345 non-null    object 
 6   ward_id                 345 non-null    object 
 7   centroid_lon_x          345 non-null    float64
 8   centroid_lat_x          345 non-null    float64
 9   area_x                  0 non-null      float64
 10  map_color_id            345 non-null    int64  
 11  neighbor_smds           345 non-null    object 
 12  overlap_smds            345 non-null    object 
 13  overlap_percentage_x    345 non-null    object 
 14  notes_x                 0 non-null      ob