# Open Seats by Districts

Which ANCs and wards have the most districts without any candidates?

In [1]:
import os
os.chdir('../')

import pandas as pd
pd.set_option('display.max_rows', 500)

from scripts.data_transformations import (
    list_commissioners
    , list_candidates
)

In [2]:
candidates = list_candidates()
districts = pd.read_csv('data/districts.csv')
districts = districts[districts.redistricting_year == 2022].copy()
ancs = pd.read_csv('data/ancs.csv')

In [3]:
districts = pd.merge(
    districts
    , pd.DataFrame(candidates.groupby('smd_id').size(), columns=['num_candidates']).reset_index()
    , how='left'
    , on='smd_id'
)

In [4]:
districts['num_candidates'] = districts['num_candidates'].fillna(0)

districts['no_candidate_districts'] = 0
districts.loc[districts.num_candidates == 0, 'no_candidate_districts'] = 1

In [5]:
candidates_by_anc = districts.groupby('anc_id').agg(
    num_districts=('smd_id', 'size')
    , no_candidate_districts=('no_candidate_districts', 'sum')
)
candidates_by_anc['percentage_no_candidate'] = (
    candidates_by_anc.no_candidate_districts / candidates_by_anc.num_districts
)

In [6]:
candidates_by_anc.sort_values(by=['no_candidate_districts', 'percentage_no_candidate'], ascending=False)

Unnamed: 0_level_0,num_districts,no_candidate_districts,percentage_no_candidate
anc_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
anc_5A_2022,9,7,0.777778
anc_6E_2022,9,7,0.777778
anc_2F_2022,8,6,0.75
anc_4D_2022,8,6,0.75
anc_8D_2022,8,6,0.75
anc_1C_2022,9,6,0.666667
anc_7E_2022,7,5,0.714286
anc_2E_2022,8,5,0.625
anc_1D_2022,7,4,0.571429
anc_1E_2022,7,4,0.571429


In [7]:
sorted(ancs[ancs.notes.fillna('').str.contains('new')].anc_name)

['ANC 1E', 'ANC 2G', 'ANC 3A', 'ANC 4E', 'ANC 5F', 'ANC 8F']

In [8]:
candidates_by_anc.sum()

num_districts              345.000000
no_candidate_districts     150.000000
percentage_no_candidate     20.248413
dtype: float64

In [9]:
candidates_by_ward = districts.groupby('ward_id').agg(
    num_districts=('smd_id', 'size')
    , no_candidate_districts=('no_candidate_districts', 'sum')
)
candidates_by_ward.sort_values(by='no_candidate_districts', ascending=False)

Unnamed: 0_level_0,num_districts,no_candidate_districts
ward_id,Unnamed: 1_level_1,Unnamed: 2_level_1
ward_5_2022,45,23
ward_2_2022,46,21
ward_8_2022,44,21
ward_1_2022,42,20
ward_4_2022,42,20
ward_7_2022,43,19
ward_3_2022,43,14
ward_6_2022,40,12


In [10]:
candidates_by_ward.sum()

num_districts             345
no_candidate_districts    150
dtype: int64