# OANC Commissioner List

Download the list of commissioners published by the DC Office of Advisory Neighborhood Commissions (OANC) and compare it against the OpenANC list of current commissioners.

In [1]:
import os
# Move the working directory up one level so that the relative paths and local
# imports used below (e.g. 'data/ancs.csv', `config`, `scripts`) resolve from
# there -- presumably the repository root; confirm where the notebook lives.
# NOTE: re-running this cell in the same kernel moves up one more level each time.
os.chdir('../')

In [2]:
import time
from io import StringIO

import requests
import pandas as pd
from tqdm import tqdm
from fuzzywuzzy import fuzz

import config
from scripts.common import hash_dataframe, current_timestamp, validate_smd_ids
from scripts.data_transformations import list_commissioners

# Allow up to 500 rows to be rendered when a DataFrame is displayed in a cell.
pd.set_option('display.max_rows', 500)

In [3]:
# Load every ANC on file, then keep only the rows whose redistricting year
# matches the one configured as current.
ancs_all = pd.read_csv('data/ancs.csv')
in_current_cycle = ancs_all.redistricting_year == config.current_redistricting_year
ancs = ancs_all[in_current_cycle].copy()

In [4]:
# Scrape each ANC's page on the OANC site and keep its first HTML table
# (the commissioner list), keyed by anc_id.
comm_dict = {}

for _, anc in tqdm(ancs.iterrows(), total=len(ancs)):
    table_list = []
    try:
        # The timeout keeps one unresponsive page from hanging the whole run;
        # raise_for_status stops an HTTP error page from being parsed as data.
        r = requests.get(anc.dc_oanc_link, timeout=30)
        r.raise_for_status()

        # read_html raises ValueError when the page contains no table at all.
        # The HTML is wrapped in StringIO because passing a literal HTML
        # string to read_html is deprecated in recent pandas versions.
        table_list = pd.read_html(StringIO(r.text), converters={'SMD': str})
    except (requests.RequestException, ValueError) as err:
        # Best effort: report the problem and fall through to the warning
        # branch below so the remaining ANCs are still scraped.
        print(f'Warning: ANC {anc.anc_id} page could not be fetched or parsed: {err}')

    if table_list and 'SMD' in table_list[0].columns:
        comm_dict[anc.anc_id] = table_list[0].copy()
    elif table_list and anc.anc_id == 'anc_5F_2022':
        # First row of this dataframe is the column names and should be fixed then dropped
        header_fixed = table_list[0].copy()
        header_fixed.columns = header_fixed.iloc[0]
        comm_dict[anc.anc_id] = header_fixed.drop(0)
    else:
        print(f'Warning: ANC {anc.anc_id} table not present on OANC site.')
        print(table_list)

    # Pause between requests to avoid hammering the OANC site.
    time.sleep(2)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [01:36<00:00,  2.10s/it]


In [5]:
# Stack the per-ANC tables into one frame. The dict keys become the outer index
# level, which is surfaced as the anc_id column; the inner (per-table) row
# index is discarded.
stacked_tables = pd.concat(comm_dict)
flat_tables = stacked_tables.reset_index()
comm_official = flat_tables.rename(columns={'level_0': 'anc_id'}).drop(columns='level_1')

In [6]:
# Build the smd_id column by prefixing the OANC "SMD" value.
smd_ids = 'smd_2022_' + comm_official['SMD']

# Fix some mangled SMD IDs: (text as produced from the OANC value, corrected text).
# The substitutions are applied one after another, in this order.
smd_id_fixes = [
    ('smd_2022_3G', 'smd_2022_3/4G'),
    ('smd_2022_7B02l', 'smd_2022_7B02'),
    ('smd_2022_6/8F', 'smd_2022_8F'),
]
for mangled, corrected in smd_id_fixes:
    smd_ids = smd_ids.str.replace(mangled, corrected)

comm_official['smd_id'] = smd_ids


In [7]:
# Confirm that the district list is correct: run the shared SMD ID validation,
# then check the row count (345 is presumably the total number of districts
# for this redistricting cycle -- confirm if the map changes).
validate_smd_ids(comm_official)
num_official_rows = len(comm_official)
assert num_official_rows == 345

In [8]:
# Flag vacant seats and ANC chairs from the raw OANC "Name" text.
# Names are stripped and lower-cased first so stray whitespace or capitalisation
# on the site cannot hide a match.
names_normalized = comm_official['Name'].str.strip().str.lower()

# A missing name is counted as a vacancy, in line with the later comparison
# step, which fills missing OANC names with 'Vacant'.
comm_official['is_vacant'] = names_normalized.isna() | (names_normalized == 'vacant')

# Match "chairperson", the "chairpeson" typo and "chairman" (all of which are
# treated as titles when names are cleaned), but NOT when preceded by "vice " or
# "vice-": a Vice-Chairperson is not the chair. na=False keeps the column
# strictly boolean when a name is missing.
comm_official['is_chairperson'] = names_normalized.str.contains(
    r'(?<!vice[ -])chair(?:person|peson|man)', regex=True, na=False
)

In [9]:
# Officer titles (including misspellings seen on the OANC site) that are
# appended to commissioner names and must be removed to get the bare name.
# Order matters: the entries are removed one after another, so a longer title
# must come before any shorter title it contains (e.g. ' Vice Chair/Treasurer'
# before ' Vice Chair' and ' Treasurer', ' Corresponding Secretary' before
# ' Secretary').
strings_to_remove_from_names = [
    ' Chairperson'
    , ' Chairpeson'
    , ' Chairman'
    , ' Vice Chair/Treasurer'
    , ' Vice Chair'
    , ' Corresponding Secretary'
    , ' Recording Secretary'
    , ' Secretary'
    , ' /Secretary'
    , ' Seretary'
    , ' Parlimentarian'
    , ' Parliamentarian'
    , ' Sargent-at-arms'
    , ' Treasurer'
    , ' Vice-Chairperson'
]

cleaned_names = comm_official['Name']

for title in strings_to_remove_from_names:
    # regex=False: the titles are literal text. The default for `regex` differs
    # between pandas 1.x and 2.x, so it is pinned here to get the same result
    # on either version, and so that a future title containing characters such
    # as '.' or '(' is not interpreted as a pattern.
    cleaned_names = cleaned_names.str.replace(title, '', regex=False)

comm_official['oanc_name'] = cleaned_names.str.strip()

In [10]:
# Identify each official record by a hash over its SMD and cleaned name
# (hash_dataframe is a project helper; see scripts.common for how it hashes).
hash_columns = ['SMD', 'oanc_name']
comm_official['oanc_hash_id'] = hash_dataframe(comm_official, hash_columns)

In [11]:
# Compare official to OpenANC: attach OpenANC's current commissioners to every
# official row, matching on smd_id (official rows without a match are kept).
comm_openanc = list_commissioners(status='current')
comm = comm_official.merge(comm_openanc, how='left', on='smd_id')

# A missing name on either side is treated as a vacancy.
comm['oanc_name'] = comm['oanc_name'].fillna('Vacant')
comm['openanc_name'] = comm['commissioner_name'].fillna('Vacant')


def _name_similarity(row):
    """Return the fuzzywuzzy ratio between one row's OANC and OpenANC names."""
    return fuzz.ratio(row.oanc_name, row.openanc_name)


comm['name_score'] = comm.apply(_name_similarity, axis=1)

In [12]:
"""
0 = the district is not vacant according to both sources (good)
1 = the district is vacant according to one source (bad)
2 = the district is vacant according to both sources (good)
"""
# Count, per district, how many of the two name columns say "vacant".
# The comparison ignores case and surrounding whitespace so that spellings such
# as 'VACANT' or 'vacant' are counted too, matching the case-insensitive test
# used for the is_vacant flag.
says_vacant = comm[['oanc_name', 'openanc_name']].apply(
    lambda names: names.str.strip().str.lower() == 'vacant'
)
comm['num_sources_say_vacant'] = says_vacant.sum(axis=1)

# When exactly one source says vacant the names cannot match, so force the
# score to 0 to push these rows to the top of the mismatch list.
comm.loc[comm['num_sources_say_vacant'] == 1, 'name_score'] = 0

In [13]:
# Evaluate name mismatches: every district whose two names are not an exact
# fuzzy match, worst scores first.
mismatch_columns = ['smd_id', 'oanc_name', 'openanc_name', 'name_score']
is_imperfect_match = comm.name_score < 100
comm.loc[is_imperfect_match, mismatch_columns].sort_values(by=['name_score', 'smd_id'])

Unnamed: 0,smd_id,oanc_name,openanc_name,name_score
6,smd_2022_1A07,Vacant,Mukta Ghorpadey,0
46,smd_2022_2A05,Vacant,Luke Chadwick,0
268,smd_2022_7B06,Vacant,Kelvin Brown,0
281,smd_2022_7D01,Vacant,Charles Boston,0
333,smd_2022_8D06,Vacant,Wendy Hamilton,0
305,smd_2022_7F08,Shameka L. Hayes (proxy Danjuma Gaskins),Shameka Hayes,48
128,smd_2022_3E08,Lizzie Graff,Elizabeth Graff,67
318,smd_2022_8B06,Marcus Hickman,"Marcus ""Mick"" Thomas Hickman",67
200,smd_2022_5D05,Salvador Sauceda-Guzman,"Salvador ""The Commissioner"" Sauceda-Guzman",71
341,smd_2022_8E06,Dolores Bryant,"Dolores ""Miracle"" Bryant",74


In [14]:
# Columns written to the dated snapshot of the official list.
columns_to_save = ['smd_id', 'Name', 'oanc_name', 'is_vacant', 'is_chairperson', 'oanc_hash_id']

# The file name carries the date returned by current_timestamp().
date_stamp = current_timestamp().strftime('%Y-%m-%d')
filename = 'data/oanc/commissioners_{}.csv'.format(date_stamp)

comm_official.loc[:, columns_to_save].to_csv(filename, index=False)

In [15]:
# Confirm that there is one chairperson per ANC.
# .eq(True) turns a missing flag into False, so the integer cast cannot fail
# on NaN values.
comm_official['is_chairperson_int'] = comm_official['is_chairperson'].eq(True).astype(int)
num_chairs = comm_official.groupby('anc_id').is_chairperson_int.sum()

# Show the ANCs whose chair count is not exactly one; an empty result means the
# check passed. Comparing only the total number of chairs with the number of
# ANCs would miss the case where one ANC lists two chairs and another none.
num_chairs[num_chairs != 1]

In [16]:
# Districts with an issue to resolve: exactly one of the two sources lists the
# seat as vacant.
only_one_source_vacant = comm['num_sources_say_vacant'] == 1
issue_columns = ['smd_id', 'oanc_name', 'openanc_name']
comm.loc[only_one_source_vacant, issue_columns]  # .to_clipboard(index=False)

Unnamed: 0,smd_id,oanc_name,openanc_name
6,smd_2022_1A07,Vacant,Mukta Ghorpadey
46,smd_2022_2A05,Vacant,Luke Chadwick
268,smd_2022_7B06,Vacant,Kelvin Brown
281,smd_2022_7D01,Vacant,Charles Boston
333,smd_2022_8D06,Vacant,Wendy Hamilton
