Download a list of commissioners from the Office of Advisory Neighborhood Commissions

In [1]:
import os
# Step up one directory so relative paths used below (e.g. 'data/ancs.csv')
# resolve against the parent folder.
# NOTE(review): this is a relative move, so re-running the cell without a
# kernel restart climbs one more level each time.
os.chdir(os.pardir)

In [2]:
import time
from io import StringIO

import requests
import pandas as pd
from tqdm import tqdm
from fuzzywuzzy import fuzz

from scripts.common import hash_dataframe, current_timestamp
from scripts.data_transformations import list_commissioners

# Let displayed DataFrames show up to 500 rows before pandas truncates them.
pd.options.display.max_rows = 500

In [3]:
# Read the ANC list and keep only the rows whose redistricting_year is 2012.
ancs = (
    pd.read_csv('data/ancs.csv')
    .loc[lambda frame: frame.redistricting_year == 2012]
    .copy()
)

In [4]:
# Fetch each ANC's page (the dc_oanc_link column) and pull the first HTML table
# from it. comm_dict maps anc_id -> DataFrame of that page's first table, and
# only contains ANCs whose first table has an 'SMD' column.
comm_dict = {}

for _, anc in tqdm(ancs.iterrows(), total=len(ancs)):
    # The timeout keeps one unresponsive page from hanging the whole run;
    # raise_for_status stops an HTTP error page from being parsed as data.
    r = requests.get(anc.dc_oanc_link, timeout=30)
    r.raise_for_status()

    try:
        # StringIO: passing literal HTML text to read_html is deprecated in
        # recent pandas. The converter keeps SMD as a string.
        table_list = pd.read_html(StringIO(r.text), converters={'SMD': str})
    except ValueError:
        # read_html raises ValueError when the page contains no tables at all;
        # fall through to the same warning as a page with the wrong table.
        table_list = []

    if table_list and 'SMD' in table_list[0].columns:
        comm_dict[anc.anc_id] = table_list[0].copy()
    else:
        print(f'Warning: ANC {anc.anc_id} table not present on OANC site.')

    # Pause between requests so the site is not hit in rapid succession.
    time.sleep(2)

100%|██████████████████████████████████████████████████████████████████████████████████| 40/40 [01:38<00:00,  2.46s/it]


In [5]:
# Stack the per-ANC tables into one frame. The dict keys become an 'anc_id'
# column; the per-table row index is discarded in favour of a fresh 0..n-1 index.
comm_official = (
    pd.concat(comm_dict, names=['anc_id'])
    .reset_index(level='anc_id')
    .reset_index(drop=True)
)

In [6]:
# Build the smd_id key by prefixing each SMD code with 'smd_'.
comm_official['smd_id'] = comm_official['SMD'].radd('smd_')

In [7]:
# Derive two boolean flags from the raw Name text.
# Whitespace is stripped before comparing so a padded 'Vacant' still counts.
comm_official['is_vacant'] = comm_official.Name.str.strip().str.lower() == 'vacant'
# na=False makes a missing Name yield False instead of NaN, so the column stays
# strictly boolean and can safely be cast to int later.
# 'chairpeson' is matched on purpose (presumably a typo seen in the source data).
comm_official['is_chairperson'] = (
    comm_official.Name.str.lower().str.contains('chairperson|chairpeson', na=False)
)

In [8]:
# Strip the trailing title from Name to get the bare commissioner name.
# The match is case-insensitive so it agrees with how is_chairperson is
# detected (on the lower-cased Name), and 'r?' also covers the 'Chairpeson'
# misspelling. regex=True is explicit because the default for str.replace
# changed between pandas versions.
comm_official['official_name'] = (
    comm_official.Name
    .str.replace(r'\s+chairper?son', '', case=False, regex=True)
    .str.strip()
)

In [9]:
# Hash each row on SMD + cleaned name via the project helper hash_dataframe.
# NOTE(review): presumably yields one stable id per row; confirm in scripts.common.
hash_columns = ['SMD', 'official_name']
comm_official['oanc_hash_id'] = hash_dataframe(comm_official, hash_columns)

In [10]:
# Line up the official OANC names against OpenANC's current commissioners,
# joined on smd_id, and score how closely each pair of names matches.
comm_openanc = list_commissioners(status='current')
comm = comm_official.merge(comm_openanc, how='left', on='smd_id')

# A missing name on either side is treated as a vacant seat for the comparison.
for name_col in ('official_name', 'commissioner_name'):
    comm[name_col] = comm[name_col].fillna('Vacant')

# fuzz.ratio returns a 0-100 similarity score; low scores float to the top below.
comm['name_score'] = [
    fuzz.ratio(official, openanc)
    for official, openanc in zip(comm['official_name'], comm['commissioner_name'])
]

review_columns = ['smd_id', 'official_name', 'commissioner_name', 'name_score']
comm.loc[:, review_columns].sort_values(by='name_score')

Unnamed: 0,smd_id,official_name,commissioner_name,name_score
90,smd_3D02,Vacant,Elizabeth R. Pemmerl,15
142,smd_4C08,Vacant,Clara Haskell Botstein,21
278,smd_8C04,Vacant,"Travon ""Ward 8"" Hawkins",21
51,smd_2B07,Michael Scott McKernan,Randy D. Downs,22
95,smd_3D07,Vacant,Christian Damiana,26
176,smd_5D06,Vacant,Zachary Hoffman,38
154,smd_5A04,Diego Rojas,Alex Rojas,67
161,smd_5B03,Prita Piekara,"Sukhprita ""Prita"" Piekara",68
83,smd_3C04,"Robert ""Beau"" Finley",Beau Finley,71
47,smd_2B03,Robin Linnett Nunn,Robin Nunn,71


In [11]:
# Write a dated snapshot of the official commissioner list to data/oanc/.
columns_to_save = ['smd_id', 'Name', 'official_name', 'is_vacant', 'is_chairperson', 'oanc_hash_id']

# The file name carries the date (YYYY-MM-DD) taken from current_timestamp().
date_stamp = current_timestamp().strftime('%Y-%m-%d')
filename = f'data/oanc/commissioners_{date_stamp}.csv'

comm_official.loc[:, columns_to_save].to_csv(filename, index=False)

In [12]:
# Confirm that every ANC has exactly one chairperson
comm_official['is_chairperson_int'] = comm_official['is_chairperson'].astype(int)
num_chairs = comm_official.groupby('anc_id').is_chairperson_int.sum()
# Check each ANC on its own: comparing the grand total of chairs to the number
# of ANCs would still pass if one ANC had two chairs and another had none.
bool((num_chairs == 1).all())

True