Sum and rank the number of votes cast in each SMD

In [1]:
import pandas as pd

In [2]:
# Read the two input CSVs (paths are relative to the notebook's directory).
results, districts = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/results.csv', '../data/districts.csv')
)

In [3]:
# Total votes per SMD, as a two-column frame: smd_id, votes.
votes_per_smd = results.groupby('smd_id')['votes'].sum().reset_index()

In [4]:
# Count how many SMD rows fall in each ward and in each ANC.
smds_per_ward = districts.groupby('ward').size().reset_index(name='smds_in_ward')
smds_per_anc = districts.groupby('anc_id').size().reset_index(name='smds_in_anc')

In [5]:
# Attach the vote totals and the per-ward / per-ANC SMD counts to each district row.
# Inner joins: a row survives only if its key is present on both sides of each merge.
divo = (
    districts
    .merge(votes_per_smd, how='inner', on='smd_id')
    .merge(smds_per_ward, how='inner', on='ward')
    .merge(smds_per_anc, how='inner', on='anc_id')
)

In [6]:
# City-wide SMD count (number of rows in `districts`), repeated on every row.
divo = divo.assign(smds_in_dc=len(districts))

In [7]:
# Rank SMDs by votes, most votes = rank 1, city-wide and within each ward / ANC.
# method='min': tied rows all receive the lowest rank number of their group.
rank_options = dict(method='min', ascending=False)
divo['rank_dc'] = divo['votes'].rank(**rank_options)
divo['rank_ward'] = divo.groupby('ward')['votes'].rank(**rank_options)
divo['rank_anc'] = divo.groupby('anc_id')['votes'].rank(**rank_options)

In [8]:
def make_ordinal(n):
    """
    Return the English ordinal string for a number::

        make_ordinal(0)   => '0th'
        make_ordinal(3)   => '3rd'
        make_ordinal(122) => '122nd'
        make_ordinal(213) => '213th'

    ``n`` is passed through ``int()`` first, so whole-number floats such as
    ``296.0`` are accepted.

    Source: https://stackoverflow.com/a/50992575/3443926
    """
    number = int(n)
    if number % 100 in (11, 12, 13):
        # The "teens" are irregular: 11th, 12th, 13th (and 111th, 212th, ...).
        ending = 'th'
    else:
        # Only a last digit of 1, 2 or 3 gets a special suffix.
        ending = {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')
    return f'{number}{ending}'

In [9]:
# Quick check of the ordinal formatting against the number of rows in divo.
'{} of {}'.format(make_ordinal(4), len(divo))

'4th of 296'

In [10]:
# Unused draft helper, kept for reference; the next cell formats the ranking strings inline.
# def ranking_string(ranking, count_in_group):
#     return f'{make_ordinal(ranking)} of {count_in_group}'

In [11]:
# Build the "Nth out of M SMDs" label for each level.
# Each triple is (output column, rank column, group-size column).
label_specs = [
    ('string_dc', 'rank_dc', 'smds_in_dc'),
    ('string_ward', 'rank_ward', 'smds_in_ward'),
    ('string_anc', 'rank_anc', 'smds_in_anc'),
]

for label_col, rank_col, count_col in label_specs:
    # Column names are bound as lambda defaults so each pass uses its own pair.
    divo[label_col] = divo.apply(
        lambda row, r=rank_col, c=count_col: f"{make_ordinal(row[r])} out of {row[c]} SMDs",
        axis=1,
    )

In [12]:
# Show the table ordered from fewest to most votes.
divo.sort_values('votes', ascending=True)

Unnamed: 0,smd_id,anc_id,ward,centroid_lon,centroid_lat,area,map_color_id,neighbor_smds,notes,description,...,votes,smds_in_ward,smds_in_anc,smds_in_dc,rank_dc,rank_ward,rank_anc,string_dc,string_ward,string_anc
154,smd_5A04,5A,5,-77.000625,38.937487,941020,4,"5A03, 5A05, 5A06, 5A07, 5A08, 5B02, 5B05, 5E01",,,...,3,37,8,296,296.0,37.0,8.0,296th out of 296 SMDs,37th out of 37 SMDs,8th out of 8 SMDs
62,smd_2E04,2E,2,-77.075688,38.908170,184010,3,"2E01, 2E05, 2E08, 3D09",,,...,6,38,8,296,295.0,38.0,8.0,295th out of 296 SMDs,38th out of 38 SMDs,8th out of 8 SMDs
66,smd_2E08,2E,2,-77.072482,38.907957,108794,7,"2E01, 2E02, 2E03, 2E04, 2E05",,,...,13,38,8,296,293.0,36.0,7.0,293rd out of 296 SMDs,36th out of 38 SMDs,7th out of 8 SMDs
44,smd_2A08,2A,2,-77.044712,38.898829,252413,9,"2A01, 2A06, 2A07, 2B05, 2B06",,,...,13,38,8,296,293.0,36.0,8.0,293rd out of 296 SMDs,36th out of 38 SMDs,8th out of 8 SMDs
95,smd_3D07,3D,3,-77.089375,38.937013,265095,12,"3D01, 3D02, 3D10",,,...,21,39,10,296,292.0,39.0,10.0,292nd out of 296 SMDs,39th out of 39 SMDs,10th out of 10 SMDs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,smd_5C03,5C,5,-76.953500,38.923021,1335818,11,"5C02, 5C04, 7D03",,,...,1523,37,7,296,5.0,2.0,1.0,5th out of 296 SMDs,2nd out of 37 SMDs,1st out of 7 SMDs
223,smd_6E05,6E,6,-77.018350,38.902531,150004,3,"2C01, 2C02, 2F06, 6E04, 6E07",,,...,1634,38,7,296,4.0,3.0,1.0,4th out of 296 SMDs,3rd out of 38 SMDs,1st out of 7 SMDs
180,smd_5E03,5E,5,-77.001603,38.915308,516348,9,"5C05, 5D01, 5E02, 5E04, 5E10, 6C06",,,...,1759,37,10,296,3.0,1.0,1.0,3rd out of 296 SMDs,1st out of 37 SMDs,1st out of 10 SMDs
211,smd_6C06,6C,6,-77.003400,38.904393,782651,8,"5D01, 5E03, 5E04, 5E05, 6A01, 6C04, 6C05, 6E06...",,,...,2945,38,6,296,2.0,2.0,1.0,2nd out of 296 SMDs,2nd out of 38 SMDs,1st out of 6 SMDs


In [19]:
# Mean votes per SMD within each ward and within each ANC.
average_votes_by_ward = divo.groupby('ward')['votes'].mean()
average_votes_by_anc = divo.groupby('anc_id')['votes'].mean()

In [20]:
# Mean votes per SMD across every row of divo.
divo['votes'].mean()

847.9324324324324

In [22]:
# Largest of the per-ward mean vote counts.
average_votes_by_ward.max()

1130.3947368421052

In [23]:
# Largest of the per-ANC mean vote counts.
average_votes_by_anc.max()

1335.8333333333333