# 6) List of historical overlapping ANCs

For every ANC past and present, generate a list of ANCs that overlapped with it.

In other words, turn the overlap CSVs (one row for every overlap) into a list for every ANC.

* Backwards - how much of each new district is made up of the old districts? 
* Forwards - how much of each old district went into this new district? 

In [1]:
import pandas as pd

In [2]:
# ANC reference table; it is joined on anc_id in the final export cell.
ancs = pd.read_csv('../data/ancs.csv')

# Overlap tables (one row per overlapping pair of districts), read in the
# same order as the names on the left-hand side.
overlap_backwards, overlap_forwards = (
    pd.read_csv(csv_name)
    for csv_name in ('overlap_backwards_anc.csv', 'overlap_forwards_anc.csv')
)

Confirm that, for each district, the overlap percentages sum to approximately one (within 0.0001).

In [3]:
# Total overlap share per 2022 ANC; every total should be within 0.0001 of 1.
backwards_totals = overlap_backwards.groupby('anc_id_2022')['overlap_perc'].sum()
bool(((backwards_totals - 1).abs() < 0.0001).all())

True

In [4]:
# Total overlap share per 2012 ANC; every total should be within 0.0001 of 1.
forwards_totals = overlap_forwards.groupby('anc_id_2012')['overlap_perc'].sum()
bool(((forwards_totals - 1).abs() < 0.0001).all())

True

In [5]:
# Smallest overlap_area that still counts as a real overlap. The value is in
# the same units as the overlap_area column (presumably square meters - TODO confirm).
MIN_OVERLAP_AREA = 500

# Apply identical validity rules to both directions. A row is valid unless
#   * its overlap area is below the minimum, or
#   * either of the two ANC ids is missing.
for overlap in (overlap_backwards, overlap_forwards):
    too_small = overlap['overlap_area'] < MIN_OVERLAP_AREA
    missing_id = overlap['anc_id_2012'].isnull() | overlap['anc_id_2022'].isnull()
    overlap['valid'] = ~(too_small | missing_id)

In [6]:
# # Manually remove some overlaps (disabled; note the ids below are SMD ids, while this notebook's columns hold ANC ids)

# # Per Corey Holman, this overlap is just a shapefile artifact along a bridge
# overlap_backwards.loc[
#     (overlap_backwards.anc_id_2012 == 'smd_6B06') & (overlap_backwards.anc_id_2022 == 'smd_2022_8F01')
#     , 'valid'
# ] = False

# overlap_forwards.loc[
#     (overlap_forwards.anc_id_2012 == 'smd_6B06') & (overlap_forwards.anc_id_2022 == 'smd_2022_8F01')
#     , 'valid'
# ] = False

In [7]:
# Keep only rows flagged valid. The .copy() makes each result an independent
# frame, so columns added later do not write into a slice of the original.
overlap_backwards = overlap_backwards.loc[overlap_backwards['valid']].copy()
overlap_forwards = overlap_forwards.loc[overlap_forwards['valid']].copy()

In [8]:
# Sort by overlap percentage, descending within each district, and KEEP the
# result. sort_values() returns a new frame; without the assignment the sort
# only changes what this cell displays, while the comma-joined lists built in
# later cells follow the frames' actual row order.
# In the displayed forwards data, district_rank follows descending overlap_perc,
# so overlap_perc is used as the key for both directions.
overlap_forwards = overlap_forwards.sort_values(
    by=['anc_id_2012', 'overlap_perc'], ascending=[True, False]
)
overlap_backwards = overlap_backwards.sort_values(
    by=['anc_id_2022', 'overlap_perc'], ascending=[True, False]
)

# Show the sorted forwards table as this cell's output.
overlap_forwards

Unnamed: 0,anc_id_2022,district_area_2022,anc_id_2012,district_area_2012,overlap_area,overlap_perc,district_rank,valid
0,anc_1A_2022,1.125854e+06,anc_1A,1.663392e+06,9.280265e+05,0.557912,1.0,True
1,anc_1E_2022,1.682767e+06,anc_1A,1.663392e+06,6.079276e+05,0.365475,2.0,True
2,anc_1D_2022,1.075326e+06,anc_1A,1.663392e+06,1.274377e+05,0.076613,3.0,True
8,anc_1B_2022,1.464266e+06,anc_1B,2.671191e+06,1.399202e+06,0.523812,1.0,True
9,anc_1E_2022,1.682767e+06,anc_1B,2.671191e+06,1.074839e+06,0.402382,2.0,True
...,...,...,...,...,...,...,...,...
266,anc_6/8F_2022,1.821084e+06,anc_8C,9.239722e+06,8.185725e+03,0.000886,5.0,True
270,anc_8D_2022,1.510744e+07,anc_8D,1.242115e+07,1.200822e+07,0.966756,1.0,True
271,anc_8E_2022,2.859324e+06,anc_8D,1.242115e+07,4.129069e+05,0.033242,2.0,True
273,anc_8E_2022,2.859324e+06,anc_8E,3.165925e+06,1.964358e+06,0.620469,1.0,True


In [9]:
# of_list = overlap_forwards.groupby('anc_id_2022').anc_id_2012.apply(lambda x: ', '.join(sorted(list(x))))

In [10]:
# ob_list = overlap_backwards.groupby('anc_id_2012').anc_id_2022.apply(lambda x: ', '.join(sorted(list(x))))

In [11]:
# df = pd.DataFrame(pd.concat([of_list, ob_list]), columns=['overlap_smds'])
# df.index.name = 'anc_id'
# df = df.reset_index()

In [12]:
# df['redistricting_year'] = df.anc_id.apply(lambda x: 2022 if '_2022_' in x else 2012)

In [13]:
# Copy this to OpenANC Source
# df.sort_values(by=['redistricting_year', 'anc_id']).overlap_smds.to_clipboard(index=False)

In [14]:
def _perc_to_str(share):
    """Format one overlap share as a string with four decimal places."""
    return f'{share:.4f}'


# Strings (rather than floats) so the shares can be comma-joined per district.
for overlap in (overlap_backwards, overlap_forwards):
    overlap['overlap_perc_str'] = overlap['overlap_perc'].map(_perc_to_str)

In [15]:
# For each 2012 ANC: the 2022 ANCs it overlaps and the matching shares, as two
# parallel comma-separated strings in the frame's current row order.
comma_join = ', '.join
of = (
    overlap_forwards
    .groupby('anc_id_2012')
    .agg(
        overlap_ancs=('anc_id_2022', comma_join),
        overlap_percentage=('overlap_perc_str', comma_join),
    )
)

In [16]:
# For each 2022 ANC: the 2012 ANCs it overlaps and the matching shares, as two
# parallel comma-separated strings in the frame's current row order.
comma_join = ', '.join
ob = (
    overlap_backwards
    .groupby('anc_id_2022')
    .agg(
        overlap_ancs=('anc_id_2012', comma_join),
        overlap_percentage=('overlap_perc_str', comma_join),
    )
)

In [17]:
# Stack the 2012-keyed (of) and 2022-keyed (ob) summaries into one table.
# The index is named explicitly before it becomes a column. The previous
# rename of an 'index' column only worked because concat drops the index name
# when the two inputs' index names differ.
df = pd.concat([of, ob]).rename_axis('anc_id').reset_index()

In [18]:
# Attach the overlap summaries to the ANC reference table. The inner join keeps
# only the anc_id values present in both frames.
to_google = ancs.merge(df, how='inner', on='anc_id')

# Copy just the id and the two overlap columns to the clipboard, without the index.
export_columns = ['anc_id', 'overlap_ancs', 'overlap_percentage']
to_google.loc[:, export_columns].to_clipboard(index=False)