# 6) List of historical overlapping districts

For every SMD past and present, generate a list of SMDs that overlapped with it.

In other words, turn the overlap CSVs (one row for every overlap) into a list for every district.

* Backwards - how much of each new district is made up of the old districts? 
* Forwards - how much of each old district went into each new district?

In [1]:
import pandas as pd

In [2]:
# Read the district list and the two overlap tables (one row for every overlap).
districts, overlap_backwards, overlap_forwards = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/districts.csv',
        'overlap_backwards.csv',
        'overlap_forwards.csv',
    )
)

Confirm that, for every district, the overlap percentages add up to roughly one.

In [3]:
# For every 2022 district, the overlap_perc values should total 1 (within 0.0001).
bool(
    overlap_backwards
    .groupby('smd_id_2022')['overlap_perc']
    .sum()
    .sub(1)
    .abs()
    .lt(0.0001)
    .all()
)

True

In [4]:
# For every 2012 district, the overlap_perc values should total 1 (within 0.0001).
bool(
    overlap_forwards
    .groupby('smd_id_2012')['overlap_perc']
    .sum()
    .sub(1)
    .abs()
    .lt(0.0001)
    .all()
)

True

In [5]:
# Flag every overlap row as valid unless it fails one of the checks below.
# Both tables get exactly the same treatment.
for overlaps in (overlap_backwards, overlap_forwards):
    is_invalid = (
        # Sliver overlaps: overlap_area must be at least 10
        # (presumably square meters -- TODO confirm the units of overlap_area)
        (overlaps['overlap_area'] < 10)
        # Rows that are missing either the 2012 or the 2022 district ID
        | overlaps['smd_id_2012'].isna()
        | overlaps['smd_id_2022'].isna()
    )
    overlaps['valid'] = ~is_invalid

In [6]:
# Manually remove some overlaps

# Per Corey Holman, the smd_6B06 / smd_2022_8F01 overlap is just a shapefile
# artifact along a bridge, so it is marked invalid in both tables.
for overlaps in (overlap_backwards, overlap_forwards):
    is_bridge_artifact = (
        overlaps['smd_id_2012'].eq('smd_6B06')
        & overlaps['smd_id_2022'].eq('smd_2022_8F01')
    )
    overlaps.loc[is_bridge_artifact, 'valid'] = False

In [7]:
# Keep only the rows still flagged as valid. The .copy() gives each table its own
# data, so columns added in later cells are written to an independent frame.
overlap_backwards = overlap_backwards.loc[lambda frame: frame['valid']].copy()
overlap_forwards = overlap_forwards.loc[lambda frame: frame['valid']].copy()

In [8]:
# Order each 2012 district's overlaps by district_rank; in the rows displayed below,
# rank 1.0 is the largest overlap_perc and higher ranks are progressively smaller.
# sort_values returns a new DataFrame instead of sorting in place, so the result is
# assigned back -- otherwise the ordering is only displayed here and is not carried
# into the later cells that join the rows of each district in frame order.
# NOTE(review): overlap_backwards is not sorted anywhere in this notebook -- confirm
# that its row order from the CSV is the order wanted in the joined lists.
overlap_forwards = overlap_forwards.sort_values(by=['smd_id_2012', 'district_rank'])
overlap_forwards

Unnamed: 0,smd_id_2022,district_area_2022,smd_id_2012,district_area_2012,overlap_area,overlap_perc,district_rank,valid
0,smd_2022_1D07,85585.815540,smd_1A01,74006.926669,74006.684808,0.999997,1.0,True
6,smd_2022_1A02,90454.734665,smd_1A02,112743.370174,59312.563845,0.526085,1.0,True
7,smd_2022_1D06,115019.961220,smd_1A02,112743.370174,41851.818955,0.371213,2.0,True
8,smd_2022_1D07,85585.815540,smd_1A02,112743.370174,11578.913068,0.102701,3.0,True
13,smd_2022_1A03,65682.304770,smd_1A03,76769.942502,58045.140114,0.756092,1.0,True
...,...,...,...,...,...,...,...,...
2239,smd_2022_8E07,211671.067426,smd_8E06,461441.616996,54564.506327,0.118248,4.0,True
2244,smd_2022_8E07,211671.067426,smd_8E07,275369.398054,114159.811690,0.414570,1.0,True
2245,smd_2022_8E08,228759.923965,smd_8E07,275369.398054,91780.187545,0.333298,2.0,True
2246,smd_2022_8E06,326421.094835,smd_8E07,275369.398054,46137.145498,0.167546,3.0,True


In [9]:
# of_list = overlap_forwards.groupby('smd_id_2022').smd_id_2012.apply(lambda x: ', '.join(sorted(list(x))))

In [10]:
# ob_list = overlap_backwards.groupby('smd_id_2012').smd_id_2022.apply(lambda x: ', '.join(sorted(list(x))))

In [11]:
# df = pd.DataFrame(pd.concat([of_list, ob_list]), columns=['overlap_smds'])
# df.index.name = 'smd_id'
# df = df.reset_index()

In [12]:
# df['redistricting_year'] = df.smd_id.apply(lambda x: 2022 if '_2022_' in x else 2012)

In [13]:
# Copy this to OpenANC Source
# df.sort_values(by=['redistricting_year', 'smd_id']).overlap_smds.to_clipboard(index=False)

In [14]:
# Render every overlap percentage as a fixed four-decimal string so the values
# can later be concatenated with ', '.join (which only accepts strings).
overlap_backwards['overlap_perc_str'] = overlap_backwards['overlap_perc'].map(
    lambda share: f'{share:.4f}'
)
overlap_forwards['overlap_perc_str'] = overlap_forwards['overlap_perc'].map(
    lambda share: f'{share:.4f}'
)

In [17]:
# Backwards: one row per 2022 district, holding a comma-separated list of the 2012
# districts it overlaps and a parallel list of the overlap percentages. Both lists
# are built from the same group rows in the same order, so entry i of one pairs with
# entry i of the other. The table is copied to the clipboard.
(
    overlap_backwards
    .groupby('smd_id_2022')
    .agg(
        overlap_smds=pd.NamedAgg(
            column='smd_id_2012',
            aggfunc=lambda old_ids: ', '.join(old_ids),
        ),
        overlap_percentage=pd.NamedAgg(
            column='overlap_perc_str',
            aggfunc=lambda shares: ', '.join(shares),
        ),
    )
    .to_clipboard()
)

In [18]:
# Forwards: one row per 2012 district, holding a comma-separated list of the 2022
# districts it overlaps and a parallel list of the overlap percentages. Both lists
# are built from the same group rows in the same order, so entry i of one pairs with
# entry i of the other. The table is copied to the clipboard.
(
    overlap_forwards
    .groupby('smd_id_2012')
    .agg(
        overlap_smds=pd.NamedAgg(
            column='smd_id_2022',
            aggfunc=lambda new_ids: ', '.join(new_ids),
        ),
        overlap_percentage=pd.NamedAgg(
            column='overlap_perc_str',
            aggfunc=lambda shares: ', '.join(shares),
        ),
    )
    .to_clipboard()
)