# Create supersites_region_geom.geojson file with the caucus supersite borders
    
1. create supersites DataFrame from Judi's supersite precinct file
2. create pctgeo (precinct GeoDataFrame)
    - read precinct geometry from current pct_area_boulder.geojson file
    - add supersite name column using supersites dataframe
    - save pctgeo to pct_supersite.geojson
3. create supersite border geometry and save geojson file
    - create ssgeo from pctgeo by using dissolve('supersite')
    - save ssgeo in supersites_region_geom.geojson

- NOTE: need to update precinct to Supersite geojson every time a Supersite/precinct list is modified 
    - e.g. move precincts 814 and 823 from Frasier to Manhatten

- Started: January 7, 2024
- Update: January 8, 2024 - large revision - add ssname column to supersites
- Update: January 9, 2024 - version 0.1.0 add ssgeom to supersitesgeo
- Update: January 11, 2024 - version 0.2.0 add pctlist column to supersites dataframe
- Update: January 12, 2024 - version 0.3.0 start supersite boundary geometry
- Update: January 13, 2024 - version 0.3.1 finish supersite boundary geometry

### Variables

- supersites    - DataFrame read from Judi's supersite file
- pctgeo        - GeoDataFrame read from pct_area_boulder.geojson
- sss_pcts_lists    - list of lists of all the precincts at a supersite
- ssgeo         - GeoDataFrame of supersites with region boundaries


##     ISSUES

#### FIXED Precinct format mismatch
- pct_area_boulder.geojson lists zero-filled precincts, e.g. 002, 003, 004
- Draft Supersites-judi-20240106.xlsx lists single-digit precincts, e.g. 2, 3, 4
    - notebook creates rows for both, 002 and 2, 003 and 3, 004 and 4
    - not critical failure but should be fixed. 
- WORKAROUND: drop extra rows 2, 3, 4
- FIXED: zfill precincts when reading in Judi's file


In [None]:
import pandas as pd
import geopandas as gpd
# import fiona
# import numpy as np

## 1. Create supersites - dataframe of supersites with list of precincts in a column  
- start with Judi's spreadsheet
- create pctlist from "Pct #'s" column
    - use pctstr_to_list(ss) function to create series of lists
    - add pctlist series as column on supersites

In [None]:
# FUNCTION: Convert column with string of precinct numbers to series with lists of precincts
#   e.g. for each row: '2, 101, 237, 304,,,,,' to [002, 101, 237, 304]

def pctstr_to_list(ss):
    """Convert the "Pct #'s" column of *ss* into a list of precinct lists.

    Each cell is expected to hold a comma-separated string of precinct
    numbers, optionally with spaces around the numbers and padding commas,
    e.g. '2, 101, 237, 304,,,,,' -> ['002', '101', '237', '304'].

    Parameters
    ----------
    ss : DataFrame
        Must contain a "Pct #'s" column.

    Returns
    -------
    list of list of str
        One inner list per row of *ss*, in row order. Every precinct is
        stripped of surrounding whitespace and zero-filled to 3 characters.
        A missing (NaN/None) cell yields an empty list.
    """
    pctlist = []
    for cell in ss["Pct #'s"]:
        # a blank spreadsheet cell has no precincts
        if pd.isna(cell):
            pctlist.append([])
            continue

        # strip each token so ' 101' becomes '101'; otherwise zfill leaves the
        # leading space in place and the precinct never matches the index
        tokens = (p.strip() for p in str(cell).split(','))

        # drop empty tokens left by padding commas, then add leading zeros
        pctlist.append([p.zfill(3) for p in tokens if p])

    return pctlist


In [None]:
# Load the current supersite list from the recap sheet of the workbook
# (the first three spreadsheet rows are skipped), add the pctlist column,
# and keep only the columns listed below.
# assign() calls pctstr_to_list with the DataFrame and stores what it returns.
supersites = (
    pd.read_excel(
        'data/2024_Supersite_list w Chairs & Cochairs.xlsx',
        sheet_name='Recap SS & Precinct #s',
        skiprows=3,
    )
    .assign(pctlist=pctstr_to_list)
    [[
        "Supersite",
        "# of Reg Dems",
        "Forecast of  Attendees",
        "# of Pct's",
        "Pct #'s",
        "pctlist",
    ]]
)

supersites.info()
supersites.head()

In [None]:
# Inspect the pctlist column
# - one list of precincts per supersite
# - every precinct should be 3 characters long, e.g. 810 or 002
supersites.loc[:, 'pctlist']

## 2 create pctgeo (precinct GeoDataFrame)
- read precinct geometry from current pct_area_boulder.geojson file
- add supersite name column using supersites dataframe
- save as pct_supersite.geojson

In [None]:
# 2. Build pctgeo: a GeoDataFrame of the individual precinct boundaries
#    stored in data/pct_area_boulder.geojson.
#    The frame is indexed by 'Pct'; drop=False keeps 'Pct' as a column too.
pctgeo = (
    gpd.read_file('data/pct_area_boulder.geojson', driver='GEOJSON')
    .set_index('Pct', drop=False)
)

pctgeo.info()
pctgeo.head()

In [None]:
# FUNCTION: Add supersite name column to pctgeo

def add_ss_to_pctgeo(ssdf, geo=None):
    """Write each supersite's name into the 'supersite' column of its precincts.

    Parameters
    ----------
    ssdf : DataFrame
        Needs a 'Supersite' column (the supersite name) and a 'pctlist'
        column (list of precinct labels belonging to that supersite).
    geo : DataFrame or GeoDataFrame, optional
        Frame indexed by precinct label that is updated in place.
        Defaults to the notebook-level ``pctgeo``.

    Returns
    -------
    None
        The target frame is modified in place.

    Notes
    -----
    Precincts that are not present in the target index are skipped and
    reported with a warning. Assigning them through ``.loc`` would silently
    append new rows without geometry (e.g. an extra '2' row next to '002').
    """
    import warnings

    if geo is None:
        geo = pctgeo  # notebook-level precinct GeoDataFrame

    unknown = []
    for ssname, pctlist in zip(ssdf['Supersite'], ssdf['pctlist']):
        # split this supersite's precincts into those we can label and
        # those missing from the precinct index
        known = [p for p in pctlist if p in geo.index]
        unknown.extend(p for p in pctlist if p not in geo.index)

        if known:
            geo.loc[known, 'supersite'] = ssname

    if unknown:
        warnings.warn(
            f"add_ss_to_pctgeo: {len(unknown)} precinct(s) not found in the "
            f"precinct index were skipped: {unknown}",
            stacklevel=2,
        )
    return



In [None]:
# Give every precinct a blank supersite name, then let add_ss_to_pctgeo
# fill in the name for each precinct listed in supersites.
pctgeo = pctgeo.assign(supersite='')
add_ss_to_pctgeo(supersites)

pctgeo.info()
pctgeo.head()

In [None]:
# ### Sanity-check the precinct data in the GeoDataFrame
# - expected number of distinct precincts: 193
# - expected number of distinct Areas: 27
# - precinct count per Area, listed in Area order
# (in a notebook cell only the last expression is displayed)

pctgeo.Pct.nunique()  # 193
pctgeo['AreaShort'].nunique()  # 27
pctgeo.value_counts('AreaShort').sort_index()

In [None]:
# Number of distinct values in the supersite column - expect 20
pctgeo.supersite.nunique()

# Number of precincts assigned to each supersite
pctgeo.supersite.value_counts()

In [None]:
# Write pctgeo, now including the supersite column, to a GeoJSON file
pctgeo.to_file(
    'data/pct_supersite.geojson',
    driver='GeoJSON',
    index=False,
)

### 3 Create Supersite GeoDataFrame, ssgeo, and add geometry column 

- create ssgeo with pctgeo.dissolve('supersite')
- save ssgeo to supersites_region_geom.geojson

In [None]:
# Build ssgeo, the supersite boundaries:
#   dissolve(by='supersite') groups the precinct rows by supersite and
#   merges each group's precinct geometry into one supersite geometry;
#   only the resulting geometry column is kept, then written to GeoJSON.

ssgeo = pctgeo.dissolve(by='supersite')['geometry']
ssgeo.to_file('data/supersites_region_geom.geojson', driver='GeoJSON')

ssgeo.plot()
ssgeo

### test supersites_region_geom.geojson

In [None]:
# Read the saved supersite regions back in and summarise the result
supersite_test = gpd.read_file(
    'data/supersites_region_geom.geojson',
    driver='GEOJSON',
)
supersite_test.info()

In [None]:
# Plot a single supersite region selected by name
# (swap in another name, e.g. 'Boulder HS', to view a different region)
supersite_test.loc[supersite_test['supersite'] == 'Altona MS'].plot()