# Create supersites_region_geom.geojson, a GeoJSON file with the caucus supersite borders

#### latest run of notebook: 1/14/2024 7am

## Main Steps
1. create supersites DataFrame from Judi's supersite precinct file
    - INPUT: data/2024_Supersite_list w Chairs & Cochairs.xlsx (update: 1/13/2024)
    - OUTPUT: none
2. create pctgeo (precinct GeoDataFrame)
    - INPUT: read precinct geometry from current pct_area_boulder.geojson file
    - add supersite name column using supersites dataframe
    - OUTPUT: data/pct_supersite.geojson (save pctgeo with supersite column) 
3. create supersite border geometry and save geojson file
    - create ssgeo from pctgeo by using dissolve('supersite')
    - OUTPUT: data/supersites_region_geom.geojson (save ssgeo)

- NOTE: the precinct-to-supersite geojson files must be regenerated (re-run this notebook) every time the supersite/precinct list is modified 
    - e.g. move precincts 814 and 823 from Frasier to Manhattan

### Notebook History Summary
- full history see git commits
- Started: January 7, 2024
- Update: January 8, 2024 - large revision - add ssname column to supersites
- Update: January 9, 2024 - version 0.1.0 add ssgeom to supersitesgeo
- Update: January 11, 2024 - version 0.2.0 add pctlist column to supersites dataframe
- Update: January 12, 2024 - version 0.3.0 start supersite boundary geometry
- Update: January 13, 2024 - version 0.3.1 finish supersite boundary geometry

### Variables

- supersites    - DataFrame read from Judi's supersite file
- pctgeo        - GeoDataFrame read from pct_area_boulder.geojson
- sss_pcts_lists    - list of lists of all the precincts at a supersite
- ssgeo         - GeoDataFrame of supersites with region boundaries


##     ISSUES

#### FIXED Precinct format mismatch
- pct_area_boulder.geojson lists zero-filled precincts, e.g. 002, 003, 004
- Draft Supersites-judi-20240106.xlsx lists single-digit precincts, e.g. 2, 3, 4
    - notebook creates rows for both, 002 and 2, 003 and 3, 004 and 4
    - not critical failure but should be fixed. 
- WORKAROUND: drop extra rows 2, 3, 4
- FIXED: zfill precincts when reading in Judi's file


In [6]:
import pandas as pd
import geopandas as gpd

import supersites as ss
# import fiona
# import numpy as np

## 1. Create supersites - dataframe of supersites with list of precincts in a column  
- start with Judi's spreadsheet
- create pctlist from "Pct #'s" column
    - use pctstr_to_list(ss) function to create series of lists
    - add pctlist series as column on supersites

In [8]:
# Load the supersite table through the project helper module (imported as `ss`)
# and inspect it. Things to check by eye in the output below:
# - total_precincts matches the length of pctlist for each supersite
# - 20 unique supersites in the dataframe
# - every column reports 20 non-null values
# NOTE(review): in the captured run, Altona MS shows total_precincts 14 while its
# pctlist holds 15 entries ('631' appears twice) - confirm against the spreadsheet.

supersites = ss.read_supersite_pct()
supersites.info()
supersites

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   supersite          20 non-null     object
 1   dems               20 non-null     int64 
 2   attendee_forecast  20 non-null     int64 
 3   total_precincts    20 non-null     int64 
 4   pctlist            20 non-null     object
dtypes: int64(3), object(2)
memory usage: 928.0+ bytes


Unnamed: 0,supersite,dems,attendee_forecast,total_precincts,pctlist
0,Allenspark Fire Station,155,4,1,[914]
1,Altona MS,7518,188,14,"[605, 606, 607, 608, 609, 610, 611, 614, 615, ..."
2,Boulder HS,7241,181,10,"[810, 817, 818, 820, 821, 828, 829, 831, 832, ..."
3,Burlington Elementary,3351,84,7,"[600, 601, 602, 603, 604, 612, 613]"
4,Casey MS,4696,117,7,"[822, 823, 824, 825, 826, 827, 910]"
5,Centaurus HS,11327,283,20,"[300, 301, 302, 303, 304, 305, 306, 307, 308, ..."
6,Centennial MS,8884,222,13,"[803, 804, 805, 806, 807, 811, 812, 813, 814, ..."
7,Eldorado K8,3303,83,8,"[003, 100, 101, 102, 103, 104, 105, 106]"
8,Erie MS,3936,98,9,"[400, 401, 402, 403, 404, 405, 406, 407, 408]"
9,Gold Hill School,308,8,1,[909]


In [13]:
# One row per (supersite, precinct) pair, ordered by precinct id.
pct_ss = (
    supersites[['supersite', 'pctlist']]
    .explode('pctlist')
    .sort_values('pctlist')
)
pct_ss.info()
pct_ss

<class 'pandas.core.frame.DataFrame'>
Index: 194 entries, 18 to 12
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   supersite  194 non-null    object
 1   pctlist    194 non-null    object
dtypes: object(2)
memory usage: 4.5+ KB


Unnamed: 0,supersite,pctlist
18,Trail Ridge MS,002
7,Eldorado K8,003
13,Manhattan MS,004
7,Eldorado K8,100
7,Eldorado K8,101
...,...,...
6,Centennial MS,912
10,Jamestown School,913
0,Allenspark Fire Station,914
12,Lyons Middle Senior,915


In [14]:
# Count distinct precincts; a value below the number of rows in pct_ss means
# at least one precinct is listed more than once.
pct_ss['pctlist'].nunique()

193

In [16]:
# Show the rows whose precinct id already appeared in an earlier row (repeat listings).
pct_ss[pct_ss.duplicated(subset=['pctlist'])]

Unnamed: 0,supersite,pctlist
1,Altona MS,631


In [18]:
# Show every row for precinct '631' to see which supersite(s) list it.
pct_ss[pct_ss['pctlist']=='631']

Unnamed: 0,supersite,pctlist
1,Altona MS,631
1,Altona MS,631


In [25]:
# Precinct list(s) recorded for Altona MS, as a list with one entry per matching row.
altona_pcts = supersites.loc[supersites['supersite'] == 'Altona MS', 'pctlist'].to_list()
altona_pcts

[['605',
  '606',
  '607',
  '608',
  '609',
  '610',
  '611',
  '614',
  '615',
  '616',
  '624',
  '625',
  '630',
  '631',
  '631']]

## 2 create pctgeo (precinct GeoDataFrame)
- read precinct geometry from current pct_area_boulder.geojson file
- add supersite name column using supersites dataframe
- save as pct_supersite.geojson

In [3]:
# 2. Create pctgeo: GeoDataFrame with the individual precinct boundaries,
#    read from data/pct_area_boulder.geojson.
# No `driver` argument is passed: the format is detected from the file, and
# read_file forwards unknown keyword arguments to the I/O engine, where
# `driver` is not a dependable read option.
pctgeo = gpd.read_file('data/pct_area_boulder.geojson')

# Index by precinct id (keeping the Pct column) so rows can be addressed as pctgeo.loc[pct].
pctgeo = pctgeo.set_index('Pct', drop=False)

# Label-based assignment with .loc assumes exactly one row per precinct id.
assert pctgeo.index.is_unique, 'duplicate Pct values in data/pct_area_boulder.geojson'

pctgeo.info()
pctgeo.head()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 193 entries, 002 to 916
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Pct        193 non-null    object  
 1   Precinct   193 non-null    object  
 2   AreaShort  193 non-null    object  
 3   CD         193 non-null    object  
 4   SD         193 non-null    object  
 5   HD         193 non-null    object  
 6   geometry   193 non-null    geometry
dtypes: geometry(1), object(6)
memory usage: 16.1+ KB


Unnamed: 0_level_0,Pct,Precinct,AreaShort,CD,SD,HD,geometry
Pct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,2,2151907002,LM-04,2,15,19,"POLYGON ((-105.05519 40.19115, -105.05518 40.1..."
3,3,2184907003,SU-01,2,18,49,"POLYGON ((-105.16920 39.92870, -105.17207 39.9..."
4,4,2151207004,BO-01,2,15,12,"POLYGON ((-105.18469 39.96263, -105.18459 39.9..."
100,100,2181207100,SU-01,2,18,12,"POLYGON ((-105.15439 39.92921, -105.15842 39.9..."
101,101,2181207101,SU-01,2,18,12,"POLYGON ((-105.14702 39.92887, -105.14736 39.9..."


In [4]:
# FUNCTION: Add supersite name column to pctgeo

def add_ss_to_pctgeo(ssdf, geo=None):
    """Write each precinct's supersite name into the 'supersite' column of a precinct frame.

    Parameters
    ----------
    ssdf : DataFrame
        One row per supersite, with a 'supersite' column (the name) and a
        'pctlist' column (an iterable of precinct ids).
    geo : DataFrame or GeoDataFrame, optional
        Frame indexed by precinct id that is updated in place. Defaults to the
        notebook-level ``pctgeo``.

    Returns
    -------
    None
        The target frame is modified in place.

    Raises
    ------
    KeyError
        If any precinct id in ``ssdf`` is not in the target index. Nothing is
        written in that case.
    """
    target = pctgeo if geo is None else geo

    # Collect precinct -> supersite first. A precinct repeated in the lists
    # collapses to a single entry (the last supersite seen wins).
    pct_to_ssname = {}
    for row_label in ssdf.index:
        ssname = ssdf.loc[row_label, 'supersite']
        for pct in ssdf.loc[row_label, 'pctlist']:
            pct_to_ssname[pct] = ssname

    # .loc assignment with an unknown label silently appends a new row
    # (e.g. '2' next to '002'), so validate before touching the frame.
    unknown = [pct for pct in pct_to_ssname if pct not in target.index]
    if unknown:
        raise KeyError(f'precincts not found in precinct index: {unknown}')

    for pct, ssname in pct_to_ssname.items():
        target.loc[pct, 'supersite'] = ssname
    return



In [5]:
# Add the supersite column to pctgeo and populate it from the supersites table.
# The column starts as '' (not NaN), so the non-null counts from .info() cannot
# reveal a precinct that was never assigned; check for leftover '' explicitly.
pctgeo['supersite'] = ''
add_ss_to_pctgeo(supersites)

unassigned = pctgeo.index[pctgeo['supersite'] == ''].tolist()
assert not unassigned, f'precincts with no supersite: {unassigned}'

pctgeo.info() # row count should still be 193: no precinct rows added
pctgeo.head()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 193 entries, 002 to 916
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Pct        193 non-null    object  
 1   Precinct   193 non-null    object  
 2   AreaShort  193 non-null    object  
 3   CD         193 non-null    object  
 4   SD         193 non-null    object  
 5   HD         193 non-null    object  
 6   geometry   193 non-null    geometry
 7   supersite  193 non-null    object  
dtypes: geometry(1), object(7)
memory usage: 17.6+ KB


Unnamed: 0_level_0,Pct,Precinct,AreaShort,CD,SD,HD,geometry,supersite
Pct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,2,2151907002,LM-04,2,15,19,"POLYGON ((-105.05519 40.19115, -105.05518 40.1...",Trail Ridge MS
3,3,2184907003,SU-01,2,18,49,"POLYGON ((-105.16920 39.92870, -105.17207 39.9...",Eldorado K8
4,4,2151207004,BO-01,2,15,12,"POLYGON ((-105.18469 39.96263, -105.18459 39.9...",Manhattan MS
100,100,2181207100,SU-01,2,18,12,"POLYGON ((-105.15439 39.92921, -105.15842 39.9...",Eldorado K8
101,101,2181207101,SU-01,2,18,12,"POLYGON ((-105.14702 39.92887, -105.14736 39.9...",Eldorado K8


In [None]:
# Check precinct data in the GeoDataFrame.
# Only the last expression of a cell is displayed, so the two counts are
# printed explicitly next to the values expected for the current data.
print('precincts:', pctgeo['Pct'].nunique(), '(expect 193)')
print('areas:', pctgeo['AreaShort'].nunique(), '(expect 27)')

# Number of precincts in each Area
pctgeo.value_counts('AreaShort').sort_index()

In [None]:
# Number of supersites; printed because only the last expression of a cell is displayed.
print('supersites:', pctgeo['supersite'].nunique(), '(expect 20)')

# Number of precincts per supersite
pctgeo['supersite'].value_counts()

In [None]:
# Save pctgeo, now including the supersite column, as GeoJSON.
# index=False: the index is not written as an extra column (set_index used
# drop=False, so the precinct id is still present as the Pct column).
pctgeo.to_file('data/pct_supersite.geojson', driver='GeoJSON', index=False)

### 3 Create Supersite GeoDataFrame, ssgeo, and add geometry column 

- create ssgeo by dissolving pctgeo('supersite')
- save ssgeo to supersites_region_geom.geojson

In [None]:
# Create ssgeo: supersite GeoDataFrame with supersite boundaries.
#   dissolve(by='supersite')
#      groups precincts by supersite
#      merges each group's precinct geometry into one supersite geometry
# Only the supersite and geometry columns are passed in, so the result is a
# GeoDataFrame indexed by supersite with a single geometry column (ssgeo is
# never rebound to a bare GeoSeries).
ssgeo = pctgeo[['supersite', 'geometry']].dissolve(by='supersite')

# index=True writes the supersite index as a 'supersite' property, so the file
# can be filtered by supersite name when it is read back.
ssgeo.to_file('data/supersites_region_geom.geojson', driver='GeoJSON', index=True)

ssgeo.plot()
ssgeo

### Test supersites_region_geom.geojson

In [None]:
# Read the saved supersite regions back to confirm the file is usable.
# No `driver` argument: read_file detects the format from the file, and the
# keyword would only be forwarded to the I/O engine.
supersite_test = gpd.read_file('data/supersites_region_geom.geojson')
supersite_test.info()

In [None]:
# Plot a single supersite region from the re-read file (swap the name to view another, e.g. 'Boulder HS').
supersite_test.loc[supersite_test['supersite'] == 'Altona MS'].plot()