# Create supersites_region_geom.geojson, a GeoJSON file with the caucus supersite borders

#### latest run of notebook: 1/14/2024 7am

## Main Steps
1. create supersites DataFrame from Judi's supersite precinct file
    - INPUT: data/Draft Recap Supersites, Precincts & Chairs .xlsx, sheet "Draft 1-19" (update: 1/19/2024)
    - supersites - DataFrame read from Judi's supersite file
    - OUTPUT: none
2. create pctgeo (precinct GeoDataFrame)
    - INPUT: read precinct geometry from current pct_area_boulder.geojson file
    - add supersite name column using supersites dataframe
    - OUTPUT: data/pct_supersite.geojson (save pctgeo with supersite column) 
3. create ssgeo (supersite border geometry) and save geojson file
    - INPUT: pctgeo GeoDataFrame
    - create ssgeo border_geom
        - from pctgeo by using dissolve('supersite')
    - OUTPUT: data/supersites_region_geom.geojson (save ssgeo)
4. add supersite location information and save file
    - INPUT: data/venues_all_years_fixed.xlsx
        - address
        - website
        - google map link
        - location geometry
    - OUTPUT: data/supersites_geom_2024.xlsx
    - OUTPUT: data/supersites_location_geom_2024.geojson

- NOTE: the precinct-to-supersite geojson must be regenerated every time the supersite/precinct list is modified
    - e.g. moving precincts 814 and 823 from Frasier to Manhattan

### Notebook History Summary
- full history see git commits
- Started: January 7, 2024
- Update: January 8, 2024 - large revision - add ssname column to supersites
- Update: January 9, 2024 - version 0.1.0 add ssgeom to supersitesgeo
- Update: January 11, 2024 - version 0.2.0 add pctlist column to supersites dataframe
- Update: January 12, 2024 - version 0.3.0 start supersite boundary geometry
- Update: January 13, 2024 - version 0.3.1 finish supersite boundary geometry
- Update: January 19, 2024 - version 0.4.0 add supersite locations


##     ISSUES

#### FIXED Precinct format mismatch
- pct_area_boulder.geojson lists zero-filled precincts, e.g. 002, 003, 004
- Draft Supersites-judi-20240106.xlsx lists single-digit precincts, e.g. 2, 3, 4
    - notebook creates rows for both, 002 and 2, 003 and 3, 004 and 4
    - not critical failure but should be fixed. 
- WORKAROUND: drop extra rows 2, 3, 4
- FIXED: zfill precincts when reading in Judi's file


In [None]:
import pandas as pd
import geopandas as gpd

import supersites as ss
# import fiona
# import numpy as np

## 1. Create supersites - dataframe of supersites with list of precincts in a column  
- start with Judi's spreadsheet
- create pctlist from "Pct #'s" column
    - use pctstr_to_list(ss) function to create series of lists
    - add pctlist series as column on supersites

In [None]:
# Read the supersite/precinct spreadsheet into the `supersites` DataFrame,
# then check the output below by eye:
# - total_precincts matches the length of pctlist for each supersite
# - 20 unique supersites in dataframe
# - all columns are 20 non-null


# supersite_input = 'data/Draft Recap Supersites, Precincts & Chairs .xlsx' # 1/13/2024
supersite_input = 'data/Draft Recap Supersites, Precincts & Chairs .xlsx' # 1/19/2024
sheetname ='Draft 1-19'

# read_supersite_pct comes from the local `supersites` module (imported as ss)
supersites = ss.read_supersite_pct(supersite_input, sheetname)
supersites.info()
supersites

In [None]:
# pct_ss['pctlist'].nunique()

In [None]:
# pct_ss[pct_ss.duplicated(subset=['pctlist'])]
# pct_ss[pct_ss.duplicated()]

In [None]:
# pct_ss[pct_ss['pctlist']=='631']

In [None]:
# altona_pcts = supersites[supersites['supersite']=='Altona MS']['pctlist'].to_list()
# altona_pcts

## 2 create pctgeo (precinct GeoDataFrame)
- read precinct geometry from current pct_area_boulder.geojson file
- add supersite name column using supersites dataframe
- save as pct_supersite.geojson

In [None]:
# 2. Create pctgeo, a GeoDataFrame read from the file with individual precinct boundaries
# data/pct_area_boulder.geojson
pctgeo = gpd.read_file('data/pct_area_boulder.geojson', driver='GEOJSON')

# set pctgeo index to Pct so rows can be addressed with pctgeo.loc[pct, ...];
# drop=False keeps Pct available as a regular column as well
pctgeo = pctgeo.set_index('Pct', drop=False)

# summary and first rows for a visual check
pctgeo.info()
pctgeo.head()

In [None]:
# FUNCTION: Add supersite name column to pctgeo

def add_ss_to_pctgeo(ssdf, pctgdf=None):
    """Write each precinct's supersite name into the 'supersite' column.

    Args:
        ssdf: DataFrame with one row per supersite. Reads the 'supersite'
            column (name) and the 'pctlist' column (iterable of precinct
            labels belonging to that supersite).
        pctgdf: Precinct (Geo)DataFrame indexed by precinct label; it is
            modified in place. When omitted, the notebook-level ``pctgeo``
            is used.

    Returns:
        None. The result is the in-place change to the precinct frame.

    Warns:
        UserWarning: if any precinct label in ``ssdf`` is not in the
            precinct index. Those labels are skipped rather than assigned,
            because ``.loc[label, col] = value`` on a missing label would
            silently append a new, geometry-less row (e.g. '2' next to '002').
    """
    import warnings

    target = pctgeo if pctgdf is None else pctgdf
    unknown = []

    for ssname, pctlist in zip(ssdf['supersite'], ssdf['pctlist']):
        for p in pctlist:
            if p in target.index:
                # add supersite name to the precinct row
                target.loc[p, 'supersite'] = ssname
            else:
                unknown.append((ssname, p))

    if unknown:
        warnings.warn(
            f"precincts not found in precinct index, skipped: {unknown}",
            stacklevel=2,
        )
    return



In [None]:
# add supersite column to pctgeo and populate it
# (start from '' so every precinct has a value; add_ss_to_pctgeo fills in the names)
pctgeo['supersite'] = ''
add_ss_to_pctgeo(supersites)

pctgeo.info() # check all columns have 193 non-null values
pctgeo.head()

In [None]:
# ### Check precinct data in the geodataframe
# - Number of precincts = 193
# - Number of Areas = 27
# - Check precincts in each Area

# number of distinct precinct labels (displayed as the cell output)
pctgeo['Pct'].nunique() # 193
# pctgeo.AreaShort.nunique() # 27
# pctgeo.value_counts('AreaShort').sort_index()

In [None]:
# check # of supersites - expect 20
# (printed explicitly: a notebook cell only displays its last expression,
#  so a bare nunique() here would be computed and thrown away)
print(pctgeo['supersite'].nunique())

# check precincts per supersite
pctgeo['supersite'].value_counts()

In [None]:
# save pctgeo (precinct geometry plus the supersite column) to
# data/pct_supersite.geojson
pctgeo.to_file('data/pct_supersite.geojson', driver='GeoJSON', index=False)

### 3 Create Supersite GeoDataFrame, ssgeo, and add geometry column 

- create ssgeo by dissolving pctgeo('supersite')
- save ssgeo to supersites_region_geom.geojson

In [None]:
# Create ssgeo: Supersite GeoDataFrame with supersite boundaries
#   pctgeo.dissolve('supersite')
#      group precincts by supersite
#      combine group precinct geometry into supersite geometry
#
# The supersite names are stripped BEFORE dissolving so that names differing
# only by leading/trailing whitespace end up in the same region instead of
# producing duplicate supersites after the strip.

ssgeo = (
    pctgeo
    .assign(supersite=pctgeo['supersite'].str.strip())
    .dissolve('supersite')
    .reset_index()
)
# keep only the supersite name and its dissolved border geometry
ssgeo = ssgeo[['supersite', 'geometry']]

ssgeo.to_file('data/supersites_region_geom.geojson', driver='GeoJSON')

ssgeo.plot()
ssgeo

### test supersites_region_geom.geojson

In [None]:
# read the saved supersite borders back in to confirm the file is readable
supersite_test = gpd.read_file('data/supersites_region_geom.geojson', driver='GeoJSON')
supersite_test.info()

In [None]:
# spot-check: plot the border of a single supersite selected by name
# supersite_test[supersite_test['supersite'] == 'Boulder HS'].plot()
# supersite_test[supersite_test['supersite'] == 'Allenspark Fire Station'].plot()
supersite_test[supersite_test['supersite'] == 'New Vista'].plot()

## 4 ADD LOCATION COLUMN TO 2024 SUPERSITES
- ssgeo GeoDataFrame
    - add location_geom column from venues_all_years_fixed.xlsx
    - rename the dissolved geometry column to border_geom
    - use set_geometry() to choose the active geometry (location_geom or border_geom)

In [None]:
# column summary of the supersite border GeoDataFrame before merging venues
ssgeo.info()

In [None]:
# display the supersite border GeoDataFrame
ssgeo

In [None]:
# read in the venue list from data/venues_all_years_fixed.xlsx
venues = pd.read_excel('data/venues_all_years_fixed.xlsx')

# summary and full table for a visual check
venues.info()
venues

In [None]:
# remove leading/trailing whitespace from the venue names; 'Venue' is the
# key that gets matched against the supersite names in the merge
venues['Venue'] = venues['Venue'].str.strip()

In [None]:
# Turn the venue table into a GeoDataFrame: one point per venue built from
# its lon (x) and lat (y) columns, tagged with CRS EPSG:4326.
vengdf = gpd.GeoDataFrame(
    data=venues,
    geometry=gpd.points_from_xy(x=venues['lon'], y=venues['lat']),
    crs='EPSG:4326',
)

vengdf.info()
# vengdf.plot()

In [None]:
# Write the venues (now including the point geometry) back to the workbook.
# NOTE: this is the same file the venues were read from, so it is overwritten.
# index=False keeps the row index out of the sheet; otherwise every
# read/write round trip would add another 'Unnamed: 0' column.
vengdf.to_excel('data/venues_all_years_fixed.xlsx', index=False)

In [None]:
# merge venue location information into the 2024 supersites (ss2024);
# how='left' keeps every supersite even when no venue matches
ss2024 = pd.merge(ssgeo, vengdf, left_on='supersite', right_on='Venue', how='left')
# both inputs have a 'geometry' column, so merge suffixes them:
# geometry_x comes from ssgeo (border), geometry_y from vengdf (location point)
ss2024 = ss2024.rename(columns={'geometry_x':'border_geom', 'geometry_y': 'location_geom'})
ss2024 = ss2024.set_geometry('location_geom')
# errors='ignore': the leftover 'geometry-invalid' column may not be present
# in every version of the venues workbook; do not fail when it is absent
ss2024 = ss2024.drop(columns=['geometry-invalid'], errors='ignore')
# ss2024 = ss2024.set_geometry('border_geom')

ss2024 = ss2024.set_crs('EPSG:4326')

# a left merge leaves unmatched supersites without a venue; report them
# instead of silently carrying rows with no location
unmatched = ss2024.loc[ss2024['Venue'].isna(), 'supersite'].tolist()
if unmatched:
    print('WARNING: no venue found for supersites:', unmatched)

ss2024.info()
ss2024

In [None]:
# plot the active geometry of ss2024
ss2024.plot()

In [None]:
# plot the supersite borders; set_geometry returns a new GeoDataFrame here,
# so ss2024 itself keeps its current active geometry
ss2024.set_geometry('border_geom').plot()

In [None]:

# save the merged supersite table to data/supersites_geom_2024.xlsx
ss2024.to_excel('data/supersites_geom_2024.xlsx')

In [None]:
# save supersite locations in geojson file

# set_geometry() returns a new GeoDataFrame (it is not in-place), so its
# result has to be used directly; border_geom is dropped so the file is
# written with location_geom as its only geometry column
(
    ss2024
    .set_geometry('location_geom')
    .drop(columns=['border_geom'])
    .to_file('data/supersites_location_geom_2024.geojson', driver='GeoJSON')
)

In [None]:
# read the saved supersite locations back in to confirm the file is readable
ss2024gdf = gpd.read_file('data/supersites_location_geom_2024.geojson', driver='GeoJSON')

# summary and plot for a visual check
ss2024gdf.info()
ss2024gdf.plot()
