# Create supersite locations list
## Create list of all potential supersite venues
- Standardize supersite venue names (e.g. "Boulder HS")
- Standardize supersite columns
    - venue (standard name of site, e.g. "Boulder HS")
    - ssid (unique code for venue)
    - address
    - organization (name of organization that owns venue)
    - geometry (geopandas geometry column, e.g. Point (-105.345678,40.123456))
    - lat (latitude)
    - lon (longitude)
- Combine lists of known supersites
    - 2024 supersites
    - 2024 alternate supersites
    - 2020 supersites
- Save Supersite list to file or database
    - supersite_venues.geojson
    - supersite_venues.xlsx

## Create 2024 Supersite locations file
- Read Judi's file: data/2024_Supersite_list w Chairs & Cochairs.xlsx
- Add geolocations for each Supersite
- Write supersite locations geojson for map

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd

import supersites as ss

## Create list of all potential supersites
- Standardize supersite names
- Standardize supersite columns
- Combine lists of known supersites
    - 2024 supersites
    - 2024 alternate supersites
    - 2020 supersites


In [None]:
# Load the 2024 supersites: ss_short_2024.geojson.
# No ``driver`` keyword is passed: read_file detects the format from the file,
# and extra keywords are forwarded to the I/O engine rather than selecting one.
ss2024 = gpd.read_file('data/ss_short_2024.geojson')

# Load the other potential supersites: ss_short_2024_alternate.geojson.
ssalt = gpd.read_file('data/ss_short_2024_alternate.geojson')

# Combine all rows of ss2024 and ssalt, ordered by venue name, and add empty
# lat/lon placeholder columns.
# ``np.nan`` is used because the ``np.NaN`` alias was removed in NumPy 2.0.
ssall = (
    pd.concat([ss2024, ssalt])
    .sort_values('Venue')
    .reset_index(drop=True)
    .assign(lat=np.nan, lon=np.nan)
)

ssall.info()
ssall


In [None]:
# Display the coordinate reference system (CRS) attached to ssall.
ssall.crs

## Compare ssall to supersites in 2024 supersite lists
- Read Judi's supersite spreadsheet
- Merge locations from ssall
- Check for missing supersites
- Create file of missing supersites
- Add lat and lon columns for missing supersites
- Create GeoDataFrame of missing supersites
- Append missing supersites to ssall

In [None]:
# Load Judi's supersite workbook (file dated 1/13/2024).
supersite_input = 'data/2024_Supersite_list w Chairs & Cochairs.xlsx'
sheetname = 'Recap SS & Precinct #s'

# Read the sheet with the project helper, then strip leading/trailing
# whitespace from every value in the 'supersite' column.
supersites = ss.read_supersite_pct(supersite_input, sheetname).pipe(
    lambda frame: frame.assign(supersite=frame['supersite'].str.strip())
)

supersites.info()
supersites

## Add locations for each 2024 supersite
- Add supersite location geometry column to supersites

In [None]:
# Left-join the ssall columns onto the supersites DataFrame, matching
# supersites['supersite'] against ssall['Venue']. Every supersite row is kept;
# rows without a matching venue get missing values in the ssall columns.
ss_locations = supersites.merge(
    ssall,
    how='left',
    left_on='supersite',
    right_on='Venue',
)

ss_locations.info()
ss_locations

In [None]:
# Write ss_locations to an Excel file so geolocations can be added for the
# venues that are missing them.
# NOTE(review): the following step reads 'data/ss_locations_missing.xlsx', not
# this file — presumably that workbook is prepared by hand from this export;
# confirm.
ss_locations.to_excel('data/ss_locations.xlsx')

## Add missing supersite geometries


In [None]:
# Read the 'missingVenues' sheet, which holds the 2024 supersites that are
# missing in ssall.
ssmissing = pd.read_excel(
    'data/ss_locations_missing.xlsx',
    sheet_name='missingVenues',
    skiprows=0,
)

# Keep only complete rows: a row with a missing value in any column is dropped.
ssmissing = ssmissing.dropna()

ssmissing.info()
ssmissing


In [None]:
# Turn the missing-venue table into a GeoDataFrame: one point per row built
# from the 'lon' (x) and 'lat' (y) columns, with the CRS set to EPSG:4326.
ssmissinggdf = gpd.GeoDataFrame(
    ssmissing,
    geometry=gpd.points_from_xy(x=ssmissing['lon'], y=ssmissing['lat']),
    crs='EPSG:4326',
)

ssmissinggdf.info()
ssmissinggdf


In [None]:
# Plot the geometries of the missing venues as a quick visual check.
ssmissinggdf.plot()

In [None]:
# add new supersites to ssall

# Columns to remove from the combined venue list after the missing-venue rows
# are appended.
dropcols = [
    'Column1',
    'supersite',
    'dems',
    'attendee_forecast',
    'total_precincts',
    'pctlist',
]

# Append the missing venues to ssall, remove the columns listed above, then
# order by venue name and renumber the index from zero.
ssall = (
    pd.concat([ssall, ssmissinggdf])
    .drop(columns=dropcols)
    .sort_values(by='Venue')
    .reset_index(drop=True)
)

ssall.info()
ssall
# POINT(-105.10862252002309 40.1466293875446 )

In [None]:
# Set lon/lat for every row from its geometry: x becomes lon, y becomes lat.
# This overwrites whatever the two columns held before.
ssall = ssall.assign(
    lon=lambda venues: venues['geometry'].x,
    lat=lambda venues: venues['geometry'].y,
)

ssall

In [None]:
# Wrap the combined venue list as a GeoDataFrame that uses the 'geometry'
# column as its geometry, with the CRS set to EPSG:4326.
ssallgdf = gpd.GeoDataFrame(
    ssall,
    crs='EPSG:4326',
    geometry='geometry',
)

ssallgdf.info()


In [None]:
# Plot the geometries of all venues as a quick visual check.
ssallgdf.plot()

In [None]:

# Save the full venue list as GeoJSON; index=False leaves the DataFrame index
# out of the written file.
ssallgdf.to_file('data/supersite_venues.geojson', driver='GeoJSON', index=False)

# Save the same table as an Excel workbook (the index is written as the first
# column, since no index argument is given).
ssallgdf.to_excel('data/supersite_venues.xlsx')
