# Create geojson file with the caucus supersite borders
    
1. create sspct (supersite DataFrame from Judi's supersite precinct file)
    - add ssname column to each precinct
2. create pctgeo (precinct GeoDataFrame with precinct geometry from current pct_area_boulder.geojson file)
3. create sspctgeo and add ssgeom column (supersite GeoDataFrame from sspct)
4. for each supersite create ssgeom around the precincts in the supersite
    - add ssgeom to each supersite in sspctgeo
5. Save files in geojson format ready to be used directly in an online map
    - GeoJSON output files for Leaflet


- NOTE: need to update precinct to Supersite geojson every time a Supersite/precinct list is modified 
    - e.g. move precincts 814 and 823 from Frasier to Manhatten

- Started: January 7, 2024
- Update: January 8, 2024 - large revision - add ssname column to sspct
- Update: January 9, 2023 - add ssgeom to sspctgeo

### Input Files

- Judi Bodinger's Caucus Supersite file with precinct column
    - supersiteDB/data/Draft Supersites-judi-20240106.xlsx
        - Supersite
        - Pct #'s
        ...
- 2022 precinct geometry file
    - data/pct_area_boulder.geojson

### Output Files
- sspct_geom.geojson
  - ssname
  - ssgeom - border around all precincts


In [27]:
import pandas as pd
import geopandas as gpd
# import fiona
# import numpy as np

## 1. Create sspct - dataframe of supersites with list of precincts in a column  
- start with Judi's spreadsheet
- add pctlist column -  list of precincts in Supersite

- sspct - dataframe of 2024 supersites  

        - Supersite
        - Region	
        - (# of Reg Dems)	
        - Forecast of  Attendees	
        - (# of Pct's)	
        - Pct #'s
        - (# of Chairs)	
        - Chair Name
        - Chair2 Name	
        - Chair3 Name	
        - chair 4 Name	
        - Chair 5 Name


In [None]:
# Read current supersite list
sspct = pd.read_excel('data/Draft Supersites-judi-20240106.xlsx', sheet_name='Draft 1-6', skiprows=3)
sspct.info()

In [29]:
cols = sspct.columns
# cols = ['Supersite', 'Region', '# of Reg Dems', 'Forecast of  Attendees',
#        '# of Pct's', 'Pct #'s', '# of Chairs', 'Chair Name', 'Chair2 Name',
#        'Chair3 Name', 'chair 4 Name', 'Chair 5 Name']

In [None]:
sspct["Pct #'s"]

### add pctlist column to sspct
- convert string of precinct numbers to list of precincts
- add pctlist to each supersite

In [31]:
# get string of precincts in each supersite
sspctnums = sspct["Pct #'s"].tolist()

# for each supersite, transform precinct strings to list of precincts
sspctlist = [pct.rstrip(',').split(',') for pct in sspctnums]
sspctlist[:2]

[['810', '817', '818', '820', '821', '828', '829', '831', '832', '833'],
 ['822', '823', '824', '825', '826', '827', '910']]

In [32]:
# add pctlist column to sspct dataframe
sspct['pctlist'] = sspctlist
sspct.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Supersite               20 non-null     object 
 1   Region                  20 non-null     object 
 2   # of Reg Dems           20 non-null     int64  
 3   Forecast of  Attendees  20 non-null     float64
 4   # of Pct's              20 non-null     int64  
 5   Pct #'s                 20 non-null     object 
 6   # of Chairs             20 non-null     int64  
 7   Chair Name              20 non-null     object 
 8   Chair2 Name             13 non-null     object 
 9   Chair3 Name             8 non-null      object 
 10  chair 4 Name            2 non-null      object 
 11  Chair 5 Name            1 non-null      object 
 12  pctlist                 20 non-null     object 
dtypes: float64(1), int64(3), object(9)
memory usage: 2.2+ KB


In [71]:
# check first supersites
sspct.loc[0,'pctlist']

## 2 Create pctgeo - supersite pct Boundary Files


In [None]:
# 2. Create GeoDataframe from file with individual precinct boundaries
# data/pct_area_boulder.geojson
pctgeo = gpd.read_file('data/pct_area_boulder.geojson', driver='GEOJSON')

# set sspct index to Pct
pctgeo = pctgeo.set_index('Pct', drop=False)

pctgeo.info()


### Check precinct data in  geodataframe
- Number of precincts = 193
- Number of Areas = 28
- Review Area_Short column
- Check # of precincts in each Area

In [35]:
pctgeo['Pct'].nunique() # 193

193

In [36]:
pctgeo.AreaShort.nunique() # 27

27

In [None]:
pctgeo.value_counts('AreaShort').sort_index()

In [38]:
pctgeo.head()

Unnamed: 0_level_0,Pct,Precinct,AreaShort,CD,SD,HD,geometry
Pct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,2,2151907002,LM-04,2,15,19,"POLYGON ((-105.05519 40.19115, -105.05518 40.1..."
3,3,2184907003,SU-01,2,18,49,"POLYGON ((-105.16920 39.92870, -105.17207 39.9..."
4,4,2151207004,BO-01,2,15,12,"POLYGON ((-105.18469 39.96263, -105.18459 39.9..."
100,100,2181207100,SU-01,2,18,12,"POLYGON ((-105.15439 39.92921, -105.15842 39.9..."
101,101,2181207101,SU-01,2,18,12,"POLYGON ((-105.14702 39.92887, -105.14736 39.9..."


### Explore adding supersite name to pctgeo

In [39]:
sspct['pctlist']

0     [810, 817, 818, 820, 821, 828, 829, 831, 832, ...
1                   [822, 823, 824, 825, 826, 827, 910]
2     [803, 804, 805, 806, 807, 808, 811, 812, 813, ...
3                                                 [913]
4     [4, 500, 501, 834, 835, 836, 837, 838, 839, 84...
5     [830, 843, 844, 845, 846, 847, 850, 851, 852, ...
6         [400, 401, 402, 403, 404, 405, 406, 407, 408]
7     [503, 504, 505, 506, 507, 508, 509, 510, 800, ...
8     [300, 301, 302, 303, 304, 305, 306, 307, 308, ...
9     [606, 607, 608, 609, 610, 611, 614, 615, 616, ...
10             [600, 601, 602, 603, 604, 605, 612, 613]
11    [626, 627, 628, 629, 643, 644, 645, 646, 617, ...
12    [2, 618, 619, 620, 621, 622, 623, 633, 634, 63...
13    [200, 201, 202, 203, 204, 205, 206, 207, 208, ...
14                                                [914]
15                                                [909]
16                            [700, 701, 702, 915, 916]
17                       [901, 902, 903, 904, 90

In [40]:
sspct['pctlist'][0][0] == '810'

True

In [41]:
pctgeo['supersite'] = None
pctgeo.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 193 entries, 002 to 916
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Pct        193 non-null    object  
 1   Precinct   193 non-null    object  
 2   AreaShort  193 non-null    object  
 3   CD         193 non-null    object  
 4   SD         193 non-null    object  
 5   HD         193 non-null    object  
 6   geometry   193 non-null    geometry
 7   supersite  0 non-null      object  
dtypes: geometry(1), object(7)
memory usage: 17.6+ KB


In [42]:
# # Test on one pct in one ss

# # get first pct in first supersite
# ss = 0
# i = 0
# pct_in_ss = sspct['pctlist'][ss][i] 
# print(pct_in_ss)  # 810

# # get supersite name
# ssname = sspct.loc[0,'Supersite'] 
# print(ssname)  # Boulder HS

# # add supersite name to pctgeo
# pctgeo.loc[pct_in_ss, 'supersite'] = ssname
# print(pctgeo.loc[pct_in_ss, :])  #  dataframe row



In [43]:
# # Test on all precincts in one ss

# # get pctlist in first supersite
# ss = 0
# pctlist = sspct['pctlist'][ss]
# print(pctlist, '\n')  # list of pcts in first supersite

# # get supersite name
# ssname = sspct.loc[ss,'Supersite'] 

# # loop through each pct in pctlist

# for p in pctlist:
    
#     print(p, ssname,'\n')  # Boulder HS

#     # add supersite name to pctgeo
#     pctgeo.loc[p, 'supersite'] = ssname
#     print(pctgeo.loc[p, :], '\n')  #  dataframe row


In [46]:
# Run on all supersites

# loop through all supersites
for ss in sspct.index:

    # get pctlist in first supersite
    # ss = 0
    pctlist = sspct['pctlist'][ss]
    # print(pctlist, '\n')  # list of pcts in first supersite

    # get supersite name
    ssname = sspct.loc[ss,'Supersite'] 

    # loop through each pct in pctlist

    for p in pctlist:
        
        # print(p, ssname,'\n')  # Boulder HS

        # add supersite name to pctgeo
        pctgeo.loc[p, 'supersite'] = ssname
        # print(pctgeo.loc[p, :], '\n')  #  dataframe row



In [47]:
pctgeo.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 196 entries, 002 to 3
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Pct        193 non-null    object  
 1   Precinct   193 non-null    object  
 2   AreaShort  193 non-null    object  
 3   CD         193 non-null    object  
 4   SD         193 non-null    object  
 5   HD         193 non-null    object  
 6   geometry   193 non-null    geometry
 7   supersite  193 non-null    object  
dtypes: geometry(1), object(7)
memory usage: 17.9+ KB


In [48]:
pctgeo.head()

Unnamed: 0_level_0,Pct,Precinct,AreaShort,CD,SD,HD,geometry,supersite
Pct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,2,2151907002,LM-04,2,15,19,"POLYGON ((-105.05519 40.19115, -105.05518 40.1...",
3,3,2184907003,SU-01,2,18,49,"POLYGON ((-105.16920 39.92870, -105.17207 39.9...",
4,4,2151207004,BO-01,2,15,12,"POLYGON ((-105.18469 39.96263, -105.18459 39.9...",
100,100,2181207100,SU-01,2,18,12,"POLYGON ((-105.15439 39.92921, -105.15842 39.9...",Eldorado K8
101,101,2181207101,SU-01,2,18,12,"POLYGON ((-105.14702 39.92887, -105.14736 39.9...",Eldorado K8


In [None]:
# write pctgeo with supersite name to pct_supersite_geom.geojson
pctgeo.to_file('data/pct_supersite.geojson', driver='GeoJSON')


### 3 Create Supersite GeoDataFrame and add geometry column 

- create ssgdf - supersite GeoDataFrame
- add ssgeom column to ssgdf

###      TEST Create pct outline for BOULDER HIGH SCHOOL SUPERSITE 

- create a geodataframe with each precinct at BHS
- create the outline geometry around all the precincts
- add the geometry to a column in the supersite dataframe

In [61]:
pctgeo
# pctgeo.loc[ss,'geometry']

Unnamed: 0_level_0,Pct,Precinct,AreaShort,CD,SD,HD,geometry,supersite
Pct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
002,002,2151907002,LM-04,2,15,19,"POLYGON ((-105.05519 40.19115, -105.05518 40.1...",
003,003,2184907003,SU-01,2,18,49,"POLYGON ((-105.16920 39.92870, -105.17207 39.9...",
004,004,2151207004,BO-01,2,15,12,"POLYGON ((-105.18469 39.96263, -105.18459 39.9...",
100,100,2181207100,SU-01,2,18,12,"POLYGON ((-105.15439 39.92921, -105.15842 39.9...",Eldorado K8
101,101,2181207101,SU-01,2,18,12,"POLYGON ((-105.14702 39.92887, -105.14736 39.9...",Eldorado K8
...,...,...,...,...,...,...,...,...
915,915,2154907915,MT-03,2,15,49,"POLYGON ((-105.24898 40.21229, -105.24935 40.2...",Lyons Middle Senior
916,916,2154907916,MT-03,2,15,49,"POLYGON ((-105.19594 40.26146, -105.19971 40.2...",Lyons Middle Senior
4,,,,,,,,Manhattan MS
2,,,,,,,,Trail Ridge MS


In [70]:
# Test create an outline of all precincts in one ss

# get pctlist in first supersite
ss = 0
ssname = sspct['Supersite'][ss]
pctlist = sspct['pctlist'][ss]
# print(ssname, pctlist, '\n')  # list of pcts in first supersite

# create an empty geodataframe to collect the supersite precints geometry
ssgdfcols = ['Pct', 'Supersite', 'pgeom']
ssgdf = gpd.GeoDataFrame(columns=ssgdfcols, geometry='pgeom')
# print(ssgdf.info())

#  add a row for each precinct in pctlist

for p in pctlist:
    
    # print(p, ssname,'\n')  # Boulder HS

    # add precinct geometry to ssgdf
    pgeom = pctgeo.loc[p,'geometry']
    # print(pgeom)
    ssgdf.loc[p, 'pgeom'] = pgeom
# print(ssgdf)

# calculate supersite border geometry

ssgeom = ssgdf.dissolve()
# ssgeom.plot()

# add supersite geometry to sspctgdf





### 4 Calculate supersite boundaries
- for each supersite
    - calculate ssgeom boundaries 
    - add ssgeom value to supersite row

In [None]:

# this sets all column names and data types correctly
# area_gdf = gpd.GeoDataFrame(area_groups.get_group('BO-01').dissolve())
# area_gdf.plot()

###   SAVE ss_pct_geom.geojson file

### REFERENCE CODE from pct_area_geom.ipynb  Create Area Boundaries
- use .dissolve() to aggregate precincts into Areas and return Area boundary geometry

In [None]:
area_groups = pctgeo.groupby('AreaShort')

In [None]:
area_groups.groups

In [None]:
# type(area_groups.get_group('BO-01'))
area_groups.get_group('BO-01')

In [None]:
# Create boundary for each Area from outline of precincts in Area
for name, group in area_groups:
    area_gdf = pd.concat([area_gdf, group.dissolve()], ignore_index=True)   

In [None]:
area_gdf.info()

In [None]:
area_gdf

In [None]:
# Delete duplicate 1st row
area_gdf.drop(0, inplace=True)

In [None]:
area_gdf = area_gdf[['AreaShort', 'geometry']]
area_gdf

In [None]:
area_gdf.plot()

In [None]:
area_gdf.to_file('../geo/geojson/areas.geojson', driver='GeoJSON')

### test areas.geojson

In [None]:
area_test = gpd.read_file('../geo/geojson/areas.geojson', driver='GeoJSON')
area_test.info()

In [None]:
area_test[area_test.AreaShort == 'BO-09'].plot()

## Transform data in dataframes

## Save transformed data in map format files