# Create geojson file with the caucus supersite borders
    
- read Judi's supersite precinct file
- group precincts by supersite
- create a border file around the each supersite precincts
- Save files in geojson format ready to be used directly in an online map
    - GeoJSON output files for Leaflet


- NOTE: need to update precinct to Supersite geojson every time a Supersite/precinct list is modified 
    - e.g. move precincts 814 and 823 from Frasier to Manhatten

- Started: January 7, 2024
- Updated: January 8, 2024 - large revision

### Input Files

- Judi Bodinger's Caucus Supersite file with precinct column
    - supersiteDB/data/Draft Supersites-judi-20240106.xlsx

        - Supersite
        - Pct #'s
        ...

### Output Files
- ss_pct_geom.geojson file 
  - supersite_pct.geojson
  - ss_name
  - geometry - border around all precincts


In [1]:
import pandas as pd
import geopandas as gpd
import fiona
# import numpy as np

## Create sspct - dataframe of supersites with list of precincts in a column  

- sspct - dataframe of 2024 supersites  

        - Supersite
        - Region	
        - (# of Reg Dems)	
        - Forecast of  Attendees	
        - (# of Pct's)	
        - Pct #'s
        - (# of Chairs)	
        - Chair Name
        - Chair2 Name	
        - Chair3 Name	
        - chair 4 Name	
        - Chair 5 Name


In [2]:
# Read current supersite list
sspct = pd.read_excel('data/Draft Supersites-judi-20240106.xlsx', sheet_name='Draft 1-6', skiprows=3)
sspct.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Supersite               20 non-null     object 
 1   Region                  20 non-null     object 
 2   # of Reg Dems           20 non-null     int64  
 3   Forecast of  Attendees  20 non-null     float64
 4   # of Pct's              20 non-null     int64  
 5   Pct #'s                 20 non-null     object 
 6   # of Chairs             20 non-null     int64  
 7   Chair Name              20 non-null     object 
 8   Chair2 Name             13 non-null     object 
 9   Chair3 Name             8 non-null      object 
 10  chair 4 Name            2 non-null      object 
 11  Chair 5 Name            1 non-null      object 
dtypes: float64(1), int64(3), object(8)
memory usage: 2.0+ KB


In [3]:
cols = sspct.columns
# cols = ['Supersite', 'Region', '# of Reg Dems', 'Forecast of  Attendees',
#        '# of Pct's', 'Pct #'s', '# of Chairs', 'Chair Name', 'Chair2 Name',
#        'Chair3 Name', 'chair 4 Name', 'Chair 5 Name']

In [4]:
sspct["Pct #'s"]

0      810,817,818,820,821,828,829,831,832,833,,,,,,,,,
1               822,823,824,825,826,827,910,,,,,,,,,,,,
2     803,804,805,806,807,808,811,812,813,814,815,81...
3                                 913,,,,,,,,,,,,,,,,,,
4     4,500,501,834,835,836,837,838,839,840,841,842,...
5     830,843,844,845,846,847,850,851,852,853,854,85...
6         400,401,402,403,404,405,406,407,408,,,,,,,,,,
7     503,504,505,506,507,508,509,510,800,801,802,80...
8     300,301,302,303,304,305,306,307,308,309,310,31...
9     606,607,608,609,610,611,614,615,616,624,625,63...
10           600,601,602,603,604,605,612,613,,,,,,,,,,,
11    626,627,628,629,643,644,645,646,617,632,641,64...
12    2,618,619,620,621,622,623,633,634,635,636,637,...
13    200,201,202,203,204,205,206,207,208,209,210,21...
14                                914,,,,,,,,,,,,,,,,,,
15                                909,,,,,,,,,,,,,,,,,,
16                    700,701,702,915,916,,,,,,,,,,,,,,
17                 901,902,903,904,905,907,,,,,,

### Create Supersite borders and save to file

In [5]:
# get string of precincts in each supersite
sspctnums = sspct["Pct #'s"].tolist()

# for each supersite, transform precinct strings to list of precincts
sspctlist = [pct.rstrip(',').split(',') for pct in sspctnums]
sspctlist[:2]

[['810', '817', '818', '820', '821', '828', '829', '831', '832', '833'],
 ['822', '823', '824', '825', '826', '827', '910']]

In [6]:
# add pctlist column to sspct dataframe
sspct['pctlist'] = sspctlist
sspct.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Supersite               20 non-null     object 
 1   Region                  20 non-null     object 
 2   # of Reg Dems           20 non-null     int64  
 3   Forecast of  Attendees  20 non-null     float64
 4   # of Pct's              20 non-null     int64  
 5   Pct #'s                 20 non-null     object 
 6   # of Chairs             20 non-null     int64  
 7   Chair Name              20 non-null     object 
 8   Chair2 Name             13 non-null     object 
 9   Chair3 Name             8 non-null      object 
 10  chair 4 Name            2 non-null      object 
 11  Chair 5 Name            1 non-null      object 
 12  pctlist                 20 non-null     object 
dtypes: float64(1), int64(3), object(9)
memory usage: 2.2+ KB


In [7]:
sspct.loc[0,'pctlist']

['810', '817', '818', '820', '821', '828', '829', '831', '832', '833']

## Create supersite pct Boundary Files

- Create ss_pct boundaries for each supersits
- Save ss_pct.geojson GeoJSON files

In [8]:
# Create GeoDataframe from file with individual precinct boundaries
# data/pct_area_boulder.geojson
pctgeo = gpd.read_file('data/pct_area_boulder.geojson', driver='GEOJSON')

# set sspct index to Pct
pctgeo = pctgeo.set_index('Pct', drop=False)

pctgeo.info()


<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 193 entries, 002 to 916
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Pct        193 non-null    object  
 1   Precinct   193 non-null    object  
 2   AreaShort  193 non-null    object  
 3   CD         193 non-null    object  
 4   SD         193 non-null    object  
 5   HD         193 non-null    object  
 6   geometry   193 non-null    geometry
dtypes: geometry(1), object(6)
memory usage: 16.1+ KB


### Check precinct data in  geodataframe
- Number of precincts = 193
- Number of Areas = 28
- Review Area_Short column
- Check # of precincts in each Area

In [9]:
pctgeo['Pct'].nunique() # 193

193

In [10]:
pctgeo.AreaShort.nunique() # 27

27

In [11]:
pctgeo.value_counts('AreaShort').sort_index()

AreaShort
BO-01     7
BO-02     5
BO-03     6
BO-04    10
BO-05     5
BO-07     6
BO-08     5
BO-09     5
BO-10     7
BO-11     6
ER-01     9
GN-01     7
LF-01     9
LF-02    11
LM-01     7
LM-02     7
LM-03     7
LM-04     9
LM-05     8
LM-06     8
LM-07    10
LV-01     7
LV-02     6
MT-01     6
MT-02     6
MT-03     6
SU-01     8
Name: count, dtype: int64

In [12]:
pctgeo.head()

Unnamed: 0_level_0,Pct,Precinct,AreaShort,CD,SD,HD,geometry
Pct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,2,2151907002,LM-04,2,15,19,"POLYGON ((-105.05519 40.19115, -105.05518 40.1..."
3,3,2184907003,SU-01,2,18,49,"POLYGON ((-105.16920 39.92870, -105.17207 39.9..."
4,4,2151207004,BO-01,2,15,12,"POLYGON ((-105.18469 39.96263, -105.18459 39.9..."
100,100,2181207100,SU-01,2,18,12,"POLYGON ((-105.15439 39.92921, -105.15842 39.9..."
101,101,2181207101,SU-01,2,18,12,"POLYGON ((-105.14702 39.92887, -105.14736 39.9..."


### Explore adding supersite name to pctgeo

In [13]:
sspct['pctlist']

0     [810, 817, 818, 820, 821, 828, 829, 831, 832, ...
1                   [822, 823, 824, 825, 826, 827, 910]
2     [803, 804, 805, 806, 807, 808, 811, 812, 813, ...
3                                                 [913]
4     [4, 500, 501, 834, 835, 836, 837, 838, 839, 84...
5     [830, 843, 844, 845, 846, 847, 850, 851, 852, ...
6         [400, 401, 402, 403, 404, 405, 406, 407, 408]
7     [503, 504, 505, 506, 507, 508, 509, 510, 800, ...
8     [300, 301, 302, 303, 304, 305, 306, 307, 308, ...
9     [606, 607, 608, 609, 610, 611, 614, 615, 616, ...
10             [600, 601, 602, 603, 604, 605, 612, 613]
11    [626, 627, 628, 629, 643, 644, 645, 646, 617, ...
12    [2, 618, 619, 620, 621, 622, 623, 633, 634, 63...
13    [200, 201, 202, 203, 204, 205, 206, 207, 208, ...
14                                                [914]
15                                                [909]
16                            [700, 701, 702, 915, 916]
17                       [901, 902, 903, 904, 90

In [14]:
sspct['pctlist'][0][0] == '810'

True

In [15]:
pctgeo['supersite'] = None
pctgeo.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 193 entries, 002 to 916
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Pct        193 non-null    object  
 1   Precinct   193 non-null    object  
 2   AreaShort  193 non-null    object  
 3   CD         193 non-null    object  
 4   SD         193 non-null    object  
 5   HD         193 non-null    object  
 6   geometry   193 non-null    geometry
 7   supersite  0 non-null      object  
dtypes: geometry(1), object(7)
memory usage: 17.6+ KB


In [16]:
# # Test on one pct in one ss

# # get first pct in first supersite
# ss = 0
# i = 0
# pct_in_ss = sspct['pctlist'][ss][i]
# # get supersite name
# ssname = sspct.loc[0,'Supersite']
# pctgeo.loc[pct_in_ss, 'supersite'] = ssname
# pctgeo.loc[pct_in_ss, :]

sspct['pctlist'][0]

['810', '817', '818', '820', '821', '828', '829', '831', '832', '833']

In [22]:
# add supersite name to all precincts

for i in sspct.index:
    # get supersite name

    ss = sspct.loc[ i, 'Supersite']
    # print(ss)
    for p in sspct['pctlist']:
        print(p)
        # set superite name in pctgeo
        pctgeo.loc[p, 'supersite'] = ss


# pctgeo.info()


['810', '817', '818', '820', '821', '828', '829', '831', '832', '833']
['822', '823', '824', '825', '826', '827', '910']
['803', '804', '805', '806', '807', '808', '811', '812', '813', '814', '815', '816', '911', '912']
['913']
['4', '500', '501', '834', '835', '836', '837', '838', '839', '840', '841', '842', '848', '849', '900']


KeyError: "['4'] not in index"

In [18]:
pctgeo.head()

Unnamed: 0_level_0,Pct,Precinct,AreaShort,CD,SD,HD,geometry,supersite
Pct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,2,2151907002,LM-04,2,15,19,"POLYGON ((-105.05519 40.19115, -105.05518 40.1...",
3,3,2184907003,SU-01,2,18,49,"POLYGON ((-105.16920 39.92870, -105.17207 39.9...",
4,4,2151207004,BO-01,2,15,12,"POLYGON ((-105.18469 39.96263, -105.18459 39.9...",
100,100,2181207100,SU-01,2,18,12,"POLYGON ((-105.15439 39.92921, -105.15842 39.9...",
101,101,2181207101,SU-01,2,18,12,"POLYGON ((-105.14702 39.92887, -105.14736 39.9...",


### Create Supersite borders and save to file

In [None]:
# get string of precincts in each supersite
sspcts = sspct["Pct #'s"].tolist()

# for each supersite, transform precinct strings to list of precincts
sspctlist = [pct.rstrip(',').split(',') for pct in sspcts]
sspctlist[:2]

In [None]:
# add pctlist column to sspct dataframe
sspct['pctlist'] = sspctlist
sspct.info()

In [None]:
sspct.loc[0,'pctlist']

In [None]:
sspct.head()

##      TEST OF BOULDER HIGH SCHOOL SUPERSITE 

In [None]:
# create GeoDataFrame for a single supersite
sspct_str_list = sspct.pctlist
ssbhs = sspct_str_list[0]
ssbhs

In [None]:
# create GeoDataFrame for a single supersite


In [None]:
# create a list with a row for each precinct at Boulder High School supersite
bhsgeo = []
for i, pct in enumerate(pctgeo['Pct']):
    # print(i,pct)
    if pct in ssbhs:
        # print(pctgeo.iloc[i,:])
        bhsgeo.append( pctgeo.iloc[i,:])

bhsgeo


In [None]:
bhsgpd = gpd.GeoDataFrame(bhsgeo)
bhsgpd

In [None]:
# create supersite boundary geometry for BHS
bhsgeom = bhsgpd.dissolve()
bhsgeom.plot()

In [None]:

# this sets all column names and data types correctly
# area_gdf = gpd.GeoDataFrame(area_groups.get_group('BO-01').dissolve())
# area_gdf.plot()

### Create Area Boundaries
- use .dissolve() to aggregate precincts into Areas and return Area boundary geometry

In [None]:
area_groups = pctgeo.groupby('AreaShort')

In [None]:
area_groups.groups

In [None]:
# type(area_groups.get_group('BO-01'))
area_groups.get_group('BO-01')

In [None]:
# Create boundary for each Area from outline of precincts in Area
for name, group in area_groups:
    area_gdf = pd.concat([area_gdf, group.dissolve()], ignore_index=True)   

In [None]:
area_gdf.info()

In [None]:
area_gdf

In [None]:
# Delete duplicate 1st row
area_gdf.drop(0, inplace=True)

In [None]:
area_gdf = area_gdf[['AreaShort', 'geometry']]
area_gdf

In [None]:
area_gdf.plot()

In [None]:
area_gdf.to_file('../geo/geojson/areas.geojson', driver='GeoJSON')

### test areas.geojson

In [None]:
area_test = gpd.read_file('../geo/geojson/areas.geojson', driver='GeoJSON')
area_test.info()

In [None]:
area_test[area_test.AreaShort == 'BO-09'].plot()

## Transform data in dataframes

## Save transformed data in map format files