In [1]:
import os
import pandas as pd
import geopandas as gpd
import numpy as np
import plotly.express as px
import sqlite3

In [2]:
# Source/working directories, geopackage, layers and shapefiles
# The working directory defaults to the original local path, but it can be
# overridden with the SARGASSUM_WORK_DIR environment variable so the notebook
# can run on another machine without editing this cell.
work_dir = os.environ.get(
    'SARGASSUM_WORK_DIR',
    '/Users/arbailey/natcap/idb/data/work/sargassum/shore_segments',
)
gpkg = 'shoreline_segments.gpkg'
# Full path of the GeoPackage that the cells below read from and write to
shore_gpkg = os.path.join(work_dir, gpkg)
print(shore_gpkg)

/Users/arbailey/natcap/idb/data/work/sargassum/shore_segments/shoreline_segments.gpkg


# Input Layers -- Shoreline segments, points, and Point Sampled Population

In [3]:
# Read the two base layers stored in the shoreline GeoPackage:
#   'shoreQR_50m_pts'  -> shoreline points
#   'shoreQR_segments' -> shoreline segments
shore_points_gdf = gpd.read_file(shore_gpkg, layer='shoreQR_50m_pts')
shore_segments_gdf = gpd.read_file(shore_gpkg, layer='shoreQR_segments')

# Quick sanity checks: segment column types and the CRS of the points
# (the points' CRS is the reprojection target used further down).
print(shore_segments_gdf.dtypes)
print(shore_points_gdf.crs)
shore_points_gdf

type_geomorph      object
length_km         float64
seg_id              int64
shore_desc         object
desc_abbrev        object
geometry         geometry
dtype: object
{'init': 'epsg:32616'}


Unnamed: 0,shore_desc,desc_abbrev,distance,shore_ptid,type_geomorph,seg_id,nearest_x,nearest_y,geometry
0,Isla Mujeres,imu,10.0,imu_000010,sand,190,2.351487e+06,526079.020764,POINT (526079.021 2351487.282)
1,Isla Mujeres,imu,60.0,imu_000060,sand,190,2.351521e+06,526069.071721,POINT (526069.072 2351520.957)
2,Isla Mujeres,imu,110.0,imu_000110,sand,190,2.351481e+06,526050.024372,POINT (526050.024 2351480.878)
3,Isla Mujeres,imu,160.0,imu_000160,sand,190,2.351494e+06,526015.595971,POINT (526015.596 2351494.100)
4,Isla Mujeres,imu,210.0,imu_000210,sand,190,2.351456e+06,526010.366221,POINT (526010.366 2351455.699)
...,...,...,...,...,...,...,...,...,...
26102,Cozumel,coz,133360.0,coz_133360,cliff or rocky,187,2.271962e+06,514841.054102,POINT (514841.054 2271962.273)
26103,Cozumel,coz,133410.0,coz_133410,cliff or rocky,187,2.272006e+06,514817.592981,POINT (514817.593 2272006.427)
26104,Cozumel,coz,133460.0,coz_133460,cliff or rocky,187,2.272051e+06,514794.066007,POINT (514794.066 2272050.519)
26105,Cozumel,coz,133510.0,coz_133510,cliff or rocky,187,2.272076e+06,514751.274532,POINT (514751.275 2272076.382)


In [4]:
# Point sampled World Pop Data (output from Google Earth)
# Both GeoJSON exports live in the same folder, so the folder is named once and
# the file paths are built from it (same pattern as shore_gpkg above).
geeout_dir = '/Users/arbailey/Google Drive File Stream/My Drive/geeout'
shoreQR_50m_pts_wp_source = os.path.join(geeout_dir, 'shoreQR_50m_pts_wp.geojson')
shoreQR_50m_pts_wp_norm_source = os.path.join(geeout_dir, 'shoreQR_50m_pts_wp_norm.geojson')

# Wide table (one population column per year) and normalized table (year in 'wpyear')
wp_pts_gdf = gpd.read_file(shoreQR_50m_pts_wp_source)
wp_pts_norm_gdf = gpd.read_file(shoreQR_50m_pts_wp_norm_source)
# CRS of the sampled points as read from the GeoJSON
print(wp_pts_gdf.crs)

{'init': 'epsg:4326'}


In [5]:
# Preview the wide WorldPop sample table (one MEX_<year>_population column per year)
wp_pts_gdf

Unnamed: 0,id,BeachTypes,MEX_2015_population,MEX_2016_population,MEX_2017_population,MEX_2018_population,MEX_2019_population,MEX_2020_population,TipoCosta,desc_abbre,distance,fid,length_km,nearest_x,nearest_y,seg_id,shore_desc,shore_ptid,type_geomo,geometry
0,00000000000000005629,Infraestructure,2.001156,2.171475,2.268671,2.409169,2.591065,2.764358,Proteccion artificial,qrm,633160.0,13287,0.407326,2.266580e+06,476561.890870,42,QR mainland,qrm_633160,artificial,POINT (-87.22478 20.49775)
1,0000000000000000562a,Infraestructure,2.001156,2.171475,2.268671,2.409169,2.591065,2.764358,Proteccion artificial,qrm,633210.0,13288,0.407326,2.266530e+06,476563.641307,42,QR mainland,qrm_633210,artificial,POINT (-87.22476 20.49730)
2,0000000000000000562b,Infraestructure,,,,,,,Proteccion artificial,qrm,633260.0,13289,0.407326,2.266483e+06,476573.202228,42,QR mainland,qrm_633260,artificial,POINT (-87.22467 20.49688)
3,0000000000000000562c,Infraestructure,2.001156,2.171475,2.268671,2.409169,2.591065,2.764358,Proteccion artificial,qrm,633310.0,13290,0.407326,2.266526e+06,476580.967375,42,QR mainland,qrm_633310,artificial,POINT (-87.22459 20.49726)
4,0000000000000000562d,Infraestructure,,,,,,,Proteccion artificial,qrm,633360.0,13291,0.407326,2.266506e+06,476614.205614,42,QR mainland,qrm_633360,artificial,POINT (-87.22427 20.49708)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26102,00000000000000003be5,Mixed coast (Mud and sand),0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,Acantilado o rocosa,coz,26560.0,23967,3.554919,2.274303e+06,524354.974656,103,Cozumel,coz_026560,mixed mud and sand,POINT (-86.76632 20.56752)
26103,00000000000000003be6,Mixed coast (Mud and sand),0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,Acantilado o rocosa,coz,26610.0,23968,3.554919,2.274306e+06,524305.269319,103,Cozumel,coz_026610,mixed mud and sand,POINT (-86.76680 20.56755)
26104,00000000000000003be7,Mixed coast (Mud and sand),0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,Acantilado o rocosa,coz,26660.0,23969,3.554919,2.274308e+06,524255.768657,103,Cozumel,coz_026660,mixed mud and sand,POINT (-86.76727 20.56757)
26105,00000000000000003be8,Mixed coast (Mud and sand),0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,Acantilado o rocosa,coz,26710.0,23970,3.554919,2.274355e+06,524241.886448,103,Cozumel,coz_026710,mixed mud and sand,POINT (-86.76741 20.56800)


In [6]:
# Preview the normalized WorldPop sample table (year in 'wpyear', sampled value in 'first')
wp_pts_norm_gdf

Unnamed: 0,id,BeachTypes,TipoCosta,desc_abbre,distance,fid,first,length_km,nearest_x,nearest_y,seg_id,shore_desc,shore_ptid,type_geomo,wpyear,geometry
0,1_00000000000000005629,Infraestructure,Proteccion artificial,qrm,633160.0,13287,2.764358,0.407326,2.266580e+06,476561.890870,42,QR mainland,qrm_633160,artificial,2020.0,POINT (-87.22478 20.49775)
1,1_0000000000000000562a,Infraestructure,Proteccion artificial,qrm,633210.0,13288,2.764358,0.407326,2.266530e+06,476563.641307,42,QR mainland,qrm_633210,artificial,2020.0,POINT (-87.22476 20.49730)
2,1_0000000000000000562b,Infraestructure,Proteccion artificial,qrm,633260.0,13289,,0.407326,2.266483e+06,476573.202228,42,QR mainland,qrm_633260,artificial,2020.0,POINT (-87.22467 20.49688)
3,1_0000000000000000562c,Infraestructure,Proteccion artificial,qrm,633310.0,13290,2.764358,0.407326,2.266526e+06,476580.967375,42,QR mainland,qrm_633310,artificial,2020.0,POINT (-87.22459 20.49726)
4,1_0000000000000000562d,Infraestructure,Proteccion artificial,qrm,633360.0,13291,,0.407326,2.266506e+06,476614.205614,42,QR mainland,qrm_633360,artificial,2020.0,POINT (-87.22427 20.49708)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156637,2_2_2_2_2_00000000000000003be5,Mixed coast (Mud and sand),Acantilado o rocosa,coz,26560.0,23967,0.002826,3.554919,2.274303e+06,524354.974656,103,Cozumel,coz_026560,mixed mud and sand,2015.0,POINT (-86.76632 20.56752)
156638,2_2_2_2_2_00000000000000003be6,Mixed coast (Mud and sand),Acantilado o rocosa,coz,26610.0,23968,0.002826,3.554919,2.274306e+06,524305.269319,103,Cozumel,coz_026610,mixed mud and sand,2015.0,POINT (-86.76680 20.56755)
156639,2_2_2_2_2_00000000000000003be7,Mixed coast (Mud and sand),Acantilado o rocosa,coz,26660.0,23969,0.002826,3.554919,2.274308e+06,524255.768657,103,Cozumel,coz_026660,mixed mud and sand,2015.0,POINT (-86.76727 20.56757)
156640,2_2_2_2_2_00000000000000003be8,Mixed coast (Mud and sand),Acantilado o rocosa,coz,26710.0,23970,0.002826,3.554919,2.274355e+06,524241.886448,103,Cozumel,coz_026710,mixed mud and sand,2015.0,POINT (-86.76741 20.56800)


# Reproject and clean up Population point data GDFs

In [7]:
# Columns to keep and rename
# Wide table: the yearly WorldPop columns MEX_<year>_population are shortened
# to pop<year>; identifiers and geometry are kept unchanged.
pop_years = range(2015, 2021)
cols_rename = {f'MEX_{yr}_population': f'pop{yr}' for yr in pop_years}
cols_keep = ['shore_ptid', 'seg_id', *cols_rename, 'geometry']

# Normalized table: the sampled value arrives in a column named 'first' and is
# renamed to 'pop_ha'.
cols_norm_keep = ['shore_ptid', 'seg_id', 'wpyear', 'first', 'geometry']
cols_norm_rename = {'first': 'pop_ha'}

In [8]:
# Wide table: reproject the sampled points to the CRS of the shoreline points,
# keep only the columns listed in cols_keep, and shorten the yearly column names.
pts_gdf = (
    wp_pts_gdf
    .to_crs(shore_points_gdf.crs)[cols_keep]
    .rename(columns=cols_rename)
)
pts_gdf

Unnamed: 0,shore_ptid,seg_id,pop2015,pop2016,pop2017,pop2018,pop2019,pop2020,geometry
0,qrm_633160,42,2.001156,2.171475,2.268671,2.409169,2.591065,2.764358,POINT (476561.729 2266579.636)
1,qrm_633210,42,2.001156,2.171475,2.268671,2.409169,2.591065,2.764358,POINT (476563.715 2266529.997)
2,qrm_633260,42,,,,,,,POINT (476573.146 2266483.337)
3,qrm_633310,42,2.001156,2.171475,2.268671,2.409169,2.591065,2.764358,POINT (476581.088 2266526.026)
4,qrm_633360,42,,,,,,,POINT (476614.346 2266505.674)
...,...,...,...,...,...,...,...,...,...
26102,coz_026560,103,0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,POINT (524355.153 2274302.888)
26103,coz_026610,103,0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,POINT (524305.514 2274305.866)
26104,coz_026660,103,0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,POINT (524255.876 2274307.851)
26105,coz_026710,103,0.002826,0.001129,0.001249,0.001292,0.001362,0.001220,POINT (524241.977 2274355.504)


In [9]:
# Normalized table: reproject the sampled points to the CRS of the shoreline
# points, keep only the columns listed in cols_norm_keep, and rename the
# sampled value column ('first' -> 'pop_ha').
pts_norm_gdf = (
    wp_pts_norm_gdf
    .to_crs(shore_points_gdf.crs)[cols_norm_keep]
    .rename(columns=cols_norm_rename)
)
pts_norm_gdf

Unnamed: 0,shore_ptid,seg_id,wpyear,pop_ha,geometry
0,qrm_633160,42,2020.0,2.764358,POINT (476561.729 2266579.636)
1,qrm_633210,42,2020.0,2.764358,POINT (476563.715 2266529.997)
2,qrm_633260,42,2020.0,,POINT (476573.146 2266483.337)
3,qrm_633310,42,2020.0,2.764358,POINT (476581.088 2266526.026)
4,qrm_633360,42,2020.0,,POINT (476614.346 2266505.674)
...,...,...,...,...,...
156637,coz_026560,103,2015.0,0.002826,POINT (524355.153 2274302.888)
156638,coz_026610,103,2015.0,0.002826,POINT (524305.514 2274305.866)
156639,coz_026660,103,2015.0,0.002826,POINT (524255.876 2274307.851)
156640,coz_026710,103,2015.0,0.002826,POINT (524241.977 2274355.504)


In [10]:
# Trim the year column to its first four characters (e.g. '2020.0' -> '2020').
# The values stay text rather than becoming integers; the column-name join in
# the pivot-table cell further down concatenates them as strings.
pts_norm_gdf['wpyear'] = pts_norm_gdf['wpyear'].str[:4]
pts_norm_gdf

Unnamed: 0,shore_ptid,seg_id,wpyear,pop_ha,geometry
0,qrm_633160,42,2020,2.764358,POINT (476561.729 2266579.636)
1,qrm_633210,42,2020,2.764358,POINT (476563.715 2266529.997)
2,qrm_633260,42,2020,,POINT (476573.146 2266483.337)
3,qrm_633310,42,2020,2.764358,POINT (476581.088 2266526.026)
4,qrm_633360,42,2020,,POINT (476614.346 2266505.674)
...,...,...,...,...,...
156637,coz_026560,103,2015,0.002826,POINT (524355.153 2274302.888)
156638,coz_026610,103,2015,0.002826,POINT (524305.514 2274305.866)
156639,coz_026660,103,2015,0.002826,POINT (524255.876 2274307.851)
156640,coz_026710,103,2015,0.002826,POINT (524241.977 2274355.504)


# Summarize population by segments

## Normalized

In [11]:
# Mean and standard deviation of population by segment and year, using named
# aggregation so the output columns get custom names (pop_mean, pop_std).
# The aggregations are given as pandas string aliases rather than np.mean /
# np.std: pandas has so far translated those NumPy callables to its own
# groupby mean/std (sample std, ddof=1), but that translation is deprecated
# and the callable will be applied directly in future versions (np.std would
# then use ddof=0). The aliases keep the results identical across versions.
seg_year_pop_norm = (
    pts_norm_gdf
    .groupby(['seg_id', 'wpyear'])
    .agg(
        pop_mean=('pop_ha', 'mean'),
        pop_std=('pop_ha', 'std'),
    )
    .reset_index()
)
seg_year_pop_norm

Unnamed: 0,seg_id,wpyear,pop_mean,pop_std
0,1,2015,2.934075,2.215845
1,1,2016,3.228393,2.448695
2,1,2017,3.388048,2.678372
3,1,2018,3.436063,2.759110
4,1,2019,3.575080,2.787972
...,...,...,...,...
1177,197,2016,2.528524,1.095212
1178,197,2017,2.679847,1.153038
1179,197,2018,2.828826,1.226984
1180,197,2019,2.956032,1.262459


In [12]:
# Count how many points share each unique population value, by segment/year.
# Points with a missing pop_ha drop out here because groupby excludes NaN keys
# by default.
grouped_seg_date_pop_df = (
    pts_norm_gdf
    .groupby(['seg_id', 'wpyear', 'pop_ha'])
    .agg(unique_pop_count=('seg_id', 'size'))
    .reset_index()
)
print(grouped_seg_date_pop_df)

# Number of pixels (= number of unique population values) by segment/year:
# each row of the table above is one distinct value, so count rows per group.
pixels_per_seg_date_df = (
    grouped_seg_date_pop_df
    .groupby(['seg_id', 'wpyear'])
    .agg(pop_pixels=('seg_id', 'size'))
    .reset_index()
)
print(pixels_per_seg_date_df)

# Number of pixels by segment
# There are some segment/years with a differing number of pixels - take the max
#   assume that in at least one year all pixels have valid values and that the
#   values are unique for each pixel
pixels_per_seg_df = (
    pixels_per_seg_date_df
    .groupby(['seg_id'])
    .agg(pop_pixels=('pop_pixels', 'max'))
    .reset_index()
)
pixels_per_seg_df

       seg_id wpyear    pop_ha  unique_pop_count
0           1   2015  0.370555                 7
1           1   2015  1.344467                 4
2           1   2015  1.509295                 1
3           1   2015  2.164230                11
4           1   2015  4.062676                12
...       ...    ...       ...               ...
13055     197   2019  2.591065                 5
13056     197   2019  3.873566                10
13057     197   2020  0.579059                 3
13058     197   2020  2.764358                 5
13059     197   2020  4.031336                10

[13060 rows x 4 columns]
      seg_id wpyear  pop_pixels
0          1   2015           6
1          1   2016           6
2          1   2017           6
3          1   2018           6
4          1   2019           6
...      ...    ...         ...
1159     197   2016           3
1160     197   2017           3
1161     197   2018           3
1162     197   2019           3
1163     197   2020           3

[

Unnamed: 0,seg_id,pop_pixels
0,1,6
1,2,1
2,3,7
3,4,13
4,5,3
...,...,...
189,192,9
190,193,8
191,194,19
192,195,20


## Wide format - Pivot Table

In [13]:
# Build the wide table (one row per segment, one column per statistic and year)
# as a pivot table, so the aggregation and the reshaping happen in one step.
# The helper column 'pop_' is a copy of 'pop_ha' whose name supplies the prefix
# of the flattened output column names.
# NOTE(review): the helper column is added to pts_norm_gdf itself, so it is also
# present when pts_norm_gdf is exported as a point layer later in the notebook
# -- confirm that is intended.
pts_norm_gdf['pop_'] = pts_norm_gdf['pop_ha']
print(pts_norm_gdf.head())

# String aliases ('mean', 'std') are used instead of np.mean / np.std: pandas'
# translation of those NumPy callables to its own implementations is deprecated,
# and the aliases keep the current results (sample std, ddof=1).
seg_single_pop_mean = pd.pivot_table(
    pts_norm_gdf,
    values=['pop_'],
    index=['seg_id'],
    columns=['wpyear'],
    aggfunc={'pop_': ['mean', 'std']},
)
# Flatten the (value, statistic, year) column tuples into single names.
# 'pop_' already ends with an underscore, so the parts are joined with an empty
# separator: ('pop_', 'mean', '2015') -> 'pop_mean2015'.
seg_single_pop_mean.columns = [
    ''.join(parts) for parts in seg_single_pop_mean.columns.to_flat_index()
]
seg_single_pop_mean = seg_single_pop_mean.reset_index()
seg_single_pop_mean

   shore_ptid  seg_id wpyear    pop_ha                        geometry  \
0  qrm_633160      42   2020  2.764358  POINT (476561.729 2266579.636)   
1  qrm_633210      42   2020  2.764358  POINT (476563.715 2266529.997)   
2  qrm_633260      42   2020       NaN  POINT (476573.146 2266483.337)   
3  qrm_633310      42   2020  2.764358  POINT (476581.088 2266526.026)   
4  qrm_633360      42   2020       NaN  POINT (476614.346 2266505.674)   

       pop_  
0  2.764358  
1  2.764358  
2       NaN  
3  2.764358  
4       NaN  


Unnamed: 0,seg_id,pop_mean2015,pop_mean2016,pop_mean2017,pop_mean2018,pop_mean2019,pop_mean2020,pop_std2015,pop_std2016,pop_std2017,pop_std2018,pop_std2019,pop_std2020
0,1,2.934075,3.228393,3.388048,3.436063,3.575080,3.739626,2.215845,2.448695,2.678372,2.759110,2.787972,2.814124
1,2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,3,0.000206,0.000289,0.000310,0.000331,0.000321,0.000329,0.000017,0.000152,0.000173,0.000212,0.000203,0.000213
3,4,0.002393,0.001792,0.001805,0.001801,0.001852,0.001906,0.001352,0.000635,0.000654,0.000567,0.000521,0.000573
4,5,0.004968,0.011547,0.011573,0.011616,0.012341,0.011575,0.000782,0.002393,0.002501,0.002674,0.002417,0.002004
...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,192,0.347113,0.342331,0.347800,0.351047,0.383922,0.387873,0.525180,0.513250,0.554220,0.557110,0.568724,0.600823
190,193,0.003591,0.002916,0.003174,0.003336,0.003707,0.003548,0.000749,0.001362,0.001459,0.001629,0.001664,0.001622
191,194,0.087949,0.115809,0.117799,0.122232,0.124896,0.127175,0.139563,0.190289,0.193540,0.195412,0.197953,0.198084
192,195,1.175565,1.237490,1.268613,1.317193,1.350337,1.374289,1.703277,1.752987,1.819757,1.876659,1.923799,1.977388


# Joins and Export

In [14]:
# Combine, side by side on seg_id: the shoreline segments, the pixel count per
# segment, and the yearly population mean / std dev per segment.
frames_to_join = [shore_segments_gdf, pixels_per_seg_df, seg_single_pop_mean]
dfs = [frame.set_index('seg_id') for frame in frames_to_join]
joined_on_seg_id = pd.concat(dfs, axis=1).reset_index()
# Segments without any pixel count get 0 rather than a missing value
segment_popatts_gdf = joined_on_seg_id.fillna(value={'pop_pixels': 0})
segment_popatts_gdf

Unnamed: 0,seg_id,type_geomorph,length_km,shore_desc,desc_abbrev,geometry,pop_pixels,pop_mean2015,pop_mean2016,pop_mean2017,pop_mean2018,pop_mean2019,pop_mean2020,pop_std2015,pop_std2016,pop_std2017,pop_std2018,pop_std2019,pop_std2020
0,1,cliff or rocky,2.188708,Isla Mujeres,imu,"MULTILINESTRING ((530027.596 2344472.852, 5300...",6.0,2.934075,3.228393,3.388048,3.436063,3.575080,3.739626,2.215845,2.448695,2.678372,2.759110,2.787972,2.814124
1,2,sand,3.738697,Isla Contoy,ico,"MULTILINESTRING ((521010.617 2378995.910, 5210...",1.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,3,mixed mud and sand,5.758523,QR mainland,qrm,"MULTILINESTRING ((439281.990 2130747.294, 4392...",7.0,0.000206,0.000289,0.000310,0.000331,0.000321,0.000329,0.000017,0.000152,0.000173,0.000212,0.000203,0.000213
3,4,sand,5.622333,QR mainland,qrm,"MULTILINESTRING ((433485.994 2090201.431, 4334...",13.0,0.002393,0.001792,0.001805,0.001801,0.001852,0.001906,0.001352,0.000635,0.000654,0.000567,0.000521,0.000573
4,5,sand,2.136723,QR mainland,qrm,"MULTILINESTRING ((410987.143 2013924.228, 4109...",3.0,0.004968,0.011547,0.011573,0.011616,0.012341,0.011575,0.000782,0.002393,0.002501,0.002674,0.002417,0.002004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,193,cliff or rocky,5.019248,QR mainland,qrm,"MULTILINESTRING ((444590.092 2376499.228, 4447...",8.0,0.003591,0.002916,0.003174,0.003336,0.003707,0.003548,0.000749,0.001362,0.001459,0.001629,0.001664,0.001622
193,194,sand,8.177061,Holbox,hol,"MULTILINESTRING ((488231.217 2388467.313, 4882...",19.0,0.087949,0.115809,0.117799,0.122232,0.124896,0.127175,0.139563,0.190289,0.193540,0.195412,0.197953,0.198084
194,195,sand,11.078166,QR mainland,qrm,"MULTILINESTRING ((516830.778 2314283.226, 5168...",20.0,1.175565,1.237490,1.268613,1.317193,1.350337,1.374289,1.703277,1.752987,1.819757,1.876659,1.923799,1.977388
195,196,sand,0.784313,Isla de la Pasion,ipa,"MULTILINESTRING ((513991.173 2272135.231, 5139...",0.0,,,,,,,,,,,,


## Segment exports

In [21]:
# Export the normalized table of population mean / std dev by segment and year
# as a non-spatial table in the GeoPackage (replacing any existing table).
# sqlite3's connection context manager only commits or rolls back the
# transaction; it does not close the connection, so it is closed explicitly.
conn = sqlite3.connect(shore_gpkg)
try:
    with conn:
        seg_year_pop_norm.to_sql('seg_multiple_pop_mean', conn, if_exists='replace', index=False)
finally:
    conn.close()

In [22]:
# Export segments with population attributes (pixel count, yearly mean and std dev)
# to the 'shoreQR_segments_pop' layer of the GeoPackage
segment_popatts_gdf.to_file(shore_gpkg, layer='shoreQR_segments_pop', driver="GPKG")

## Point exports

In [20]:
# Write both point tables (wide and normalized) with their population
# attributes to the GeoPackage, one layer per table.
for layer_name, layer_gdf in (
    ('shoreQR_50m_pts_pop', pts_gdf),
    ('shoreQR_50m_pts_pop_norm', pts_norm_gdf),
):
    layer_gdf.to_file(shore_gpkg, layer=layer_name, driver="GPKG")