# Match coastlines to GADM boundaries and geomorphology

* **Products used:** DE Africa Coastlines



### Load packages
Import Python packages that are used for the analysis.

In [1]:
import geopandas as gpd
import numpy as np
from shapely import box
import os

In [2]:
# List the contents of ../data/ (long format, human-readable sizes, sorted by
# modification time with the oldest first) to see which inputs and cached
# outputs are already present.
!ls -lrth ../data/

total 14737448
-rw-rw-rw-@ 1 Fangyuan  staff   171M May 13  2022 CoastalGeomorphology.zip
-rw-rw-r--  1 Fangyuan  staff   344K Jul 19  2022 gadm41_NGA_1.json
-rwx------@ 1 Fangyuan  staff   369M Mar 27  2023 [31mgadm_africa.gpkg[m[m
-rw-r--r--@ 1 Fangyuan  staff   109K Apr  8 10:38 gadm41_NGA_1.json.zip
-rw-r--r--@ 1 Fangyuan  staff    34M Apr  8 16:10 coastlineschange_v0.4.0_gadm_level0.gpkg
-rw-r--r--  1 Fangyuan  staff    71K Apr  8 16:15 visual.qgz
-rw-r--r--@ 1 Fangyuan  staff    35M Apr  8 16:15 CoastalGeomorphology_africa.gpkg
-rw-r--r--@ 1 Fangyuan  staff   955M Apr  8 16:15 CoastalGeomorphology_buffered.gpkg
-rwx------@ 1 Fangyuan  staff   1.8G Apr  8 16:15 [31mdeafricacoastlines_v0.4.0.gpkg[m[m
-rw-r--r--@ 1 Fangyuan  staff    36M Apr  8 16:42 coastlineschange_v0.4.0_gadm_level1.gpkg
-rw-r--r--@ 1 Fangyuan  staff   807M Apr  9 09:16 deafricacoastlines_v0.4.0_geomorph_gadm.gpkg
-rw-r--r--@ 1 Fangyuan  staff   2.0G Apr  9 09:27 deafricacoastlines_v0.4.2.gpkg
-rw-r--r--@ 1

## Load Coastline

In [3]:
# Coastlines release to process: "v0.4.0" (-2022) or "v0.4.2" (-2023).
# NOTE(review): the years presumably mark the end of each release's time
# series — confirm against the product documentation.
# The value is interpolated into the input and output file names used below.
version = "v0.4.2"

In [4]:
%%time

# load coastlines rate of change and filter to keep only good observations

coastlines = gpd.read_file(f"../data/deafricacoastlines_{version}.gpkg", layer="rates_of_change")
coastlines = coastlines[(coastlines.certainty=='good')]
coastlines.reset_index(inplace=True)

CPU times: user 21.1 s, sys: 953 ms, total: 22.1 s
Wall time: 22.8 s


In [6]:
# Points with a significant trend (sig_time < 0.01), split by the sign of
# rate_time. Point counts are converted to km by counting 30 m per point
# (count * 30 / 1000).
sig_change = coastlines.loc[coastlines.sig_time < 0.01]
for label, mask in (
    ("negative change (km)", sig_change.rate_time < 0),
    ("positive change (km)", sig_change.rate_time > 0),
):
    print(label, int(mask.sum())*30./1000)

negative change (km) 10461.6
positive change (km) 13695.81


In [5]:
# Rectangular extent of all retained coastline points: [minx, miny, maxx, maxy].
bounding_box = coastlines.total_bounds
min_x, min_y, max_x, max_y = bounding_box

# The same extent as a shapely polygon.
bbox_geom = box(min_x, min_y, max_x, max_y)

# Pad the extent by 10000 units of the coastlines CRS so that features lying
# just outside the tight extent are still selected by the filter further down.
buffered_bbox = bbox_geom.buffer(10000)

## Match to Geomorphology

In [6]:
# Load the coastal geomorphology features restricted to the coastline extent.
# The clipped subset is cached in `geomorph_file`; when the cache is missing it
# is rebuilt from the full archive (reproject -> filter by the buffered
# bounding box -> save).
geomorph_file = '../data/CoastalGeomorphology_africa.gpkg'
if os.path.exists(geomorph_file):
    geomorphology = gpd.read_file(geomorph_file)
    # The cache is keyed by file name only, so it may have been written for a
    # coastlines layer in a different CRS. The nearest join below measures
    # distances in CRS units, so make sure both layers agree before using it.
    if geomorphology.crs is not None and geomorphology.crs != coastlines.crs:
        geomorphology = geomorphology.to_crs(coastlines.crs)
else:
    geomorphology = gpd.read_file("../data/CoastalGeomorphology.zip")
    geomorphology = geomorphology.to_crs(coastlines.crs)

    # Keep only the features that intersect the buffered coastline extent.
    geomorphology = geomorphology[geomorphology.geometry.intersects(buffered_bbox)]

    geomorphology.to_file(geomorph_file, driver="GPKG")

In [7]:
# Disabled step: buffer every geomorphology feature by 100 CRS units
# (cap_style=2) and write the result to a GeoPackage. No later cell in this
# notebook references `geomorphology_buffered`.
#geomorphology_buffered = geomorphology.buffer(100, cap_style=2)
#geomorphology_buffered.to_file('CoastalGeomorphology_buffered.gpkg', driver="GPKG")

In [8]:
# Attach the attributes of the nearest geomorphology feature (within 200 CRS
# units) to every coastline point; the distance is stored in "geomorph_dist".
# Points with no feature in range keep NaN in the joined columns.
coastlines_geomorph = coastlines.sjoin_nearest(
    geomorphology,
    how='left',
    max_distance=200,
    lsuffix='left',
    rsuffix='right',
    distance_col="geomorph_dist",
)
coastlines_geomorph = coastlines_geomorph.drop(columns=["index_right"])

# sjoin_nearest returns one row per equidistant nearest neighbour, so a point
# with tied neighbours appears more than once (sharing the same index label).
# Keep a single row per coastline point so that the class shares and the saved
# layer are not inflated by duplicates; which tied neighbour is kept is
# arbitrary (the first one returned).
coastlines_geomorph = coastlines_geomorph[~coastlines_geomorph.index.duplicated(keep="first")]

In [9]:
# Fraction of rows whose joined `Preds` value is "Wetland". Rows without a
# geomorphology match compare as False, so they count in the denominator.
coastlines_geomorph['Preds'].eq("Wetland").mean()

np.float64(0.18163991579261463)

In [10]:
# Fraction of rows whose joined `Preds` value is "Beach" (unmatched rows are
# included in the denominator).
coastlines_geomorph['Preds'].eq("Beach").mean()

np.float64(0.3655396962785733)

In [11]:
# Fraction of rows whose joined `Preds` value is "Bedrock" (unmatched rows are
# included in the denominator).
coastlines_geomorph['Preds'].eq("Bedrock").mean()

np.float64(0.09498617204152447)

In [12]:
# Combined fraction of rows classed as Wetland, Beach or Bedrock, obtained by
# adding the three individual fractions in that order.
sum(
    (coastlines_geomorph['Preds'] == geomorph_class).mean()
    for geomorph_class in ("Wetland", "Beach", "Bedrock")
)

np.float64(0.6421657841127124)

## Match coastlines to admin boundaries

In [13]:
%%time

# load level 1 admin_boundaries

gadm_africa  = gpd.read_file("../data/gadm_africa.gpkg")
level = 1 
gadm_level_1 = gadm_africa.dissolve(by=f"GID_{level}")
gadm_level_1 = gadm_level_1.to_crs(coastlines.crs)
gadm_level_1.reset_index(inplace=True)

level = 0
gadm_level_0 = gadm_africa.dissolve(by=f"GID_{level}")
gadm_level_0 = gadm_level_0.to_crs(coastlines.crs)
gadm_level_0.reset_index(inplace=True)


CPU times: user 1min 6s, sys: 962 ms, total: 1min 7s
Wall time: 1min 8s


In [14]:
%%time
#coastlines_geomorph = coastlines_geomorph.sjoin_nearest(gadm_level_0, how='left', max_distance=1000, lsuffix='left', rsuffix='right', distance_col=None)
coastlines_geomorph = coastlines_geomorph.sjoin_nearest(gadm_level_1[["GID_0", "NAME_0", "GID_1", "NAME_1","geometry"]], how='left', max_distance=1000, lsuffix='left', rsuffix='right', distance_col="GID_1_dist")
coastlines_geomorph = coastlines_geomorph.drop(columns=["index_right"])

CPU times: user 31min 25s, sys: 6.3 s, total: 31min 32s
Wall time: 31min 43s


In [15]:
# Preview the first five rows of the joined table.
coastlines_geomorph.head(n=5)

Unnamed: 0,index,uid,rate_time,sig_time,se_time,outl_time,dist_2000,dist_2001,dist_2002,dist_2003,...,Source,Obs,Preds,Probs,geomorph_dist,GID_0,NAME_0,GID_1,NAME_1,GID_1_dist
0,0,sv0eefepeh,-0.03,0.611,0.05,,3.64,2.26,0.57,0.86,...,,,,,,EGY,Egypt,EGY.22_1,Janub Sina',0.0
1,1,sv0eefg1n9,0.0,0.964,0.05,,3.06,1.73,0.99,0.66,...,,,,,,EGY,Egypt,EGY.22_1,Janub Sina',0.0
2,2,sv0eefg66h,0.04,0.304,0.04,,2.16,1.12,0.01,-0.37,...,,,,,,EGY,Egypt,EGY.22_1,Janub Sina',0.0
3,3,sv0eefg7v8,-0.05,0.357,0.05,,5.65,2.87,2.28,1.4,...,,,,,,EGY,Egypt,EGY.22_1,Janub Sina',0.0
4,4,sv0eefgt19,-0.02,0.621,0.04,,3.5,1.55,1.85,0.99,...,,,,,,EGY,Egypt,EGY.22_1,Janub Sina',0.0


In [16]:
%%time
coastlines_geomorph.to_file(f"../data/deafricacoastlines_{version}_geomorph_gadm.gpkg")


CPU times: user 27.7 s, sys: 1.39 s, total: 29 s
Wall time: 29.9 s


## Per admin boundary stats

In [17]:
# Per-region coastline change statistics for GADM level 0.
#
# Every good coastline point is assigned to its nearest region (within 1000 CRS
# units). Per region the cell records the number of good points (n_good), the
# number with a significant trend (sig_time < 0.01; n_sig99), how many of those
# have rate_time below -1 and -5 (n_neg1, n_neg5), and quantiles of rate_time,
# nsm and sce over the significant points. Regions without any matched point
# are dropped before saving.
level = 0
# Work on a copy so the dissolved boundaries loaded earlier stay unchanged and
# the cell can be re-run safely.
gadm_level = gadm_level_0.copy()

# Row 0: positions of matched points in `coastlines`; row 1: positions of their
# region in `gadm_level`. return_all=False keeps at most one region per point.
ind_coast, ind_level = gadm_level.sindex.nearest(
    coastlines.geometry, max_distance=1000, return_all=False, return_distance=False
)

# One row per matched point, tagged with the position of its region, so all
# regions are summarised in a single groupby pass instead of rescanning the
# full match array once per region.
matched = coastlines[["sig_time", "rate_time", "nsm", "sce"]].iloc[ind_coast].assign(
    region_pos=ind_level
)
n_good = matched.groupby("region_pos").size()

significant = matched[matched.sig_time < 0.01]
sig_stats = significant.groupby("region_pos").agg(
    n_sig99=("rate_time", "size"),
    n_neg1=("rate_time", lambda rates: (rates < -1).sum()),
    n_neg5=("rate_time", lambda rates: (rates < -5).sum()),
    rate_time_10th=("rate_time", lambda rates: rates.quantile(0.1)),
    rate_time_50th=("rate_time", lambda rates: rates.quantile(0.5)),
    nsm_10th=("nsm", lambda values: values.quantile(0.1)),
    sce_90th=("sce", lambda values: values.quantile(0.9)),
)

# Regions that have good points but no significant ones get counts of 0; their
# quantiles stay NaN.
sig_stats = sig_stats.reindex(n_good.index)
count_cols = ["n_sig99", "n_neg1", "n_neg5"]
sig_stats[count_cols] = sig_stats[count_cols].fillna(0)

# Expand to one row per region by position (NaN where a region has no points)
# and write the columns back positionally, independent of the index labels.
region_stats = sig_stats.assign(n_good=n_good).reindex(range(len(gadm_level)))
stat_cols = ['n_good', 'n_sig99', 'n_neg1', 'n_neg5', 'rate_time_10th', 'rate_time_50th', 'nsm_10th', 'sce_90th']
for col in stat_cols:
    gadm_level[col] = region_stats[col].to_numpy(dtype=float)

# Percentages are relative to all good points in the region, not only to the
# significant ones. Lengths count 30 m per point (count * 30 / 1000 -> km).
gadm_level['perc_neg1'] = gadm_level['n_neg1']*100/gadm_level['n_good']
gadm_level['perc_neg5'] = gadm_level['n_neg5']*100/gadm_level['n_good']
gadm_level['km_neg1'] = gadm_level['n_neg1']*30/1000
gadm_level['km_neg5'] = gadm_level['n_neg5']*30/1000

gadm_level = gadm_level[gadm_level.n_good>0]
gadm_level.to_file(f"../data/coastlineschange_{version}_gadm_level{level}.gpkg")

In [18]:
# Per-region coastline change statistics for GADM level 1.
#
# Every good coastline point is assigned to its nearest region (within 1000 CRS
# units). Per region the cell records the number of good points (n_good), the
# number with a significant trend (sig_time < 0.01; n_sig99), how many of those
# have rate_time below -1 and -5 (n_neg1, n_neg5), and quantiles of rate_time,
# nsm and sce over the significant points. Regions without any matched point
# are dropped before saving.
level = 1
# Work on a copy so the dissolved boundaries loaded earlier stay unchanged and
# the cell can be re-run safely.
gadm_level = gadm_level_1.copy()

# Row 0: positions of matched points in `coastlines`; row 1: positions of their
# region in `gadm_level`. return_all=False keeps at most one region per point.
ind_coast, ind_level = gadm_level.sindex.nearest(
    coastlines.geometry, max_distance=1000, return_all=False, return_distance=False
)

# One row per matched point, tagged with the position of its region, so all
# regions are summarised in a single groupby pass instead of rescanning the
# full match array once per region.
matched = coastlines[["sig_time", "rate_time", "nsm", "sce"]].iloc[ind_coast].assign(
    region_pos=ind_level
)
n_good = matched.groupby("region_pos").size()

significant = matched[matched.sig_time < 0.01]
sig_stats = significant.groupby("region_pos").agg(
    n_sig99=("rate_time", "size"),
    n_neg1=("rate_time", lambda rates: (rates < -1).sum()),
    n_neg5=("rate_time", lambda rates: (rates < -5).sum()),
    rate_time_10th=("rate_time", lambda rates: rates.quantile(0.1)),
    rate_time_50th=("rate_time", lambda rates: rates.quantile(0.5)),
    nsm_10th=("nsm", lambda values: values.quantile(0.1)),
    sce_90th=("sce", lambda values: values.quantile(0.9)),
)

# Regions that have good points but no significant ones get counts of 0; their
# quantiles stay NaN.
sig_stats = sig_stats.reindex(n_good.index)
count_cols = ["n_sig99", "n_neg1", "n_neg5"]
sig_stats[count_cols] = sig_stats[count_cols].fillna(0)

# Expand to one row per region by position (NaN where a region has no points)
# and write the columns back positionally, independent of the index labels.
region_stats = sig_stats.assign(n_good=n_good).reindex(range(len(gadm_level)))
stat_cols = ['n_good', 'n_sig99', 'n_neg1', 'n_neg5', 'rate_time_10th', 'rate_time_50th', 'nsm_10th', 'sce_90th']
for col in stat_cols:
    gadm_level[col] = region_stats[col].to_numpy(dtype=float)

# Percentages are relative to all good points in the region, not only to the
# significant ones. Lengths count 30 m per point (count * 30 / 1000 -> km).
gadm_level['perc_neg1'] = gadm_level['n_neg1']*100/gadm_level['n_good']
gadm_level['perc_neg5'] = gadm_level['n_neg5']*100/gadm_level['n_good']
gadm_level['km_neg1'] = gadm_level['n_neg1']*30/1000
gadm_level['km_neg5'] = gadm_level['n_neg5']*30/1000

gadm_level = gadm_level[gadm_level.n_good>0]
gadm_level.to_file(f"../data/coastlineschange_{version}_gadm_level{level}.gpkg")