# Match coastlines to GADM boundaries and geomorphology

* **Products used:** [DE Africa Coastlines]()



### Load packages
Import Python packages that are used for the analysis.

In [13]:
import geopandas as gpd
import numpy as np
from shapely import box
import os


In [7]:
!ls -lrth ../data/

total 6944072
-rw-rw-rw-@ 1 Fangyuan  staff   171M May 13  2022 CoastalGeomorphology.zip
-rwx------@ 1 Fangyuan  staff   369M Mar 27  2023 [31mgadm_africa.gpkg[m[m
-rw-r--r--@ 1 Fangyuan  staff   109K Apr  8 10:38 gadm41_NGA_1.json.zip
-rw-r--r--@ 1 Fangyuan  staff   955M Apr  8 11:52 CoastalGeomorphology_buffered.gpkg
-rw-r--r--  1 Fangyuan  staff     0B Apr  8 11:52 CoastalGeomorphology_buffered.gpkg-wal
-rw-r--r--  1 Fangyuan  staff    32K Apr  8 11:52 CoastalGeomorphology_buffered.gpkg-shm
-rwx------@ 1 Fangyuan  staff   1.8G Apr  8 12:24 [31mdeafricacoastlines_v0.4.0.gpkg[m[m
-rwx------  1 Fangyuan  staff     0B Apr  8 12:24 [31mdeafricacoastlines_v0.4.0.gpkg-wal[m[m
-rwx------  1 Fangyuan  staff    32K Apr  8 12:24 [31mdeafricacoastlines_v0.4.0.gpkg-shm[m[m
-rw-r--r--@ 1 Fangyuan  staff    73M Apr  8 12:43 deafricacoastlines_v0.4.2.gpkg.E2846cF6


## Load Coastline

In [None]:
# coastlines version, v0.4.0(-2022) or v0.4.2 (-2023)
version = 'v0.4.0'

In [24]:
%%time

# load coastlines rate of change and filter to keep only good observations

coastlines = gpd.read_file(f"../data/deafricacoastlines_{version}.gpkg", layer="rates_of_change")
coastlines = coastlines[(coastlines.certainty=='good')]
coastlines.reset_index(inplace=True)

CPU times: user 20.1 s, sys: 1 s, total: 21.1 s
Wall time: 21.4 s


In [28]:
# Step 1: Get the bounding box (minx, miny, maxx, maxy)
bounding_box = coastlines.total_bounds  # returns [minx, miny, maxx, maxy]

# Create a box geometry from the bounding box
bbox_geom = box(*bounding_box)

# Buffer the bounding box by 1000 units (depending on CRS units)
buffered_bbox = bbox_geom.buffer(10000)

## Match to Geomorphology

In [30]:
geomorph_file = '../data/CoastalGeomorphology_africa.gpkg'
if os.path.exists(geomorph_file):
    geomorphology = gpd.read_file(geomorph_file)
else:
    geomorphology = gpd.read_file("../data/CoastalGeomorphology.zip")
    geomorphology = geomorphology.to_crs(coastlines.crs)

    # Filter another GeoDataFrame (let's say 'other_gdf') using the buffered bounding box
    geomorphology = geomorphology[geomorphology.geometry.intersects(buffered_bbox)]

    geomorphology.to_file(geomorph_file, driver="GPKG")

In [3]:
#geomorphology_buffered = geomorphology.buffer(100, cap_style=2)
#geomorphology_buffered.to_file('CoastalGeomorphology_buffered.gpkg', driver="GPKG")

0         POLYGON ((-191812.967 5669921.319, -191844.535...
1         POLYGON ((-106350.298 5680824.839, -106349.869...
2         POLYGON ((1222455.52 6049638.658, 1222454.362 ...
3         POLYGON ((1223063.793 6048738.774, 1223063.464...
4         POLYGON ((1220755.382 6048165.832, 1220755.278...
                                ...                        
294014    POLYGON ((-8455301.937 3686984.918, -8455311.1...
294015    POLYGON ((-8457140.351 3686517.331, -8457143.9...
294016    POLYGON ((9798145.837 266484.773, 9798133.668 ...
294017    POLYGON ((9797673.76 267094.984, 9797677.652 2...
294018    POLYGON ((12512321.931 -161238.076, 12512316.1...
Length: 294019, dtype: geometry

In [31]:
coastlines_geomorph = coastlines.sjoin_nearest(geomorphology, how='left', max_distance=100, lsuffix='left', rsuffix='right', distance_col=None)

In [32]:
(coastlines_geomorph['Preds'] == "Wetland").mean()

np.float64(0.15623081645181092)

In [33]:
(coastlines_geomorph['Preds'] == "Beach").mean()

np.float64(0.3557646493053478)

In [34]:
(coastlines_geomorph['Preds'] == "Bedrock").mean()

np.float64(0.09202174929581834)

In [35]:
(coastlines_geomorph['Preds'] == "Wetland").mean()+(coastlines_geomorph['Preds'] == "Beach").mean()+(coastlines_geomorph['Preds'] == "Bedrock").mean()

np.float64(0.604017215052977)

## Match coastlines to admin boundaries

In [None]:
%%time

# load level 1 admin_boundaries

gadm_africa  = gpd.read_file("data/gadm_africa.gpkg")
level = 1 
gadm_level_1 = gadm_africa.dissolve(by=f"GID_{level}")
gadm_level_1 = gadm_level_1.to_crs(coastlines.crs)
gadm_level_1.reset_index(inplace=True)

level = 0
gadm_level_0 = gadm_africa.dissolve(by=f"GID_{level}")
gadm_level_0 = gadm_level_0.to_crs(coastlines.crs)
gadm_level_0.reset_index(inplace=True)


CPU times: user 2min 51s, sys: 755 ms, total: 2min 52s
Wall time: 2min 54s


In [None]:
%%time
coastlines_geomorph = coastlines_geomorph.sjoin_nearest(gadm_level_0, how='left', max_distance=1000, lsuffix='left', rsuffix='right', distance_col=None)
coastlines_geomorph = coastlines_geomorph.sjoin_nearest(gadm_level_1, how='left', max_distance=1000, lsuffix='left', rsuffix='right', distance_col=None)

ValueError: 'index_right' cannot be a column name in the frames being joined

In [None]:
%%time
coastlines_geomorph.to_file(f"~/dev/data/deafricacoastlines_{version}_geomorph_gadm.gpkg")


DataSourceError: sqlite3_open(/home/jovyan/dev/data/deafricacoastlines_v0.4.2_geomorph_gadm.gpkg) failed: unable to open database file

## Per admin boundary stats

In [None]:
# match coastlines to admin region geometries
level = 0
gadm_level = gadm_level_0

ind_coast, ind_level =  gadm_level.sindex.nearest(coastlines.geometry, max_distance= 1000, return_all=False, return_distance=False)

gadm_level[['n_good', 'n_sig99', 'n_neg1', 'n_neg5', 'rate_time_10th', 'rate_time_50th', 'nsm_10th', 'sce_90th']] = [np.nan]*8

for idx in gadm_level.index:
    #print(gadm_level.loc[idx][f"NAME_{level}"])
    
    good_gdf = coastlines.iloc[ind_coast[ind_level == idx]]
    if len(good_gdf)==0:
        #print("no data over", gadm_level.loc[idx][f"NAME_{level}"])
        continue

    # get stats
    gadm_level.at[idx, 'n_good'] = len(good_gdf)
    sig_gdf = good_gdf[(good_gdf.sig_time<0.01)]
    gadm_level.at[idx, 'n_sig99'] = len(sig_gdf)
    gadm_level.at[idx, 'n_neg1'] = (sig_gdf.rate_time<-1).sum()
    gadm_level.at[idx, 'n_neg5'] = (sig_gdf.rate_time<-5).sum()
    gadm_level.at[idx, 'rate_time_10th'] = sig_gdf.rate_time.quantile(0.1)
    gadm_level.at[idx, 'rate_time_50th'] = sig_gdf.rate_time.quantile(0.5)
    gadm_level.at[idx, 'nsm_10th'] = sig_gdf.nsm.quantile(0.1)
    gadm_level.at[idx, 'sce_90th'] = sig_gdf.sce.quantile(0.9)
    
gadm_level['perc_neg1']=gadm_level['n_neg1']*100/gadm_level['n_good']
gadm_level['perc_neg5']=gadm_level['n_neg5']*100/gadm_level['n_good']
gadm_level['km_neg1']=gadm_level['n_neg1']*30/1000
gadm_level['km_neg5']=gadm_level['n_neg5']*30/1000

gadm_level = gadm_level[gadm_level.n_good>0]
gadm_level.to_file(f"../data/coastlineschange_{version}_gadm_level{level}.gpkg")

In [None]:
# match coastlines to admin region geometries
level = 1
gadm_level = gadm_level_1

ind_coast, ind_level =  gadm_level.sindex.nearest(coastlines.geometry, max_distance= 1000, return_all=False, return_distance=False)

gadm_level[['n_good', 'n_sig99', 'n_neg1', 'n_neg5', 'rate_time_10th', 'rate_time_50th', 'nsm_10th', 'sce_90th']] = [np.nan]*8

for idx in gadm_level.index:
    #print(gadm_level.loc[idx][f"NAME_{level}"])
    
    good_gdf = coastlines.iloc[ind_coast[ind_level == idx]]
    if len(good_gdf)==0:
        #print("no data over", gadm_level.loc[idx][f"NAME_{level}"])
        continue

    # get stats
    gadm_level.at[idx, 'n_good'] = len(good_gdf)
    sig_gdf = good_gdf[(good_gdf.sig_time<0.01)]
    gadm_level.at[idx, 'n_sig99'] = len(sig_gdf)
    gadm_level.at[idx, 'n_neg1'] = (sig_gdf.rate_time<-1).sum()
    gadm_level.at[idx, 'n_neg5'] = (sig_gdf.rate_time<-5).sum()
    gadm_level.at[idx, 'rate_time_10th'] = sig_gdf.rate_time.quantile(0.1)
    gadm_level.at[idx, 'rate_time_50th'] = sig_gdf.rate_time.quantile(0.5)
    gadm_level.at[idx, 'nsm_10th'] = sig_gdf.nsm.quantile(0.1)
    gadm_level.at[idx, 'sce_90th'] = sig_gdf.sce.quantile(0.9)
    
gadm_level['perc_neg1']=gadm_level['n_neg1']*100/gadm_level['n_good']
gadm_level['perc_neg5']=gadm_level['n_neg5']*100/gadm_level['n_good']
gadm_level['km_neg1']=gadm_level['n_neg1']*30/1000
gadm_level['km_neg5']=gadm_level['n_neg5']*30/1000

gadm_level = gadm_level[gadm_level.n_good>0]
gadm_level.to_file(f"../data/coastlineschange_{version}_gadm_level{level}.gpkg")