In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np

from shapely.geometry import MultiPolygon
from shapely.prepared import prep

In [2]:
wrecks_df = pd.read_excel("data/shipwrecks_with_bermuda.xlsx")
wrecks_df

Unnamed: 0,date,lat,lon,near,in_bermuda
0,1996-05-13 05:50:00,40.876665,-91.031665,United States of America,False
1,1996-05-13 05:50:01,40.876665,-91.031665,United States of America,False
2,1996-05-13 05:50:01,40.876665,-91.031665,United States of America,False
3,1996-05-13 05:50:02,40.876665,-91.031665,United States of America,False
4,1996-05-13 05:50:02,40.876665,-91.031665,United States of America,False
...,...,...,...,...,...
106261,2015-06-22 14:10:01,37.310490,-89.513615,United States of America,False
106262,2015-06-22 16:35:00,25.760800,-79.956670,United States of America,True
106263,2015-06-22 16:35:00,25.760800,-79.956670,United States of America,True
106264,2015-06-24 13:52:00,29.732314,-95.127879,United States of America,False


In [3]:
worldmap = gpd.read_file(
    "data/natural_earth/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp"
).to_crs("EPSG:3857")[["NAME", "geometry"]].rename(columns={"NAME": "country"})
country_geometry = {n: g for n, g in zip(worldmap.country, worldmap.geometry)}
wrecks_gdf = gpd.GeoDataFrame(
    wrecks_df,
    geometry=gpd.points_from_xy(wrecks_df.lon, wrecks_df.lat),
    crs="EPSG:4326"
).to_crs("EPSG:3857")

def distance_to_coast(row):
    poly = country_geometry.get(row.near)
    if poly is None or row.geometry is None:
        return np.nan
    pt = row.geometry
    if isinstance(poly, MultiPolygon):
        parts = list(poly.geoms)
        poly = min(parts, key=lambda p: pt.distance(p))
    prepared = prep(poly)
    if prepared.contains(pt):
        return pt.distance(poly.boundary)
    return pt.distance(poly)

wrecks_gdf["m_to_coast"] = wrecks_gdf.apply(distance_to_coast, axis=1)
wrecks_gdf["km_to_coast"] = wrecks_gdf["m_to_coast"] / 1000
wrecks_df = wrecks_gdf[["date", "lat", "lon", "near", "in_bermuda", "km_to_coast"]].copy()
print(wrecks_df["km_to_coast"].describe())

count    106266.000000
mean        204.584209
std        1040.508657
min           0.000581
25%           1.680167
50%          13.765298
75%         305.725243
max      224099.958016
Name: km_to_coast, dtype: float64


In [4]:
wrecks_df.to_excel("data/shipwrecks_with_distance_to_coast.xlsx", index=False)