In [69]:
import geopandas as gpd # Geospatial data operations
import rasterio as rio # Geospatial imagery manipulation
import rasterio.plot
import pandas as pd # Tabular data
import os
import re
import rapidfuzz # Fuzzy string matching
from tqdm.auto import tqdm # Progress bars
from tqdm.contrib.concurrent import thread_map, process_map # Parallel operations
import matplotlib # Plots
import matplotlib.pyplot as plt
import shapely # Polygon operations
#import solaris.tile as tile # Tile splitting
#import solaris.data.coco as coco
import contextlib
import io
import rasterio # Raster imagery operations
from rasterio.vrt import WarpedVRT
from rasterio import transform
from rasterio.merge import merge # Merging tiles into mosaics
from glob import glob # Finding files
from shapely.geometry import box # Bounding box operations
matplotlib.rcParams['figure.figsize'] = (20, 10)
tqdm.pandas()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 130)
import platform
if platform.system() == "Windows":
  prefix = "Z:/"
else:
  prefix = "ressci201900060-RNC2-Coastal/"

## Match shapefiles to images

In [70]:
filename = prefix + "Nick/filelist.txt"
if os.path.isfile(filename):
    filelist = pd.read_csv(filename, header=None).iloc[:,0]
else:
    def find_files(root):
        return pd.Series(glob(prefix + root + "**/**", recursive=True)).str.replace(prefix, "")
    filelist = pd.concat(thread_map(find_files, ["Gabrielle", "MaxarImagery", "Retrolens"]))
    if platform.system() == "Windows":
        filelist = filelist.str.replace("\\", "/", regex=False)
    filelist.to_csv(filename, index=False, header=False)
filelist

0                                                                                 Gabrielle/
1                                                             Gabrielle/GabrielleSummary.mxd
2                                                                          Gabrielle/Imagery
3                                                   Gabrielle/Imagery/600407 Uni of Auck.pdf
4                                                              Gabrielle/Imagery/Clipper.cpg
                                                 ...                                        
188703    Retrolens/WestCoast/WoodpeckerBay/Stack/WoodpeckerBay_18NOV1948_mosaic.jp2.aux.xml
188704        Retrolens/WestCoast/WoodpeckerBay/Stack/WoodpeckerBay_18NOV1948_mosaic.jp2.ovr
188705                    Retrolens/WestCoast/WoodpeckerBay/Stack/WoodpeckerBay_DigReady.mxd
188706                   Retrolens/WestCoast/WoodpeckerBay/Stack/WoodpeckerBay_DigReady1.mxd
188707                                   Retrolens/WestCoast/Woodpecke

In [71]:
def check_filename(filename):
    # This regex only matches shapefiles that contain something date-like in their names
    match = re.search(r'/Shorelines/.+\d{4}\w*.shp$', filename)
    return bool(match)

shapefiles = filelist[filelist.apply(check_filename)]
df = shapefiles.to_frame(name="filename")
df

Unnamed: 0,filename
29724,Gabrielle/Shorelines/Auckland/LongBay/LongBay_11JAN2023.shp
29731,Gabrielle/Shorelines/Auckland/LongBay/LongBay_25FEB2023.shp
29737,Gabrielle/Shorelines/Auckland/LongBay/LongBay_28DEC2022.shp
29745,Gabrielle/Shorelines/Auckland/Managawhai/MangawhaiNorth_28JUN2023.shp
29752,Gabrielle/Shorelines/Auckland/Managawhai/Mangawhai_11DEC2022.shp
...,...
188551,Retrolens/WestCoast/WhareateaRiver/Shorelines/WhareateaRiver_23FEB2023.shp
188559,Retrolens/WestCoast/WhareateaRiver/Shorelines/WhareateaRiver_25NOV1974.shp
188672,Retrolens/WestCoast/WoodpeckerBay/Shorelines/WoodpeckerBay_17APR1976_mosaic.shp
188678,Retrolens/WestCoast/WoodpeckerBay/Shorelines/WoodpeckerBay_18FEB1988_mosaic.shp


In [72]:
images = filelist[filelist.str.contains("/Stack/", case=False) & filelist.str.endswith((".jpg", ".jp2", ".tif", ".TIF"))]
images

36779       MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_01MAR2015.tif
36785       MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_04JAN2017.tif
36791     MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_07APRIL2010.tif
36797       MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_20SEP2008.tif
36803       MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_21MAR2021.tif
                                                       ...                                              
188567                      Retrolens/WestCoast/WhareateaRiver/Stack/WhareateaRiver_15JAN1988_mosaic.jp2
188570                      Retrolens/WestCoast/WhareateaRiver/Stack/WhareateaRiver_25NOV1974_mosaic.jp2
188696                        Retrolens/WestCoast/WoodpeckerBay/Stack/WoodpeckerBay_17APR1976_mosaic.jp2
188699                        Retrolens/WestCoast/Woodp

In [73]:
# When fuzzy matching, ignore these strings
# _0 will ignore leading zeros in dates
strings_to_delete = ["_mosaic", "_mosiac", "_mosaid", ".mosaic", "_cliff", "_beach", "_beachcliffsegment", "_MF.shp", "_MT.shp", "_0", "_1.tif", "_2.tif", "_3.tif", "_LDS", "_"]

def fuzz_preprocess(filename):
    for s in strings_to_delete:
        filename = filename.replace(s, "")
    # Case-insensitive
    filename = filename.lower()
    # Ignore extension
    filename = os.path.splitext(filename)[0]
    # Basename only
    filename = os.path.basename(filename)
    return filename

def get_matching_image(filename):
    dirname = os.path.dirname(filename)
    RL_dirname = dirname.replace("Stack/", "").replace("Shorelines", "Stack").replace("MaxarImagery/HighFreq", "Retrolens")
    Maxar_dirname = dirname.replace("Imagery/Shorelines", "Imagery/Stack").replace("Shorelines", "Imagery/Stack").replace("Retrolens", "MaxarImagery/HighFreq")
    Maxar_dirname_uppercase = Maxar_dirname.replace("Stack", "STACK")
    Maxar_dirname_outside_Imagery = Maxar_dirname.replace("Imagery/Stack", "Stack")
    all_files_in_folder = images[images.str.startswith((RL_dirname, Maxar_dirname, Maxar_dirname_uppercase, Maxar_dirname_outside_Imagery))]
    if len(all_files_in_folder) == 0:
        return "", 0
    match, score, index = rapidfuzz.process.extractOne(query=filename, choices=all_files_in_folder, processor=fuzz_preprocess)
    return match, score

df["matched_image"], df["match_score"] = zip(*df.filename.apply(get_matching_image))
print("Perfect matches:", sum(df.match_score == 100))
print("Imperfect matches:", sum(df.match_score < 100))
df[["filename", "matched_image", "match_score"]].sort_values(by="match_score").to_csv("shoreline_image_matching.csv", index=False)

Perfect matches: 1504
Imperfect matches: 916


In [74]:
df[df.filename.str.contains("Ahipara")]

Unnamed: 0,filename,matched_image,match_score
49544,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_05NOV2016.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_05NOV2016.tif,100.0
49552,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_10JUN2012.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_10JUN2012.tif,100.0
49560,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_12SEP2018.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_12SEP2018.TIF,100.0
49568,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_15JUL2006.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_15JUL2006.tif,100.0
49576,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_16FEB2004.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_16FEB2004.TIF,100.0
49584,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_18APR2011.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_18APR2011.tif,100.0
49592,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_22JUN2019.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_22JUN2019.TIF,100.0
49600,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_28APR2014.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_28APR2014.tif,100.0
49608,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_28JUN2013.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_28JUN2013.tif,100.0
49616,MaxarImagery/HighFreq/Northland/Ahipara/Shorelines/Ahipara_31AUG2015.shp,MaxarImagery/HighFreq/Northland/Ahipara/Imagery/Stack/Ahipara_31AUG2015.tif,100.0


In [75]:
df[df.filename.str.startswith("Gabrielle")]

Unnamed: 0,filename,matched_image,match_score
29724,Gabrielle/Shorelines/Auckland/LongBay/LongBay_11JAN2023.shp,,0.0
29731,Gabrielle/Shorelines/Auckland/LongBay/LongBay_25FEB2023.shp,,0.0
29737,Gabrielle/Shorelines/Auckland/LongBay/LongBay_28DEC2022.shp,,0.0
29745,Gabrielle/Shorelines/Auckland/Managawhai/MangawhaiNorth_28JUN2023.shp,,0.0
29752,Gabrielle/Shorelines/Auckland/Managawhai/Mangawhai_11DEC2022.shp,,0.0
...,...,...,...
32075,Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_03DEC2022.shp,,0.0
32081,Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_15JAN2023.shp,,0.0
32091,Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_18FEB2023.shp,,0.0
32099,Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_21AUG2023.shp,,0.0


In [76]:
Gabrielle = df[df.filename.str.startswith("Gabrielle")].copy()
Gabrielle_images = filelist[filelist.str.startswith("Gabrielle") & filelist.str.endswith((".jpg", ".jp2", ".tif"))]
len(Gabrielle), len(Gabrielle_images)

(295, 5786)

In [77]:
def get_matching_image(filename):
    match, score, index = rapidfuzz.process.extractOne(query=filename, choices=Gabrielle_images, processor=fuzz_preprocess)
    return match, score

Gabrielle["matched_image"], Gabrielle["match_score"] = zip(*Gabrielle.filename.apply(get_matching_image))
print("Perfect matches:", sum(Gabrielle.match_score == 100))
print("Imperfect matches:", sum(Gabrielle.match_score < 100))

Perfect matches: 218
Imperfect matches: 77


In [78]:
#pd.set_option("display.max_rows",None)
Gabrielle.filename = Gabrielle.filename.str.replace(prefix,"")
Gabrielle.matched_image = Gabrielle.matched_image.str.replace(prefix,"") 
Gabrielle.sort_values(by="match_score")

Unnamed: 0,filename,matched_image,match_score
30084,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/BayofPlenty/Opotiki/Opotiki_28FEB2023.tif,57.894737
30479,Gabrielle/Shorelines/Gisborne/TeAraroa/EastCape_18DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Onetangi_21DEC2022.tif,58.823529
29941,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_20DEC2021.shp,Gabrielle/Imagery/pre-storm/Northland/HelenaBay/HelenaBay_27DEC2022.tif,59.459459
30360,Gabrielle/Shorelines/Delivery/PostGabrielle_shorelines_21022023.shp,Gabrielle/Imagery/pre-storm/Bay of Plenty/Tauranga/tauranga-winter-01m-urban-aerial-photos-2022/BD37_500_022023.jpg,60.000000
30171,Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/PNEO/OmahaPakiri_04APR2023.tif,61.538462
...,...,...,...
30863,Gabrielle/Shorelines/Merged/Auckland/PieMelonBay_27DEC2022.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/PieMelonBay_27DEC2022.tif,100.000000
30868,Gabrielle/Shorelines/Merged/Auckland/SnellsBeach_28FEB2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/SnellsBeach/SnellsBeach_28FEB2023.tif,100.000000
30873,Gabrielle/Shorelines/Merged/Auckland/Tawharanui_01MAR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Tawharanui/Tawharanui_01MAR2023.tif,100.000000
30806,Gabrielle/Shorelines/Merged/Auckland/Omaha_21MAR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/Omaha_21MAR2023.tif,100.000000


In [79]:
index_tiles = filelist[filelist.str.contains("Gabrielle/.+index-tiles.+.shp$")]
index_tiles

2390                              Gabrielle/Imagery/post_storm/LINZ/BayofPlenty/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2023.shp
4273                           Gabrielle/Imagery/post_storm/LINZ/Gisborne/gisborne-02m-cyclone-gabrielle-aerial-photos-index-tiles-202.shp
5874                      Gabrielle/Imagery/post_storm/LINZ/HawkesBay/hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index-tiles-Copy.shp
20155    Gabrielle/Imagery/pre-storm/Bay of Plenty/Tauranga/tauranga-winter-01m-urban-aerial-photos-index-tiles-2022/tauranga-winter-01...
24846    Gabrielle/Imagery/pre-storm/LINZ/BayofPlenty/LINZ20212022/bay-of-plenty-02m-rural-aerial-photos-index-tiles-2021-2022/bay-of-p...
24853    Gabrielle/Imagery/pre-storm/LINZ/BayofPlenty/LINZ20212022/bay-of-plenty-02m-rural-aerial-photos-index-tiles-2021-2022/bay-of-p...
24860            Gabrielle/Imagery/pre-storm/LINZ/BayofPlenty/LINZ20212022/bay-of-plenty-02m-rural-aerial-photos-index-tiles-2021-2022.shp
25768                      

In [80]:
def read_index_tile(f):
    gdf = gpd.read_file(f)
    gdf["filename"] = f
    return gdf
Gabrielle_tiles = pd.concat(read_index_tile(prefix+f) for f in index_tiles)
len(Gabrielle_tiles)

16012

In [81]:
maybe_LDS = Gabrielle[(Gabrielle.match_score < 100) & ~Gabrielle.filename.str.contains("Delivery")].sort_values("match_score")
maybe_LDS

Unnamed: 0,filename,matched_image,match_score
30084,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/BayofPlenty/Opotiki/Opotiki_28FEB2023.tif,57.894737
30479,Gabrielle/Shorelines/Gisborne/TeAraroa/EastCape_18DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Onetangi_21DEC2022.tif,58.823529
29941,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_20DEC2021.shp,Gabrielle/Imagery/pre-storm/Northland/HelenaBay/HelenaBay_27DEC2022.tif,59.459459
30171,Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/PNEO/OmahaPakiri_04APR2023.tif,61.538462
29929,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_02JAN2022.shp,Gabrielle/Imagery/pre-storm/Waikato/TeKaroBay/TeKaroBay_03JAN2022.tif,62.857143
...,...,...,...
30891,Gabrielle/Shorelines/Merged/Auckland/Whangapoua_09FEB2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Whangapoua/Whangapoua_19FEB2023.tif,97.297297
30781,Gabrielle/Shorelines/Merged/Auckland/Mangawhai_29JUN2021.shp,Gabrielle/Imagery/pre-storm/Northland/Mangawhai/Mangawhai_29JUNE2021.tif,97.297297
29767,Gabrielle/Shorelines/Auckland/Managawhai/Mangawhai_29JUN2021.shp,Gabrielle/Imagery/pre-storm/Northland/Mangawhai/Mangawhai_29JUNE2021.tif,97.297297
31453,Gabrielle/Shorelines/Northland/LangsBeach/LangsBeach_29JUN2021.shp,Gabrielle/Imagery/pre-storm/Northland/LangsBeach/LangsBeach_29JUNE2021.tif,97.435897


In [88]:
def get_resolution(filename):
  gdf = gpd.read_file(prefix+filename)
  if "LDS" not in gdf.Source.unique():
    return
  bounds = gdf.total_bounds
  intersecting_tiles = Gabrielle_tiles[Gabrielle_tiles.intersects(box(*bounds))]
  if len(intersecting_tiles) == 0:
    print(f"{filename} doesn't intersect any index tiles")
    return
  date = gdf.Date.unique()[0]
  DSASdate = gdf.DSASDate.unique()[0]
  date_options = intersecting_tiles.FLOWN.dropna().unique().tolist()
  if not date_options and "hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index" in intersecting_tiles.filename.unique()[0]:
    return .1
  else:
    if DSASdate in date_options:
      date = DSASdate
    for option in date_options:
      if DSASdate in option:
        date = option
    match, score, index = rapidfuzz.process.extractOne(query=date, choices=date_options, processor=lambda s: s.replace("-", ""))
    if score != 100:
      print(f"{date} in {filename} is likely a typo for {match} ({score}% match)")
    tiles_from_this_date = intersecting_tiles[intersecting_tiles.FLOWN == match]
    assert len(tiles_from_this_date) > 0
    resolutions = tiles_from_this_date.GSDM.astype(str).str.strip("m").astype(float).unique()
    return resolutions[0]

maybe_LDS["Pixel_ER"] = maybe_LDS.filename.apply(get_resolution)
maybe_LDS[["filename", "Pixel_ER"]].dropna().sort_values("filename").to_csv("Gabrielle_LDS.csv", index=False)

2023-04-25 in Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp is likely a typo for 2023-04-05 (87.5% match)


In [83]:
df = pd.concat((df, Gabrielle))
df = df[df.match_score >= 100].sort_values(by="match_score")
df

Unnamed: 0,filename,matched_image,match_score
36721,MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Shorelines/KarekareBethells_01MAR2015.shp,MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_01MAR2015.tif,100.0
158794,Retrolens/Southland/TeWaewaeBay_East1/Shorelines/TeWaewaeBay_East1_05MAR1976.shp,Retrolens/Southland/TeWaewaeBay_East1/Stack/TeWaewaeBay_East1_05MAR1976_mosaic.jp2,100.0
158288,Retrolens/Southland/Riverton/Shorelines/Riverton_27Feb1976.shp,Retrolens/Southland/Riverton/Stack/Riverton_27FEB1976_mosaic.jp2,100.0
158282,Retrolens/Southland/Riverton/Shorelines/Riverton_24Feb1968.shp,Retrolens/Southland/Riverton/Stack/Riverton_24FEB1968_mosaic.jp2,100.0
158274,Retrolens/Southland/Riverton/Shorelines/Riverton_21Feb1967.shp,Retrolens/Southland/Riverton/Stack/Riverton_21FEB1967_mosaic.jp2,100.0
...,...,...,...
86166,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_29MAR1949.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_29MAR1949_mosaic.tif,100.0
86160,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_24OCT1980.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_24OCT1980_mosaic.tif,100.0
86154,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_19AUG1960.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_19AUG1960_mosaic.tif,100.0
86142,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_14MAY1954.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_14MAY1954_mosaic.tif,100.0


## Investigate metadata about the matched images

In [86]:
def get_meta(tup):
    i, row = tup
    image = rio.open(prefix + row.matched_image)
    try:
        gdf = gpd.read_file(prefix + row.filename)
        row = row.to_dict()
        row["n_lines"] = len(gdf.dropna(subset="geometry").explode(index_parts=False))
    except: 
        print(f"Can't read{row['filename']}")
    
    row.update(image.profile)
    row["GCPs"] = len(image.gcps[0])
    row["res"] = image.res
    row["CPS"] = "CPS" in gdf.columns
    return row

metafile = "meta.csv"
if os.path.isfile(metafile):
    meta = pd.read_csv(metafile)
else:
    meta = pd.DataFrame(thread_map(get_meta, df.iterrows(), total=len(df)))
    meta.to_csv(metafile, index=False)
meta

  0%|          | 0/1722 [00:00<?, ?it/s]

Unnamed: 0,filename,matched_image,match_score,n_lines,driver,dtype,nodata,width,height,count,crs,transform,blockxsize,blockysize,tiled,compress,interleave,GCPs,res,CPS,photometric
0,MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Shorelines/KarekareBethells_01MAR2015.shp,MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_01MAR2015.tif,100.0,15,GTiff,uint8,,8354,23227,3,,"(0.5, 0.0, 1727262.7889462265, 0.0, -0.5, 5917711.902718019, 0.0, 0.0, 1.0)",128.0,128,True,lzw,pixel,28,"(0.5, 0.5)",True,
1,Retrolens/Southland/TeWaewaeBay_East1/Shorelines/TeWaewaeBay_East1_05MAR1976.shp,Retrolens/Southland/TeWaewaeBay_East1/Stack/TeWaewaeBay_East1_05MAR1976_mosaic.jp2,100.0,10,JP2OpenJPEG,uint16,256.0,4307,8657,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.7659482382436198, 0.0, 1192099.4904454553, 0.0, -0.7659482382436074, 4864509.403271275, 0.0, 0.0, 1.0)",1024.0,1024,True,,pixel,0,"(0.7659482382436198, 0.7659482382436074)",True,
2,Retrolens/Southland/Riverton/Shorelines/Riverton_27Feb1976.shp,Retrolens/Southland/Riverton/Stack/Riverton_27FEB1976_mosaic.jp2,100.0,5,JP2OpenJPEG,uint16,256.0,10714,9968,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.7000000000000044, 0.0, 1215744.331318585, 0.0, -0.6999999999999627, 4857766.89250715, 0.0, 0.0, 1.0)",1024.0,1024,True,,pixel,0,"(0.7000000000000044, 0.6999999999999627)",True,
3,Retrolens/Southland/Riverton/Shorelines/Riverton_24Feb1968.shp,Retrolens/Southland/Riverton/Stack/Riverton_24FEB1968_mosaic.jp2,100.0,0,JP2OpenJPEG,uint16,256.0,3954,3739,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(1.9000000000000234, 0.0, 1215341.0511565432, 0.0, -1.8999999999999004, 4858268.254304423, 0.0, 0.0, 1.0)",1024.0,1024,True,,pixel,0,"(1.9000000000000234, 1.8999999999999004)",False,
4,Retrolens/Southland/Riverton/Shorelines/Riverton_21Feb1967.shp,Retrolens/Southland/Riverton/Stack/Riverton_21FEB1967_mosaic.jp2,100.0,6,JP2OpenJPEG,uint16,256.0,3390,2625,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(1.8, 0.0, 1215560.3578031869, 0.0, -1.8, 4857025.942022507, 0.0, 0.0, 1.0)",1024.0,1024,True,,pixel,0,"(1.8, 1.8)",True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1717,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_29MAR1949.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_29MAR1949_mosaic.tif,100.0,3,GTiff,uint16,256.0,4470,5092,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.5, 0.0, 1731590.5214523363, 0.0, -0.5, 5900619.493466633, 0.0, 0.0, 1.0)",128.0,128,True,,pixel,0,"(0.5, 0.5)",True,
1718,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_24OCT1980.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_24OCT1980_mosaic.tif,100.0,7,GTiff,uint16,256.0,6774,11727,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.75, 0.0, 1730510.386385566, 0.0, -0.75, 5906719.399330548, 0.0, 0.0, 1.0)",128.0,128,True,,pixel,0,"(0.75, 0.75)",True,
1719,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_19AUG1960.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_19AUG1960_mosaic.tif,100.0,9,GTiff,uint16,256.0,11933,21980,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.3999999999999961, 0.0, 1730294.5112870587, 0.0, -0.4, 5907001.244605399, 0.0, 0.0, 1.0)",128.0,128,True,,pixel,0,"(0.3999999999999961, 0.4)",True,
1720,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_14MAY1954.shp,Retrolens/Auckland/Whatipu/Stack/Whatipu_14MAY1954_mosaic.tif,100.0,5,GTiff,uint16,256.0,6615,5136,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.55, 0.0, 1731537.8621117289, 0.0, -0.5499999999999637, 5900213.777841544, 0.0, 0.0, 1.0)",128.0,128,True,,pixel,0,"(0.55, 0.5499999999999637)",True,


In [63]:
empty = meta[meta.n_lines == 0]
empty.shape

(72, 21)

In [64]:
def get_mtime(filename):
    return pd.to_datetime(os.path.getmtime(prefix+filename), unit="s", origin="unix", utc=True).tz_convert("Pacific/Auckland")
empty["mtime"] = empty.filename.apply(get_mtime)
empty["size_bytes"] = (prefix + empty.filename).apply(os.path.getsize)
#pd.set_option("display.max_rows",None)
empty[["filename", "n_lines", "mtime", "size_bytes"]].sort_values("mtime")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty["mtime"] = empty.filename.apply(get_mtime)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty["size_bytes"] = (prefix + empty.filename).apply(os.path.getsize)


Unnamed: 0,filename,n_lines,mtime,size_bytes
6,Retrolens/Southland/Riverton/Shorelines/Riverton_24Feb1968.shp,0,2021-06-16 12:17:29.273264896+12:00,100
9,Retrolens/Southland/Riverton/Shorelines/Riverton_11Feb1978.shp,0,2021-06-16 12:17:29.273264896+12:00,100
10,Retrolens/Southland/Riverton/Shorelines/Riverton_10Dec1958.shp,0,2021-06-16 12:17:29.273264896+12:00,100
1589,MaxarImagery/HighFreq/WestCoast/Hunt Beach/Shorelines/HuntBeach_11DEC2017.shp,0,2021-06-16 12:17:29.273264896+12:00,100
1583,MaxarImagery/HighFreq/WestCoast/Ohinemaka/Shorelines/Ohinemaka_12MAR2012.shp,0,2021-06-16 12:17:29.273264896+12:00,100
...,...,...,...,...
1029,MaxarImagery/HighFreq/Southland/ToetoesHarbour/Shorelines/ToetoesHarbour_28Jan2010.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1034,MaxarImagery/HighFreq/Southland/ToetoesHarbour/Shorelines/ToetoesHarbour_02Feb2016.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1629,Retrolens/Bay of Plenty/MountMaunganui_South/Shorelines/MountMaunganui_South_06FEB1961.shp,0,2022-01-16 15:09:57.307988992+13:00,100
201,Retrolens/Taranaki/New Plymouth District Council/Waitara/Shorelines/Waitara_16SEP1958.shp,0,2022-05-06 20:21:39.995812864+12:00,100


In [65]:
with pd.option_context("display.max_rows", 70):
  display(empty[["filename", "n_lines", "mtime", "size_bytes"]][empty.mtime > "2021-11-23"].sort_values("mtime"))

Unnamed: 0,filename,n_lines,mtime,size_bytes
36,Retrolens/Southland/Toetoes_Harbour/Shorelines/Toetoes_Harbour_26FEB1977.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1087,MaxarImagery/HighFreq/Canterbury/RakaiaSouth/Shorelines/RakaiaSouth_18JAN2020.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1090,MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_30JAN2021.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1091,MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_16MAR2012.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1092,MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_12MAY2019.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1093,MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_11FEB2015.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1095,MaxarImagery/HighFreq/Canterbury/Washdyke/Shorelines/Washdyke_19SEP2014.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1096,MaxarImagery/HighFreq/Canterbury/Washdyke/Shorelines/Washdyke_24FEB2019.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1097,MaxarImagery/HighFreq/Canterbury/Washdyke/Shorelines/Washdyke_24FEB2020.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1105,MaxarImagery/HighFreq/Gisborne/Tolaga_KaiauaBay/Shorelines/Tolaga_KaiauaBay_31MAY2007.shp,0,2022-01-16 15:09:57.307988992+13:00,100


In [20]:
meta[meta.filename.str.startswith("Gabrielle")].CPS.value_counts()

CPS
False    126
True      17
Name: count, dtype: int64

In [21]:
meta.crs.value_counts(dropna=False)

crs
PROJCS["NZGD2000 / New Zealand Transverse Mercator 2000",GEOGCS["NZGD2000",DATUM["New_Zealand_Geodetic_Datum_2000",SPHEROID["GRS 1980",6378137,298.257222101004,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6167"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4167"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",173],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",1600000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","2193"]]                          937
NaN                                                                                                                                                                                                                                                                                                                                       

In [22]:
meta.GCPs.value_counts()

GCPs
0     1082
4       96
5       91
3       77
6       66
7       62
8       31
10      20
9       15
11      11
12      10
13       8
14       7
15       4
21       4
16       4
22       3
19       2
28       2
18       2
23       1
49       1
17       1
40       1
26       1
48       1
36       1
50       1
29       1
31       1
20       1
33       1
Name: count, dtype: int64

In [23]:
meta.columns

Index(['filename', 'matched_image', 'match_score', 'n_lines', 'driver',
       'dtype', 'nodata', 'width', 'height', 'count', 'crs', 'transform',
       'blockxsize', 'blockysize', 'tiled', 'compress', 'interleave', 'GCPs',
       'res', 'CPS', 'photometric'],
      dtype='object')

In [24]:
meta.driver.value_counts()

driver
JP2OpenJPEG    902
GTiff          703
JPEG             4
Name: count, dtype: int64

In [25]:
meta["count"].value_counts()

count
3    1479
4      72
1      58
Name: count, dtype: int64

In [26]:
meta.dtype.value_counts()

dtype
uint16     937
uint8      653
int32       15
uint32       3
float32      1
Name: count, dtype: int64

In [27]:
meta.nodata.value_counts()

nodata
2.560000e+02    912
2.147484e+09     15
0.000000e+00     13
6.553500e+04      6
2.550000e+02      4
6.553600e+04      3
Name: count, dtype: int64

## Make mosaics for LINZ images

In [28]:
maybe_LDS = df[(df.match_score < 100) & df.filename.str.startswith("Retrolens")].copy()
maybe_LDS

Unnamed: 0,filename,matched_image,match_score


In [29]:
if not os.path.isfile("maybe_LDS.csv"):
    maybe_LDS.filename.to_csv("maybe_LDS.csv", index=False)

## Match shapefiles with the corresponding index tiles shapefile
- First get the bounds of every tile
- Tiles that spatially match the bounds of a drawn EOV shapefile will be used to create the corresponding mosaic

In [30]:
if os.path.isfile("tilelist.parquet"):
    tilelist = gpd.read_parquet("tilelist.parquet")
else:
    tilelist = pd.DataFrame({"filename": glob("DigitalJPGs/**/*.jpg", recursive=True)})
    tilelist["region"] = tilelist.filename.str.split("/").str[1]
    tilelist["tilename"] = tilelist.filename.str.split("/").str[-1].str.replace(".jpg", "")
    def get_bounds(f):
        return rio.open(f).bounds
    tilelist["bounds"] = thread_map(get_bounds, tilelist.filename)
    tilelist.bounds = tilelist.bounds.progress_apply(lambda b: box(*b))
    tilelist = gpd.GeoDataFrame(tilelist, geometry="bounds")
    tilelist.to_parquet("tilelist.parquet")

In [31]:
# This cell might useful for finding matches, based on geospatial correlation
for filename in tqdm(maybe_LDS.filename):
    break
    df = gpd.read_file(filename)
    if len(df) == 0:
        continue
    bounds = df.total_bounds
    intersecting_tiles = tilelist[tilelist.intersects(box(*bounds))]
    print(f"{filename} matches:\n\t{len(intersecting_tiles)} tiles from:\n\t\t{intersecting_tiles.filename}")

0it [00:00, ?it/s]

In [48]:
LDS = pd.read_csv("maybe_LDS.csv").dropna()
LDS

Unnamed: 0,filename,matched_tile_root
0,Retrolens/Nelson/BoulderBank/Shorelines/BoulderBank_16JAN2019_NEL18R.shp,DigitalJPGs/Nelson/NEL18R
18,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,DigitalJPGs/Auckland/Auckland 2010R
22,Retrolens/Auckland/Omaha/Shorelines/Omaha_04DEC2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012
23,Retrolens/Auckland/PakiriBeach_North/Shorelines/PakiriBeach_North_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16
24,Retrolens/Auckland/TeArai/Shorelines/TeArai_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16
25,Retrolens/Auckland/Orewa/Shorelines/Orewa_08MAR2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011
26,Retrolens/Auckland/PakiriBeach/Shorelines/PakiriBeach_04MAR2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012
27,Retrolens/Auckland/KawakawaBay/Shorelines/KawakawaBay_03JAN2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011
31,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,DigitalJPGs/Otago/otago-03m-rural-aerial-photos-2017-2019
36,Retrolens/Otago/Tautuku_Beach/Shorelines/Tautuku_02May2013.shp,DigitalJPGs/Southland/STH13R


For each file, create a mosaic from the corresponding tiles

In [49]:
def get_match(filename):
    match, score, index = rapidfuzz.process.extractOne(query=filename, choices=shapefiles[~shapefiles.str.contains("Old shorelines")], processor=fuzz_preprocess)
    return match, score
LDS["matched_filename"], LDS["match_score"] = zip(*LDS.filename.apply(get_match))
LDS[LDS.filename != LDS.matched_filename][["filename", "matched_filename", "match_score"]]

Unnamed: 0,filename,matched_filename,match_score
18,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,MaxarImagery/HighFreq/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,100.0
31,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,100.0
36,Retrolens/Otago/Tautuku_Beach/Shorelines/Tautuku_02May2013.shp,MaxarImagery/HighFreq/Otago/Tautuku/Shorelines/Tautuku_02May2013.shp,100.0
39,Retrolens/Otago/Aramoana/Stack/Shorelines/Aramoana_08JULY2019.shp,MaxarImagery/HighFreq/Otago/Aramoana/Shorelines/Aramoana_08JULY2019.shp,100.0
48,Retrolens/Otago/StKilda_Tomahawk_SmaillsBeach/Stack/Shorelines/StKilda_Tomahawk_SmaillsBeach_31JAN2017.shp,MaxarImagery/HighFreq/Otago/StKilda_Tomahawk_SmaillsBeach/Shorelines/StKilda_Tomahawk_SmaillsBeach_31JAN2017.shp,100.0
50,Retrolens/Otago/BoulderBeach_SandflyBay/Stack/Shorelines/BoulderBeach_SandflyBay_29JAN2019.shp,Retrolens/Otago/BoulderBeach_SandflyBay/Shorelines/BoulderBeach_SandflyBay_29JAN2019.shp,100.0
53,Retrolens/Otago/PapanuiBeach_WickliffeBay/Stack/Shorelines/PapanuiBeach_WickliffeBay_29JAN2019.shp,Retrolens/Otago/PapanuiBeach_WickliffeBay/Shorelines/PapanuiBeach_WickliffeBay_29JAN2019.shp,100.0
100,Retrolens/Bay of Plenty/OhopeBeach/Shorelines/OhopeBeach_3DEC2014.shp,Retrolens/Bay of Plenty/PortOhope/Shorelines/OhopeBeach_3DEC2014.shp,100.0
104,Retrolens/Wellington/KapitiNorth/Shorelines/NorthKapiti_15MAR2017.shp,MaxarImagery/HighFreq/Wellington/KapitiNorth/Shorelines/NorthKapiti_15MAR2017.shp,100.0


In [54]:
LDS.filename = LDS.matched_filename

In [51]:
LDS["done"] = LDS.matched_filename.apply(lambda f: os.path.isfile(prefix + f.replace(".shp", ".tif")))
LDS["done"].value_counts(dropna=False)
LDS

Unnamed: 0,filename,matched_tile_root,matched_filename,match_score,done
0,Retrolens/Nelson/BoulderBank/Shorelines/BoulderBank_16JAN2019_NEL18R.shp,DigitalJPGs/Nelson/NEL18R,Retrolens/Nelson/BoulderBank/Shorelines/BoulderBank_16JAN2019_NEL18R.shp,100.0,True
18,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,DigitalJPGs/Auckland/Auckland 2010R,MaxarImagery/HighFreq/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,100.0,True
22,Retrolens/Auckland/Omaha/Shorelines/Omaha_04DEC2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012,Retrolens/Auckland/Omaha/Shorelines/Omaha_04DEC2012.shp,100.0,True
23,Retrolens/Auckland/PakiriBeach_North/Shorelines/PakiriBeach_North_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16,Retrolens/Auckland/PakiriBeach_North/Shorelines/PakiriBeach_North_06NOV2015.shp,100.0,True
24,Retrolens/Auckland/TeArai/Shorelines/TeArai_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16,Retrolens/Auckland/TeArai/Shorelines/TeArai_06NOV2015.shp,100.0,True
25,Retrolens/Auckland/Orewa/Shorelines/Orewa_08MAR2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011,Retrolens/Auckland/Orewa/Shorelines/Orewa_08MAR2011.shp,100.0,True
26,Retrolens/Auckland/PakiriBeach/Shorelines/PakiriBeach_04MAR2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012,Retrolens/Auckland/PakiriBeach/Shorelines/PakiriBeach_04MAR2012.shp,100.0,True
27,Retrolens/Auckland/KawakawaBay/Shorelines/KawakawaBay_03JAN2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011,Retrolens/Auckland/KawakawaBay/Shorelines/KawakawaBay_03JAN2011.shp,100.0,True
31,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,DigitalJPGs/Otago/otago-03m-rural-aerial-photos-2017-2019,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,100.0,True
36,Retrolens/Otago/Tautuku_Beach/Shorelines/Tautuku_02May2013.shp,DigitalJPGs/Southland/STH13R,MaxarImagery/HighFreq/Otago/Tautuku/Shorelines/Tautuku_02May2013.shp,100.0,True


In [53]:
display(LDS.done.value_counts(dropna=False))

for i, row in tqdm(LDS[~LDS.done].iterrows(), total=len(LDS[~LDS.done])):
    filename = row.matched_filename
    mosaic_filename = filename.replace(".shp", ".tif")
    shapefile = gpd.read_file(filename)
    bounds = shapefile.total_bounds
    intersecting_tiles = tilelist[tilelist.intersects(box(*bounds)) & tilelist.filename.str.startswith(row.matched_tile_root)]
    tiles = list(intersecting_tiles.filename)
    print(len(tiles))
    Z, transform = merge(tiles)
    with rasterio.open(
        mosaic_filename,
        'w',
        driver='GTiff',
        height=Z.shape[1],
        width=Z.shape[2],
        count=Z.shape[0],
        dtype=Z.dtype,
        crs=shapefile.crs,
        transform=transform,
        compress='lzw',
        BIGTIFF = "IF_SAFER"
    ) as dst:
        dst.write(Z)

done
True    56
Name: count, dtype: int64

0it [00:00, ?it/s]

In [None]:
LDS["matched_image"] = LDS.filename.str.replace(".shp", ".tif")
LDS.to_csv("LDS_matches.csv", index=False)

In [None]:
metafile = "LDS_meta.csv"
if os.path.isfile(metafile):
    meta = pd.read_csv(metafile)
else:
    meta = pd.DataFrame(process_map(get_meta, LDS.iterrows(), total=len(LDS)))
    meta.to_csv(metafile, index=False)
meta

### Algorithm for converting polyline shapefile to polygon annotations, labelled as sea or land

In [None]:
coastline = gpd.read_file("lds-nz-coastlines-and-islands-polygons-topo-150k-FGDB.zip!nz-coastlines-and-islands-polygons-topo-150k.gdb")

In [None]:
# Get a random (known-good) annotation
sample = LDS.sample(1)
display(sample)
image_filename = sample.matched_image.iloc[0]
image = rio.open(image_filename)
sample_gdf = gpd.read_file(sample.filename.iloc[0])
sample_gdf

In [None]:
def line_to_split_bbox(geo):
    bounding_box = geo.envelope
    split_bbox = shapely.ops.split(bounding_box, geo)
    return split_bbox

split_bboxes = sample_gdf.geometry.apply(line_to_split_bbox).explode(index_parts=True).reset_index()
#split_bboxes.geometry = split_bboxes.geometry.buffer(0)
split_bboxes["area"] = split_bboxes.area
split_bboxes = split_bboxes[split_bboxes.area > 1e5]
split_bboxes

In [None]:
relevant_coastline = coastline.clip(split_bboxes.total_bounds)
split_bboxes["area_inland"] = split_bboxes.clip(relevant_coastline).area
split_bboxes["fraction_inland"] = split_bboxes.area_inland / split_bboxes.area
split_bboxes["class"] = split_bboxes.fraction_inland.apply(lambda f: "land" if f > .5 else "sea")
split_bboxes

In [None]:
# Plot the results, and check it all looks ok
fig, ax = plt.subplots()
ax = rasterio.plot.show(image, ax=ax)

cmap = matplotlib.colors.ListedColormap(['green', 'blue'])
split_bboxes.plot(ax=ax, alpha=.5, column='class', cmap=cmap, categorical=True, legend=True, edgecolor='black')
split_bboxes.apply(lambda x: ax.annotate(text=round(x.fraction_inland, 2), xy=x.geometry.centroid.coords[0], ha='center'), axis=1)

#relevant_coastline.plot(ax=ax, alpha=.5, edgecolor="cyan")

b = split_bboxes.total_bounds
xlim = ([b[0], b[2]])
ylim = ([b[1], b[3]])
ax.set_xlim(xlim)
ax.set_ylim(ylim)