In [27]:
import geopandas as gpd # Geospatial data operations
import rasterio as rio # Geospatial imagery manipulation
import rasterio.plot
import pandas as pd # Tabular data
import os
import re
import rapidfuzz # Fuzzy string matching
from tqdm.auto import tqdm # Progress bars
from tqdm.contrib.concurrent import thread_map, process_map # Parallel operations
import matplotlib # Plots
import matplotlib.pyplot as plt
import shapely # Polygon operations
#import solaris.tile as tile # Tile splitting
#import solaris.data.coco as coco
import contextlib
import io
import rasterio # Raster imagery operations
from rasterio.vrt import WarpedVRT
from rasterio import transform
from rasterio.merge import merge # Merging tiles into mosaics
from glob import glob # Finding files
from shapely.geometry import box # Bounding box operations
matplotlib.rcParams['figure.figsize'] = (20, 10)
tqdm.pandas()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 200)
import platform
if platform.system() == "Windows":
  prefix = "Z:/"
else:
  prefix = "ressci201900060-RNC2-Coastal/"

## Match shapefiles to images

In [2]:
filename = prefix + "Nick/filelist.txt"
if os.path.isfile(filename):
    filelist = pd.read_csv(filename, header=None).iloc[:,0]
else:
    def find_files(root):
        return pd.Series(glob(prefix + root + "**/**", recursive=True)).str.replace(prefix, "")
    filelist = pd.concat(thread_map(find_files, ["Gabrielle", "MaxarImagery", "Retrolens", "SpatialData/Mosaics"]))
    if platform.system() == "Windows":
        filelist = filelist.str.replace("\\", "/", regex=False)
    filelist.to_csv(filename, index=False, header=False)
filelist

0                                                                                       Gabrielle/
1                                                                                 Gabrielle/Orders
2                                                                            Gabrielle/Orders/AOIs
3                                          Gabrielle/Orders/AOIs/Pauanui_Tairua_07JAN2023WGS84.sbn
4                                               Gabrielle/Orders/AOIs/Pauanui_Tairua_07JAN2023.sbx
                                                    ...                                           
249721    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L02_R00000062_C00000065.tfw
249722    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L03_R00000020_C00000022.tfw
249723    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L01_R00000125_C00000131.tif
249724    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L01_R00000127_C00000133.tif
249725    

In [3]:
def check_filename(filename):
    # This regex only matches shapefiles that contain something date-like in their names
    match = re.search(r'/Shorelines/.+\d{4}\w*.shp$', filename)
    return bool(match)

shapefiles = filelist[filelist.apply(check_filename)]
df = shapefiles.to_frame(name="filename")
df

Unnamed: 0,filename
29735,Gabrielle/Shorelines/Merged/Auckland/Whangapoua_19FEB2023.shp
29762,Gabrielle/Shorelines/Merged/Auckland/Oneroa_27DEC2022.shp
29765,Gabrielle/Shorelines/Merged/Auckland/PakiriNorth_14OCT2021.shp
29769,Gabrielle/Shorelines/Merged/Auckland/Tawharanui_23DEC2021.shp
29771,Gabrielle/Shorelines/Merged/Auckland/Tawharanui_01MAR2023.shp
...,...
193514,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_02JAN1988.shp
193520,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_06OCT1980.shp
194627,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_22AUG1961.shp
194638,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_Wellington_13FEB2021.shp


In [4]:
images = filelist[filelist.str.contains("Stack", case=False) & filelist.str.endswith((".jpg", ".jp2", ".tif", ".TIF"))]
images

33158     MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_08NOV2019_2.tif
33159     MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_08NOV2019_1.tif
33161       MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_12MAR2018.tif
33164       MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_31AUG2005.tif
33178       MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_25DEC2015.tif
                                              ...                                      
194864            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_19NOV1972_mosaic.jp2
194870            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_04APR1986_mosaic.jp2
194873            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_10NOV1977_mosaic.tif
194874            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_22AUG1961_mosaic.tif
194877            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_01AUG1942_mosaic.jp2
Name: 0, Length: 2702, dtype: ob

In [5]:
Gabrielle_images = filelist[filelist.str.startswith("Gabrielle") & filelist.str.endswith((".jpg", ".jp2", ".tif"))]

In [6]:
# When fuzzy matching, ignore these strings
# _0 will ignore leading zeros in dates
strings_to_delete = ["_mosaic", "_mosiac", "_mosaid", ".mosaic", "_cliff", "_beach", "_beachcliffsegment", "_MF.shp", "_MT.shp", "_0", "_1.tif", "_2.tif", "_3.tif", "_LDS", "_", " "]

def fuzz_preprocess(filename):
    for s in strings_to_delete:
        filename = filename.replace(s, "")
    # Case-insensitive
    filename = filename.lower()
    # Ignore extension
    filename = os.path.splitext(filename)[0]
    # Basename only
    filename = os.path.basename(filename)
    return filename

def get_matching_image(filename):
    if filename.startswith("Gabrielle"):
        match, score, index = rapidfuzz.process.extractOne(query=filename, choices=Gabrielle_images, processor=fuzz_preprocess)
        return match, score
    dirname = os.path.dirname(filename)
    dirname = os.path.dirname(dirname)
    RL_dirname = dirname.replace("MaxarImagery/HighFreq", "Retrolens")
    Maxar_dirname = dirname.replace("Retrolens", "MaxarImagery/HighFreq")
    all_files_in_folder = images[images.str.startswith((RL_dirname, Maxar_dirname))]
    if len(all_files_in_folder) == 0:
        return "", 0
    match, score, index = rapidfuzz.process.extractOne(query=filename, choices=all_files_in_folder, processor=fuzz_preprocess)
    return match, score

df["matched_image"], df["match_score"] = zip(*df.filename.apply(get_matching_image))
print("Perfect matches:", sum(df.match_score == 100))
print("Imperfect matches:", sum(df.match_score < 100))
df[["filename", "matched_image", "match_score"]].sort_values(by="match_score").to_csv("shoreline_image_matching.csv", index=False)

Perfect matches: 2072
Imperfect matches: 698


In [7]:
def get_source(filename):
    try:
        shapefile = gpd.read_file(prefix+filename)
    except:
        print(f"Can't read {filename}")
        return "Unknown"
    if "Source" not in shapefile.columns:
        if filename.startswith("Retrolens"):
            return "RL"
        elif filename.startswith("MaxarImagery/HighFreq"):
            return "MAX"
        else:
            return "Unknown"
    else:
        sources = shapefile.Source.unique()
        if len(sources) == 0 or not sources[0]:
            #print(f"{filename} has no sources")
            if filename.startswith("MaxarImagery/HighFreq"):
                return "MAX"
            else:
                return "Unknown"
        if len(sources) > 1:
            print(f"{filename} has ambiguous sources: {sources}")
        return sources[0]

failures = df[df.match_score < 100]
failures["Source"] = process_map(get_source, failures.filename)
failures.Source.value_counts(dropna=False)

  0%|          | 0/698 [00:00<?, ?it/s]

Gabrielle/Shorelines/BayofPlenty/Papamoa/BOPLINZ_Papamoa_04APR2023.shp has ambiguous sources: ['LDS' None]
Retrolens/HawkesBay/Awatoto/Shorelines/Awatoto_06MAR2019.shp has ambiguous sources: ['LDS' None]
Retrolens/Taranaki/OpunakeBeach/Shorelines/OpunakeBeach_11FEB2017.shp has ambiguous sources: ['LDS' None]
Retrolens/Taranaki/South Taranaki/OpunakeBeach/Shorelines/OpunakeBeach_11FEB2017.shp has ambiguous sources: ['LDS' None]
Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp has ambiguous sources: ['LDS' None]
Retrolens/Otago/Moeraki_HampdenBeach/Stack/Shorelines/Moeraki_HampdenBeah_2006.shp has ambiguous sources: ['LDS' None]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  failures["Source"] = process_map(get_source, failures.filename)


Source
LDS        398
RL         118
MAX         70
Unknown     52
CRI         24
PLE         20
RLN          8
NEO          3
Max          2
JIL          1
SAT          1
RLS          1
Name: count, dtype: int64

In [8]:
failures[failures.filename.str.startswith("Gabrielle")].sort_values(by=["match_score", "filename"])

Unnamed: 0,filename,matched_image,match_score,Source
30570,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/BayofPlenty/Opotiki/Opotiki_28FEB2023.tif,57.894737,LDS
31691,Gabrielle/Shorelines/Gisborne/TeAraroa/EastCape_18DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Onetangi_21DEC2022.tif,58.823529,LDS
30739,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_20DEC2021.shp,Gabrielle/Imagery/pre-storm/Waikato/Matarangi/Matarangi_24DEC2022.tif,59.459459,LDS
30931,Gabrielle/Shorelines/Delivery/PostGabrielle_shorelines_21022023.shp,Gabrielle/Imagery/pre-storm/Bay of Plenty/Tauranga/tauranga-winter-01m-urban-aerial-photos-2022/BD37_500_022023.jpg,60.000000,Unknown
30509,Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/PNEO/OmahaPakiri_04APR2023.tif,61.538462,LDS
...,...,...,...,...
31209,Gabrielle/Shorelines/Auckland/Whangapoua/Whangapoua_09FEB2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Whangapoua/Whangapoua_19FEB2023.tif,97.297297,MAX
29780,Gabrielle/Shorelines/Merged/Auckland/Mangawhai_29JUN2021.shp,Gabrielle/Imagery/pre-storm/Northland/Mangawhai/Mangawhai_29JUNE2021.tif,97.297297,MAX
29873,Gabrielle/Shorelines/Merged/Auckland/Whangapoua_09FEB2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Whangapoua/Whangapoua_19FEB2023.tif,97.297297,MAX
30159,Gabrielle/Shorelines/Merged/Northland/LangsBeach_29JUN2021.shp,Gabrielle/Imagery/pre-storm/Northland/LangsBeach/LangsBeach_29JUNE2021.tif,97.435897,MAX


In [9]:
failures[failures.Source != "LDS"].sort_values(by=["match_score", "filename"]).to_csv("failures.csv", index=False)

In [10]:
index_tiles = filelist[filelist.str.contains(".+index-tiles.+.shp$")]
index_tiles

4480      Gabrielle/Imagery/post_storm/LINZ/HawkesBay/hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index-tiles-Copy.shp
10041             Gabrielle/Imagery/post_storm/LINZ/BayofPlenty/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2023.shp
11918          Gabrielle/Imagery/post_storm/LINZ/Gisborne/gisborne-02m-cyclone-gabrielle-aerial-photos-index-tiles-202.shp
13888          Gabrielle/Imagery/pre-storm/Waikato/TairuaPauanui/waikato-03m-rural-aerial-photos-index-tiles-2021-2023.shp
14007               Gabrielle/Imagery/pre-storm/Waikato/LINZtemp/waikato-03m-rural-aerial-photos-index-tiles-2021-2023.shp
                                                                ...                                                       
230613             SpatialData/Mosaics/Bay of Plenty/Footprints/bay-of-plenty-03m-rural-aerial-photos-index-tiles-2019.shp
230618        SpatialData/Mosaics/Bay of Plenty/Footprints/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2018-2019.shp
238223          

In [11]:
def read_index_tile(f):
    gdf = gpd.read_file(prefix+f).to_crs(2193)
    gdf["filename"] = f
    return gdf

index_tiles = pd.concat(process_map(read_index_tile, index_tiles))
len(index_tiles)

  0%|          | 0/79 [00:00<?, ?it/s]

147407

## LDS index tile matching

In [18]:
LDS = failures[failures.Source == "LDS"].sort_values("match_score")
LDS

Unnamed: 0,filename,matched_image,match_score,Source
112791,Retrolens/Waikato/Awakino/Shorelines/Awakino_15FEB2022.shp,,0.000000,LDS
110781,Retrolens/Waikato/WaikawauRiver/Shorelines/WaikawauRiver_10MAR2018.shp,,0.000000,LDS
113453,Retrolens/Waikato/UrawhitikiPoint/Shorelines/UrawhitikiPoint_10MAR2018.shp,,0.000000,LDS
113448,Retrolens/Waikato/UrawhitikiPoint/Shorelines/UrawhitikiPoint_15FEB2022.shp,,0.000000,LDS
113345,Retrolens/Waikato/KennedyBay/Shorelines/KennedyBay_16FEB2018.shp,,0.000000,LDS
...,...,...,...,...
161370,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_01APR2006.shp,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Ryans_Pipikaretu_Penguin_TeRauoneBeach_17MAR2000_mosaic.jp2,94.252874,LDS
128917,Retrolens/Taranaki/Oakura/Shorelines/Oakura_31OCT2016_cliff.shp,MaxarImagery/HighFreq/Taranaki/Oakura/Imagery/Stack/Oakura_03OCT2016.tif,96.551724,LDS
128869,Retrolens/Taranaki/Oakura/Shorelines/Oakura_31OCT2016_beach.shp,MaxarImagery/HighFreq/Taranaki/Oakura/Imagery/Stack/Oakura_03OCT2016.tif,96.551724,LDS
151566,Retrolens/Canterbury/GoreBay/Shorelines/GoreBay_8FEB2022.shp,Retrolens/Canterbury/GoreBay/Stack/GoreBay_LDS_18FEB2022.TIF,96.774194,LDS


In [39]:
def get_resolution(filename):
  gdf = gpd.read_file(prefix+filename)
  if "LDS" not in gdf.Source.unique():
    return {"filename": filename}
  bounds = gdf.total_bounds
  intersecting_tiles = index_tiles[index_tiles.intersects(box(*bounds))]
  if filename.startswith("Gabrielle"):
    intersecting_tiles = intersecting_tiles[intersecting_tiles.filename.str.startswith("Gabrielle")]
  if len(intersecting_tiles) == 0:
    print(f"{filename} doesn't intersect any index tiles")
    return {"filename": filename}
  if "Date" not in gdf.columns:
    #print(f"{filename} missing Date column")
    date = gdf.DSASdate.unique()[0]
    DSASdate = gdf.DSASdate.unique()[0]
  elif "DSASDate" not in gdf.columns:
    #print(f"{filename} missing DSASDate column")
    date = gdf.Date.unique()[0]
    DSASdate = gdf.DSASdate.unique()[0]
  else:
    date = gdf.Date.unique()[0]
    DSASdate = gdf.DSASDate.unique()[0]
  if not DSASdate:
    DSASdate = date

  date_options = []
  date_to_col = {}
  for col in ['Date_Flown', 'Date_Suppl', 'DATE', 'DATE_DMY', 'FLOWN_DATE', 'FLY_DATE', 'ACQ_DATE', "FLYING_DAT", "FLOWN"]:
    options_for_col = intersecting_tiles[col].dropna().unique().tolist()
    date_options.extend(options_for_col)
    for date_option in options_for_col:
      date_to_col[date_option] = col
  if not date_options:
    if "hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index" in intersecting_tiles.filename.unique()[0]:
      return {
        "filename": filename,
        "matched_index_tiles": intersecting_tiles.filename.unique(),
        "date": date,
        "matched_date": "SPECIAL_OVERRIDE",
        "match_score": 100,
        "Pixel_ER": .1
      }
    else:
      print(f"No date options in {intersecting_tiles.filename.unique()}")
      return {"filename": filename}
  else:
    match = False
    if DSASdate in date_options:
      match = DSASdate
      score = 100
    for option in date_options:
      if DSASdate in option or date in option:
        match = option
        score = 100
    if not match:
      match, score, index = rapidfuzz.process.extractOne(query=date, choices=date_options, processor=lambda s: s.replace("-", ""))
    col_for_match = date_to_col[match]
    tiles_from_this_date = intersecting_tiles[intersecting_tiles[col_for_match] == match]
    GSDM = []
    for col in ['GSDM', 'ORTHO_GSD', 'Ortho_GSD', 'GSDm', 'GSD', 'GSD_M', 'GSD_CM', 'gsdM']:
      GSDM.extend(tiles_from_this_date[col].dropna().astype(str).str.strip("m").unique())
    if len(GSDM) == 0:
      tilefile = tiles_from_this_date.filename.unique()[0]
      if "-04m" in tilefile:
        GSDM = .4
      elif "-0075m" in tilefile:
        GSDM = .075
    elif len(GSDM) == 1:
      GSDM = GSDM[0]
    elif len(GSDM) > 1:
      #print(f"Ambiguous GSDM: {GSDM}")
      GSDM = GSDM[0]
    return {
      "filename": filename,
      "matched_index_tiles": tiles_from_this_date.filename.unique().tolist(),
      "date": date,
      "DSASDate": DSASdate,
      "matched_date": match,
      "match_score": score,
      "Pixel_ER": GSDM
    }

LDS = pd.DataFrame(process_map(get_resolution, LDS.filename)).sort_values("match_score")
LDS

  0%|          | 0/398 [00:00<?, ?it/s]

Unnamed: 0,filename,matched_index_tiles,date,DSASDate,matched_date,match_score,Pixel_ER
0,Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp,[SpatialData/Mosaics/Footprint shapefiles/Kaikoura/kaikoura-030m-rural-aerial-photos-index-tiles-2016-2017.shp],2015-01-09,09/01/2015,"20/12/2016,21/12/2016,15/02/2017,16/02/2017,21/02/2017,24/02/2017",30.000000,0.3
1,Retrolens/Canterbury/KaitoreteSpitWest/Shorelines/KaitoreteSpitWest_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2003-04-22,22/04/2003,151125 151228,33.750000,0.3
2,Retrolens/Otago/Moeraki_HampdenBeach/Stack/Shorelines/Moeraki_HampdenBeah_2006.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],1899-12-30,1899-12-30,02/12/17 to 08/02/18,36.000000,0.3
3,Retrolens/Canterbury/GoreBay/Shorelines/GoreBay_8FEB2022.shp,[SpatialData/Mosaics/Footprint shapefiles/Kaikoura/kaikoura-030m-rural-aerial-photos-index-tiles-2016-2017.shp],2022-02-08,08/02/2022,"20/12/2016,21/12/2016,15/02/2017,16/02/2017,21/02/2017,24/02/2017",37.500000,0.3
4,Retrolens/Canterbury/KaitoreteSpitEast/Shorelines/KaitoreteSpitEast_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2023-04-22,22/04/2023,151228 151229,38.095238,0.3
...,...,...,...,...,...,...,...
223,Retrolens/Southland/OretiBeachSouth/Shorelines/OreitBeachSouth_09APR2023.shp,[SpatialData/Mosaics/Southland 2023/southland-025m-rural-aerial-photos-index-tiles-2023.shp],2023-04-09,09/04/2023,2023-04-09,100.000000,0.25
224,MaxarImagery/HighFreq/Otago/Waikouaiti/Shorelines/Waikouaiti_30MAR2006.shp,[SpatialData/Mosaics/Footprint shapefiles/Otago footprints/otago-075m-rural-aerial-photos-index-tiles-2004-2011.shp],2006-03-30,30/03/2006,2006-03-30,100.000000,0.75
225,Retrolens/Otago/CoalPoint_SmithsBeach/Shorelines/CoalPoint_SmithsBeach_26MAR2006.shp,[SpatialData/Mosaics/Footprint shapefiles/Otago footprints/otago-075m-rural-aerial-photos-index-tiles-2004-2011.shp],2006-03-26,26/03/2006,2006-03-26,100.000000,0.75
211,Retrolens/Waikato/Pauanui_Tairua/Shorelines/Paunanui_Tairua_03JAN2022.shp,"[Gabrielle/Imagery/pre-storm/Waikato/TairuaPauanui/waikato-03m-rural-aerial-photos-index-tiles-2021-2023.shp, Gabrielle/Imagery/pre-storm/Waikato/LINZtemp/waikato-03m-rural-aerial-photos-index-til...",2022-01-03,03/01/2022,03/01/2022,100.000000,0.3


In [41]:
# Stop Excel interpreting dates
for col in ["date", "DSASDate", "matched_date"]:
    LDS[col] = "'" + LDS[col]
LDS.to_csv("LDS_matches.csv", index=False)

In [86]:
df = df[df.match_score == 100]
df

Unnamed: 0,filename,matched_image,match_score
29735,Gabrielle/Shorelines/Merged/Auckland/Whangapoua_19FEB2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Whangapoua/Whangapoua_19FEB2023.tif,100.0
29762,Gabrielle/Shorelines/Merged/Auckland/Oneroa_27DEC2022.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Oneroa_27DEC2022.tif,100.0
29765,Gabrielle/Shorelines/Merged/Auckland/PakiriNorth_14OCT2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Pakiri/PakiriNorth_14OCT2021.tif,100.0
29769,Gabrielle/Shorelines/Merged/Auckland/Tawharanui_23DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Tawharanui/Tawharanui_23DEC2021.tif,100.0
29771,Gabrielle/Shorelines/Merged/Auckland/Tawharanui_01MAR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Tawharanui/Tawharanui_01MAR2023.tif,100.0
...,...,...,...
193166,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_17APR1952.shp,Retrolens/Wellington/KapitiSouth/Stack/KapitiSouth_17APR1952_mosaic.tif,100.0
193169,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_02JAN1988.shp,Retrolens/Wellington/KapitiSouth/Stack/KapitiSouth_02JAN1988_mosaic.jp2,100.0
193174,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_06OCT1980.shp,Retrolens/Wellington/KapitiSouth/Stack/KapitiSouth_06OCT1980_mosaic.tif,100.0
194278,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_22AUG1961.shp,Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_22AUG1961_mosaic.tif,100.0


## Investigate metadata about the matched images

In [87]:
def get_meta(tup):
    i, row = tup
    image = rio.open(prefix + row.matched_image)
    try:
        gdf = gpd.read_file(prefix + row.filename)
        row = row.to_dict()
        row["n_lines"] = len(gdf.dropna(subset="geometry").explode(index_parts=False))
    except: 
        print(f"Can't read{row['filename']}")
    
    row.update(image.profile)
    row["GCPs"] = len(image.gcps[0])
    row["res"] = image.res
    row["CPS"] = "CPS" in gdf.columns
    return row

metafile = "meta.csv"
if os.path.isfile(metafile):
    meta = pd.read_csv(metafile)
else:
    meta = pd.DataFrame(thread_map(get_meta, df.iterrows(), total=len(df)))
    meta.to_csv(metafile, index=False)
meta

  0%|          | 0/2039 [00:00<?, ?it/s]

Unnamed: 0,filename,matched_image,match_score,n_lines,driver,dtype,nodata,width,height,count,crs,transform,blockxsize,blockysize,tiled,interleave,GCPs,res,CPS,compress,photometric
0,Gabrielle/Shorelines/Merged/Auckland/Whangapoua_19FEB2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Whangapoua/Whangapoua_19FEB2023.tif,100.0,3,GTiff,uint32,6.553600e+04,2804,6572,1,,"(0.5999999999999668, 0.0, 1817329.4439153846, 0.0, -0.6000000000000283, 6001088.4262609035, 0.0, 0.0, 1.0)",128.0,128,True,band,5,"(0.5999999999999668, 0.6000000000000283)",True,,
1,Gabrielle/Shorelines/Merged/Auckland/Oneroa_27DEC2022.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Oneroa_27DEC2022.tif,100.0,3,GTiff,uint8,,4482,3815,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.3000000000000208, 0.0, 1779029.7246191516, 0.0, -0.3, 5928138.707998816, 0.0, 0.0, 1.0)",128.0,128,True,pixel,0,"(0.3000000000000208, 0.3)",True,,
2,Gabrielle/Shorelines/Merged/Auckland/PakiriNorth_14OCT2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Pakiri/PakiriNorth_14OCT2021.tif,100.0,14,GTiff,uint8,,16238,19418,4,,"(0.5, 0.0, 1747718.2635112763, 0.0, -0.5, 5996962.310366099, 0.0, 0.0, 1.0)",128.0,128,True,pixel,4,"(0.5, 0.5)",True,,
3,Gabrielle/Shorelines/Merged/Auckland/Tawharanui_23DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Tawharanui/Tawharanui_23DEC2021.tif,100.0,8,GTiff,uint8,,7201,4822,4,,"(0.5, 0.0, 1762106.896298701, 0.0, -0.5, 5975556.637291731, 0.0, 0.0, 1.0)",128.0,128,True,pixel,7,"(0.5, 0.5)",True,,
4,Gabrielle/Shorelines/Merged/Auckland/Tawharanui_01MAR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Tawharanui/Tawharanui_01MAR2023.tif,100.0,8,GTiff,uint8,,12351,8084,3,,"(0.30000000000000376, 0.0, 1761979.7113803248, 0.0, -0.300000000000023, 5975633.3566919025, 0.0, 0.0, 1.0)",128.0,128,True,pixel,7,"(0.30000000000000376, 0.300000000000023)",True,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2034,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_17APR1952.shp,Retrolens/Wellington/KapitiSouth/Stack/KapitiSouth_17APR1952_mosaic.tif,100.0,5,GTiff,int32,2.147484e+09,14167,31280,1,,"(0.26838646488296874, 0.0, 1763537.2549408695, 0.0, -0.2683864648829636, 5469157.97689158, 0.0, 0.0, 1.0)",128.0,128,True,band,7,"(0.26838646488296874, 0.2683864648829636)",True,,
2035,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_02JAN1988.shp,Retrolens/Wellington/KapitiSouth/Stack/KapitiSouth_02JAN1988_mosaic.jp2,100.0,9,JP2OpenJPEG,uint16,2.560000e+02,10654,18819,1,,"(0.3894503758518079, 0.0, 1762602.5790819884, 0.0, -0.3894503758518002, 5466346.255854383, 0.0, 0.0, 1.0)",1024.0,1024,True,,4,"(0.3894503758518079, 0.3894503758518002)",True,,
2036,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_06OCT1980.shp,Retrolens/Wellington/KapitiSouth/Stack/KapitiSouth_06OCT1980_mosaic.tif,100.0,11,GTiff,int32,2.147484e+09,12620,29841,1,,"(0.35964806541597927, 0.0, 1762518.6032545054, 0.0, -0.3596480654159714, 5469658.2440681225, 0.0, 0.0, 1.0)",128.0,128,True,band,10,"(0.35964806541597927, 0.3596480654159714)",True,,
2037,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_22AUG1961.shp,Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_22AUG1961_mosaic.tif,100.0,6,GTiff,uint16,2.560000e+02,40393,28128,1,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.1284827509237925, 0.0, 1757756.3757313853, 0.0, -0.12848275092379952, 5459482.043794165, 0.0, 0.0, 1.0)",128.0,128,True,band,0,"(0.1284827509237925, 0.12848275092379952)",True,,


In [19]:
empty = meta[meta.n_lines == 0]
empty.shape

(71, 21)

In [20]:
def get_mtime(filename):
    return pd.to_datetime(os.path.getmtime(prefix+filename), unit="s", origin="unix", utc=True).tz_convert("Pacific/Auckland")
empty["mtime"] = empty.filename.apply(get_mtime)
empty["size_bytes"] = (prefix + empty.filename).apply(os.path.getsize)
#pd.set_option("display.max_rows",None)
empty[["filename", "n_lines", "mtime", "size_bytes"]].sort_values("mtime")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty["mtime"] = empty.filename.apply(get_mtime)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  empty["size_bytes"] = (prefix + empty.filename).apply(os.path.getsize)


Unnamed: 0,filename,n_lines,mtime,size_bytes
1085,MaxarImagery/HighFreq/WestCoast/Hunt Beach/Shorelines/HuntBeach_11DEC2017.shp,0,2021-06-16 12:17:29.273264896+12:00,100
1829,Retrolens/Southland/Riverton/Shorelines/Riverton_11Feb1978.shp,0,2021-06-16 12:17:29.273264896+12:00,100
1495,Retrolens/Taranaki/New Plymouth District Council/UrenuiRiver_North_AOI/Shorelines/UrenuiRiver_North_9FEB1982_beach.shp,0,2021-06-16 12:17:29.273264896+12:00,100
1395,MaxarImagery/HighFreq/Southland/Riverton/Shorelines/Riverton_27Dec2015.shp,0,2021-06-16 12:17:29.273264896+12:00,100
1830,Retrolens/Southland/Riverton/Shorelines/Riverton_10Dec1958.shp,0,2021-06-16 12:17:29.273264896+12:00,100
...,...,...,...,...
1055,MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_30JAN2021.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1060,MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_09APR2010.shp,0,2022-01-16 15:09:57.307988992+13:00,100
713,Retrolens/Gisborne/EastCape/Shorelines/EastCape_14SEP1998.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1513,Retrolens/Taranaki/New Plymouth District Council/Waitara/Shorelines/Waitara_16SEP1958.shp,0,2022-05-06 20:21:39.995812864+12:00,100


In [21]:
with pd.option_context("display.max_rows", 70):
  display(empty[["filename", "n_lines", "mtime", "size_bytes"]][empty.mtime > "2021-11-23"].sort_values("mtime"))

Unnamed: 0,filename,n_lines,mtime,size_bytes
713,Retrolens/Gisborne/EastCape/Shorelines/EastCape_14SEP1998.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1095,MaxarImagery/HighFreq/Gisborne/Tolaga_KaiauaBay/Shorelines/Tolaga_KaiauaBay_13SEP2012.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1096,MaxarImagery/HighFreq/Gisborne/Tolaga_KaiauaBay/Shorelines/Tolaga_KaiauaBay_20FEB2021.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1097,MaxarImagery/HighFreq/Gisborne/Tolaga_KaiauaBay/Shorelines/Tolaga_KaiauaBay_10JAN2017.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1122,MaxarImagery/HighFreq/Gisborne/Tolaga_KaiauaBay/Shorelines/Tolaga_KaiauaBay_22MAR2003.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1123,MaxarImagery/HighFreq/Gisborne/Tolaga_KaiauaBay/Shorelines/Tolaga_KaiauaBay_31MAY2007.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1188,MaxarImagery/HighFreq/Manawatu-Whanganui/Castlecliff/Shorelines/Castlecliff_28AUG2014.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1189,MaxarImagery/HighFreq/Manawatu-Whanganui/Castlecliff/Shorelines/Castlecliff_07JUNE2016.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1190,MaxarImagery/HighFreq/Manawatu-Whanganui/Castlecliff/Shorelines/Castlecliff_11APR2020.shp,0,2022-01-16 15:09:57.307988992+13:00,100
1196,MaxarImagery/HighFreq/BayOfPlenty/OhopeBeach/Shorelines/OhopeBeach_16MAY2014.shp,0,2022-01-16 15:09:57.307988992+13:00,100


In [22]:
meta[meta.filename.str.startswith("Gabrielle")].CPS.value_counts()

CPS
True     212
False      6
Name: count, dtype: int64

In [23]:
meta.crs.value_counts(dropna=False)

crs
(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)    1153
None                                                                  596
(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)     124
(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)       7
(proj, lat_0, lon_0, k, x_0, y_0, ellps, units, no_defs)                5
(init)                                                                  3
(proj, lat_0, lon_0, k, x_0, y_0, ellps, units, no_defs)                3
(init)                                                                  2
(init)                                                                  2
()                                                                      2
(init)                                                                  1
(proj, lat_0, lon_0, k, x_0, y_0, ellps, units, no_defs)                1
Name: count, dtype: int64

In [24]:
meta.GCPs.value_counts()

GCPs
0     1303
4      108
5      104
3       80
7       74
6       73
8       38
10      25
9       19
12      13
11      11
13       9
14       7
21       5
16       5
15       4
22       3
28       2
18       2
19       2
29       1
23       1
40       1
17       1
26       1
49       1
48       1
36       1
50       1
31       1
20       1
33       1
Name: count, dtype: int64

In [25]:
meta.columns

Index(['filename', 'matched_image', 'match_score', 'n_lines', 'driver',
       'dtype', 'nodata', 'width', 'height', 'count', 'crs', 'transform',
       'blockxsize', 'blockysize', 'tiled', 'compress', 'interleave', 'GCPs',
       'res', 'CPS', 'photometric'],
      dtype='object')

In [26]:
meta.driver.value_counts()

driver
GTiff          956
JP2OpenJPEG    939
JPEG             4
Name: count, dtype: int64

In [27]:
meta["count"].value_counts()

count
3    1551
1     243
4     105
Name: count, dtype: int64

In [28]:
meta.dtype.value_counts()

dtype
uint16     1172
uint8       706
int32        15
uint32        4
float32       2
Name: count, dtype: int64

In [29]:
meta.nodata.value_counts()

nodata
2.560000e+02    981
6.553500e+04    165
2.550000e+02     20
0.000000e+00     20
2.147484e+09     15
6.553600e+04      4
Name: count, dtype: int64

## Make mosaics for LINZ images

In [104]:
LDS = failures[(failures.Source == "LDS") & ~failures.filename.str.startswith("Gabrielle")].copy()
LDS

Unnamed: 0,filename,matched_image,match_score,Source
43644,MaxarImagery/HighFreq/Southland/ColacBay/Shorelines/ColacBay_19SEP2007.shp,MaxarImagery/HighFreq/Southland/ColacBay/Imagery/Stack/ColacBay_04SEP2018.tif,78.787879,LDS
47612,MaxarImagery/HighFreq/Taranaki/TongaporutuRiver/Shorelines/TongaporutuRiver_17JAN2022.shp,Retrolens/Taranaki/TongaporutuRiver/Stack/TongaporutuRiver_7NOV1970_mosaic.jp2,77.551020,LDS
50705,MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Shorelines/KarekareBethells_04JAN2022.shp,MaxarImagery/HighFreq/Auckland/KarekareBethells/Imagery/Stack/KarekareBethells_04JAN2017.tif,91.666667,LDS
52703,MaxarImagery/HighFreq/Auckland/Muriwai4/Shorelines/Muriwai4_17JAN2022.shp,Retrolens/Auckland/Muriwai4/Stack/Muriwai4_02JAN2004_mosaic.jp2,78.787879,LDS
53086,MaxarImagery/HighFreq/Auckland/MuriwaiSouth/Shorelines/MuriwaiSouth17JAN2022.shp,MaxarImagery/HighFreq/Auckland/MuriwaiSouth/Stack/MuriwaiSouth_25JAN2020.tif,85.714286,LDS
...,...,...,...,...
191975,Retrolens/Wellington/Otaki/Shorelines/Otaki_24FEB2017.shp,MaxarImagery/HighFreq/Wellington/Otaki/Imagery/Stack/Otaki_22FEB2021_2.tif,85.714286,LDS
192273,Retrolens/Wellington/MakaraBeach/Shorelines/MakaraBeach_13FEB2021.shp,Retrolens/Wellington/MakaraBeach/Stack/MakaraBeach_17FEB_1941_mosaic.jp2,80.000000,LDS
192427,Retrolens/Wellington/Tora/Shorelines/Tora_13FEB2021.shp,Retrolens/Wellington/Tora/Stack/Tora_13OCT1969_mosaic.jp2,53.846154,LDS
192780,Retrolens/Wellington/Riversdale/Shorelines/Riversdale_Wellington_13FEB2021.shp,Retrolens/Wellington/Riversdale/stacks/Riversdale_27FEB1942_mosaic.jp2,63.870968,LDS


## Match shapefiles with the corresponding index tiles shapefile
- First get the bounds of every tile
- Tiles that spatially match the bounds of a drawn EOV shapefile will be used to create the corresponding mosaic

In [95]:
if os.path.isfile("tilelist.parquet"):
    tilelist = gpd.read_parquet("tilelist.parquet")
else:
    tilelist = pd.DataFrame({"filename": glob("DigitalJPGs/**/*.jpg", recursive=True)})
    tilelist["region"] = tilelist.filename.str.split("/").str[1]
    tilelist["tilename"] = tilelist.filename.str.split("/").str[-1].str.replace(".jpg", "")
    def get_bounds(f):
        return rio.open(f).bounds
    tilelist["bounds"] = thread_map(get_bounds, tilelist.filename)
    tilelist.bounds = tilelist.bounds.progress_apply(lambda b: box(*b))
    tilelist = gpd.GeoDataFrame(tilelist, geometry="bounds")
    tilelist.to_parquet("tilelist.parquet")

In [96]:
tilelist

Unnamed: 0,filename,region,tilename,bounds
0,DigitalJPGs/HawkesBay/HBY14R/2015_BJ43_5000_0103_RGB.jpg,HawkesBay,2015_BJ43_5000_0103_RGB,"POLYGON ((2027200.000 5654400.000, 2027200.000 5658000.000, 2024800.000 5658000.000, 2024800.000 5654400.000, 2027200.000 5654..."
1,DigitalJPGs/HawkesBay/HBY14R/2015_BH43_5000_0905_RGB.jpg,HawkesBay,2015_BH43_5000_0905_RGB,"POLYGON ((2032000.000 5661600.000, 2032000.000 5665200.000, 2029600.000 5665200.000, 2029600.000 5661600.000, 2032000.000 5661..."
2,DigitalJPGs/HawkesBay/HBY14R/2015_BJ43_5000_0601_RGB.jpg,HawkesBay,2015_BJ43_5000_0601_RGB,"POLYGON ((2022400.000 5636400.000, 2022400.000 5640000.000, 2020000.000 5640000.000, 2020000.000 5636400.000, 2022400.000 5636..."
3,DigitalJPGs/HawkesBay/HBY14R/2015_BH43_5000_1004_RGB.jpg,HawkesBay,2015_BH43_5000_1004_RGB,"POLYGON ((2029600.000 5658000.000, 2029600.000 5661600.000, 2027200.000 5661600.000, 2027200.000 5658000.000, 2029600.000 5658..."
4,DigitalJPGs/HawkesBay/HBY14R/2015_BH42_5000_0709_RGB.jpg,HawkesBay,2015_BH42_5000_0709_RGB,"POLYGON ((2017600.000 5668800.000, 2017600.000 5671290.600, 2015200.000 5671290.600, 2015200.000 5668800.000, 2017600.000 5668..."
...,...,...,...,...
54687,DigitalJPGs/Wellington/Wellington Rural 2016-17/BN32/BN32_5K_1004.jpg,Wellington,BN32_5K_1004,"POLYGON ((1765600.000 5478000.000, 1765600.000 5481600.000, 1763200.000 5481600.000, 1763200.000 5478000.000, 1765600.000 5478..."
54688,DigitalJPGs/Wellington/Wellington Rural 2016-17/BN32/BN32_5K_1009.jpg,Wellington,BN32_5K_1009,"POLYGON ((1777600.000 5478000.000, 1777600.000 5481600.000, 1775200.000 5481600.000, 1775200.000 5478000.000, 1777600.000 5478..."
54689,DigitalJPGs/Wellington/Wellington Rural 2016-17/BN32/BN32_5K_1010.jpg,Wellington,BN32_5K_1010,"POLYGON ((1780000.000 5478000.000, 1780000.000 5481600.000, 1777600.000 5481600.000, 1777600.000 5478000.000, 1780000.000 5478..."
54690,DigitalJPGs/Wellington/Wellington Rural 2016-17/BN32/BN32_5K_1003.jpg,Wellington,BN32_5K_1003,"POLYGON ((1763200.000 5478000.000, 1763200.000 5481600.000, 1760800.000 5481600.000, 1760800.000 5478000.000, 1763200.000 5478..."


In [31]:
# This cell might useful for finding matches, based on geospatial correlation
for filename in tqdm(maybe_LDS.filename):
    break
    df = gpd.read_file(filename)
    if len(df) == 0:
        continue
    bounds = df.total_bounds
    intersecting_tiles = tilelist[tilelist.intersects(box(*bounds))]
    print(f"{filename} matches:\n\t{len(intersecting_tiles)} tiles from:\n\t\t{intersecting_tiles.filename}")

0it [00:00, ?it/s]

In [48]:
LDS = pd.read_csv("maybe_LDS.csv").dropna()
LDS

Unnamed: 0,filename,matched_tile_root
0,Retrolens/Nelson/BoulderBank/Shorelines/BoulderBank_16JAN2019_NEL18R.shp,DigitalJPGs/Nelson/NEL18R
18,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,DigitalJPGs/Auckland/Auckland 2010R
22,Retrolens/Auckland/Omaha/Shorelines/Omaha_04DEC2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012
23,Retrolens/Auckland/PakiriBeach_North/Shorelines/PakiriBeach_North_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16
24,Retrolens/Auckland/TeArai/Shorelines/TeArai_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16
25,Retrolens/Auckland/Orewa/Shorelines/Orewa_08MAR2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011
26,Retrolens/Auckland/PakiriBeach/Shorelines/PakiriBeach_04MAR2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012
27,Retrolens/Auckland/KawakawaBay/Shorelines/KawakawaBay_03JAN2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011
31,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,DigitalJPGs/Otago/otago-03m-rural-aerial-photos-2017-2019
36,Retrolens/Otago/Tautuku_Beach/Shorelines/Tautuku_02May2013.shp,DigitalJPGs/Southland/STH13R


For each file, create a mosaic from the corresponding tiles

In [49]:
def get_match(filename):
    match, score, index = rapidfuzz.process.extractOne(query=filename, choices=shapefiles[~shapefiles.str.contains("Old shorelines")], processor=fuzz_preprocess)
    return match, score
LDS["matched_filename"], LDS["match_score"] = zip(*LDS.filename.apply(get_match))
LDS[LDS.filename != LDS.matched_filename][["filename", "matched_filename", "match_score"]]

Unnamed: 0,filename,matched_filename,match_score
18,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,MaxarImagery/HighFreq/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,100.0
31,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,100.0
36,Retrolens/Otago/Tautuku_Beach/Shorelines/Tautuku_02May2013.shp,MaxarImagery/HighFreq/Otago/Tautuku/Shorelines/Tautuku_02May2013.shp,100.0
39,Retrolens/Otago/Aramoana/Stack/Shorelines/Aramoana_08JULY2019.shp,MaxarImagery/HighFreq/Otago/Aramoana/Shorelines/Aramoana_08JULY2019.shp,100.0
48,Retrolens/Otago/StKilda_Tomahawk_SmaillsBeach/Stack/Shorelines/StKilda_Tomahawk_SmaillsBeach_31JAN2017.shp,MaxarImagery/HighFreq/Otago/StKilda_Tomahawk_SmaillsBeach/Shorelines/StKilda_Tomahawk_SmaillsBeach_31JAN2017.shp,100.0
50,Retrolens/Otago/BoulderBeach_SandflyBay/Stack/Shorelines/BoulderBeach_SandflyBay_29JAN2019.shp,Retrolens/Otago/BoulderBeach_SandflyBay/Shorelines/BoulderBeach_SandflyBay_29JAN2019.shp,100.0
53,Retrolens/Otago/PapanuiBeach_WickliffeBay/Stack/Shorelines/PapanuiBeach_WickliffeBay_29JAN2019.shp,Retrolens/Otago/PapanuiBeach_WickliffeBay/Shorelines/PapanuiBeach_WickliffeBay_29JAN2019.shp,100.0
100,Retrolens/Bay of Plenty/OhopeBeach/Shorelines/OhopeBeach_3DEC2014.shp,Retrolens/Bay of Plenty/PortOhope/Shorelines/OhopeBeach_3DEC2014.shp,100.0
104,Retrolens/Wellington/KapitiNorth/Shorelines/NorthKapiti_15MAR2017.shp,MaxarImagery/HighFreq/Wellington/KapitiNorth/Shorelines/NorthKapiti_15MAR2017.shp,100.0


In [54]:
LDS.filename = LDS.matched_filename

In [51]:
LDS["done"] = LDS.matched_filename.apply(lambda f: os.path.isfile(prefix + f.replace(".shp", ".tif")))
LDS["done"].value_counts(dropna=False)
LDS

Unnamed: 0,filename,matched_tile_root,matched_filename,match_score,done
0,Retrolens/Nelson/BoulderBank/Shorelines/BoulderBank_16JAN2019_NEL18R.shp,DigitalJPGs/Nelson/NEL18R,Retrolens/Nelson/BoulderBank/Shorelines/BoulderBank_16JAN2019_NEL18R.shp,100.0,True
18,Retrolens/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,DigitalJPGs/Auckland/Auckland 2010R,MaxarImagery/HighFreq/Auckland/Whatipu/Shorelines/Whatipu_07APR2010.shp,100.0,True
22,Retrolens/Auckland/Omaha/Shorelines/Omaha_04DEC2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012,Retrolens/Auckland/Omaha/Shorelines/Omaha_04DEC2012.shp,100.0,True
23,Retrolens/Auckland/PakiriBeach_North/Shorelines/PakiriBeach_North_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16,Retrolens/Auckland/PakiriBeach_North/Shorelines/PakiriBeach_North_06NOV2015.shp,100.0,True
24,Retrolens/Auckland/TeArai/Shorelines/TeArai_06NOV2015.shp,DigitalJPGs/Northland/Northland 0.40m Rural Aerial Photos 2014-16,Retrolens/Auckland/TeArai/Shorelines/TeArai_06NOV2015.shp,100.0,True
25,Retrolens/Auckland/Orewa/Shorelines/Orewa_08MAR2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011,Retrolens/Auckland/Orewa/Shorelines/Orewa_08MAR2011.shp,100.0,True
26,Retrolens/Auckland/PakiriBeach/Shorelines/PakiriBeach_04MAR2012.shp,DigitalJPGs/Auckland/RNC2 Auckland/2012,Retrolens/Auckland/PakiriBeach/Shorelines/PakiriBeach_04MAR2012.shp,100.0,True
27,Retrolens/Auckland/KawakawaBay/Shorelines/KawakawaBay_03JAN2011.shp,DigitalJPGs/Auckland/RNC2 Auckland/2011,Retrolens/Auckland/KawakawaBay/Shorelines/KawakawaBay_03JAN2011.shp,100.0,True
31,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,DigitalJPGs/Otago/otago-03m-rural-aerial-photos-2017-2019,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_29JAN2019.shp,100.0,True
36,Retrolens/Otago/Tautuku_Beach/Shorelines/Tautuku_02May2013.shp,DigitalJPGs/Southland/STH13R,MaxarImagery/HighFreq/Otago/Tautuku/Shorelines/Tautuku_02May2013.shp,100.0,True


In [53]:
display(LDS.done.value_counts(dropna=False))

for i, row in tqdm(LDS[~LDS.done].iterrows(), total=len(LDS[~LDS.done])):
    filename = row.matched_filename
    mosaic_filename = filename.replace(".shp", ".tif")
    shapefile = gpd.read_file(filename)
    bounds = shapefile.total_bounds
    intersecting_tiles = tilelist[tilelist.intersects(box(*bounds)) & tilelist.filename.str.startswith(row.matched_tile_root)]
    tiles = list(intersecting_tiles.filename)
    print(len(tiles))
    Z, transform = merge(tiles)
    with rasterio.open(
        mosaic_filename,
        'w',
        driver='GTiff',
        height=Z.shape[1],
        width=Z.shape[2],
        count=Z.shape[0],
        dtype=Z.dtype,
        crs=shapefile.crs,
        transform=transform,
        compress='lzw',
        BIGTIFF = "IF_SAFER"
    ) as dst:
        dst.write(Z)

done
True    56
Name: count, dtype: int64

0it [00:00, ?it/s]

In [None]:
LDS["matched_image"] = LDS.filename.str.replace(".shp", ".tif")
LDS.to_csv("LDS_matches.csv", index=False)

In [None]:
metafile = "LDS_meta.csv"
if os.path.isfile(metafile):
    meta = pd.read_csv(metafile)
else:
    meta = pd.DataFrame(process_map(get_meta, LDS.iterrows(), total=len(LDS)))
    meta.to_csv(metafile, index=False)
meta

### Algorithm for converting polyline shapefile to polygon annotations, labelled as sea or land

In [None]:
coastline = gpd.read_file("lds-nz-coastlines-and-islands-polygons-topo-150k-FGDB.zip!nz-coastlines-and-islands-polygons-topo-150k.gdb")

In [None]:
# Get a random (known-good) annotation
sample = LDS.sample(1)
display(sample)
image_filename = sample.matched_image.iloc[0]
image = rio.open(image_filename)
sample_gdf = gpd.read_file(sample.filename.iloc[0])
sample_gdf

In [None]:
def line_to_split_bbox(geo):
    bounding_box = geo.envelope
    split_bbox = shapely.ops.split(bounding_box, geo)
    return split_bbox

split_bboxes = sample_gdf.geometry.apply(line_to_split_bbox).explode(index_parts=True).reset_index()
#split_bboxes.geometry = split_bboxes.geometry.buffer(0)
split_bboxes["area"] = split_bboxes.area
split_bboxes = split_bboxes[split_bboxes.area > 1e5]
split_bboxes

In [None]:
relevant_coastline = coastline.clip(split_bboxes.total_bounds)
split_bboxes["area_inland"] = split_bboxes.clip(relevant_coastline).area
split_bboxes["fraction_inland"] = split_bboxes.area_inland / split_bboxes.area
split_bboxes["class"] = split_bboxes.fraction_inland.apply(lambda f: "land" if f > .5 else "sea")
split_bboxes

In [None]:
# Plot the results, and check it all looks ok
fig, ax = plt.subplots()
ax = rasterio.plot.show(image, ax=ax)

cmap = matplotlib.colors.ListedColormap(['green', 'blue'])
split_bboxes.plot(ax=ax, alpha=.5, column='class', cmap=cmap, categorical=True, legend=True, edgecolor='black')
split_bboxes.apply(lambda x: ax.annotate(text=round(x.fraction_inland, 2), xy=x.geometry.centroid.coords[0], ha='center'), axis=1)

#relevant_coastline.plot(ax=ax, alpha=.5, edgecolor="cyan")

b = split_bboxes.total_bounds
xlim = ([b[0], b[2]])
ylim = ([b[1], b[3]])
ax.set_xlim(xlim)
ax.set_ylim(ylim)