In [1]:
%load_ext autotime
import geopandas as gpd # Geospatial data operations
import rasterio as rio # Geospatial imagery manipulation
import rasterio.plot
import pandas as pd # Tabular data
import os
import re
import rapidfuzz # Fuzzy string matching
from tqdm.auto import tqdm # Progress bars
from tqdm.contrib.concurrent import thread_map, process_map # Parallel operations
import matplotlib # Plots
import matplotlib.pyplot as plt
import shapely # Polygon operations
#import solaris.tile as tile # Tile splitting
#import solaris.data.coco as coco
import contextlib
import io
import rasterio # Raster imagery operations
from rasterio.vrt import WarpedVRT
from rasterio import transform
from rasterio.merge import merge # Merging tiles into mosaics
from glob import glob # Finding files
from shapely.geometry import box # Bounding box operations
matplotlib.rcParams['figure.figsize'] = (20, 10)
tqdm.pandas()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 200)
import platform
if platform.system() == "Windows":
  prefix = "Z:/"
else:
  prefix = "ressci201900060-RNC2-Coastal/"

## Match shapefiles to images

In [2]:
# Set this to False to rebuild filelist.txt and meta.csv
use_cache = False

In [3]:
# This cell will read a list of files from Nick/filelist.txt if it exists, or build a list of files and save it to that location
# This is useful for quickly loading the list of files without having to search the entire directory structure
# You will need to delete the file and rerun this cell if files are created, renamed or moved

filename = prefix + "Nick/filelist.txt"
if use_cache and os.path.isfile(filename):
    filelist = pd.read_csv(filename, header=None).iloc[:,0]
else:
    def find_files(root):
        return pd.Series(glob(prefix + root + "**/**", recursive=True)).str.replace(prefix, "")
    filelist = pd.concat(thread_map(find_files, ["Gabrielle", "MaxarImagery", "Retrolens", "SpatialData/Mosaics"]))
    if platform.system() == "Windows":
        filelist = filelist.str.replace("\\", "/", regex=False)
    filelist.to_csv(filename, index=False, header=False)
filelist

  0%|          | 0/4 [00:00<?, ?it/s]

0                                                                                      Gabrielle/
1                                                                                Gabrielle/Orders
2                                                                           Gabrielle/Orders/AOIs
3                                         Gabrielle/Orders/AOIs/Pauanui_Tairua_07JAN2023WGS84.sbn
4                                              Gabrielle/Orders/AOIs/Pauanui_Tairua_07JAN2023.sbx
                                                   ...                                           
54825    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L02_R00000062_C00000065.tfw
54826    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L03_R00000020_C00000022.tfw
54827    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L01_R00000125_C00000131.tif
54828    SpatialData/Mosaics/Nelson.Overviews/NEL18R.Overviews/Ov_i03_L01_R00000127_C00000133.tif
54829               

In [4]:
def check_filename(filename):
    # This regex only matches shapefiles that contain something date-like in their names
    match = re.search(r'/Shorelines/.+\d{4}\w*.shp$', filename)
    return bool(match)

shapefiles = filelist[filelist.apply(check_filename)]
df = shapefiles.to_frame(name="filename")
df

Unnamed: 0,filename
29906,Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Matarangi_18FEB2023.shp
29907,Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Matarangi_24DEC2022.shp
29910,Gabrielle/Shorelines/Waikato/Matarangi and surrounds/NewChums_18FEB2023.shp
29911,Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Whangapoua_24DEC2022.shp
29918,Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Whangapoua_18FEB2023.shp
...,...
109278,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_02JAN1988.shp
109283,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_06OCT1980.shp
110387,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_22AUG1961.shp
110398,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_Wellington_13FEB2021.shp


In [5]:
image_extensions = (".jpg", ".jp2", ".tif", ".TIF", ".png")

In [6]:
images = filelist[filelist.str.contains("Stack", case=False) & filelist.str.endswith(image_extensions)]
images

1276      MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_08NOV2019_2.tif
1277      MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_08NOV2019_1.tif
1279        MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_12MAR2018.tif
1282        MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_31AUG2005.tif
1296        MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_25DEC2015.tif
                                              ...                                      
110624            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_19NOV1972_mosaic.jp2
110630            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_04APR1986_mosaic.jp2
110633            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_10NOV1977_mosaic.tif
110634            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_22AUG1961_mosaic.tif
110637            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_01AUG1942_mosaic.jp2
Length: 2652, dtype: object

In [7]:
Gabrielle_images = filelist[filelist.str.startswith("Gabrielle") & filelist.str.endswith(image_extensions)]

In [8]:
# When fuzzy matching, ignore these strings
# _0 will ignore leading zeros in dates
strings_to_delete = ["_mosaic", "_mosiac", "_mosaid", ".mosaic", "_cliff", "_beach", "_beachcliffsegment", "_MF.shp", "_MT.shp", "_0", "_1.tif", "_2.tif", "_3.tif", "_LDS", "_", " "]

def fuzz_preprocess(filename):
    for s in strings_to_delete:
        filename = filename.replace(s, "")
    # Case-insensitive
    filename = filename.lower()
    # Ignore extension
    filename = os.path.splitext(filename)[0]
    # Basename only
    filename = os.path.basename(filename)
    return filename

def get_matching_image(filename):
    if filename.startswith("Gabrielle"):
        match, score, index = rapidfuzz.process.extractOne(query=filename, choices=Gabrielle_images, processor=fuzz_preprocess)
        return match, score
    else:
        match, score, index = rapidfuzz.process.extractOne(query=filename, choices=images, processor=fuzz_preprocess)
        return match, score

df["matched_image"], df["match_score"] = zip(*process_map(get_matching_image, df.filename))
print("Perfect matches:", sum(df.match_score == 100))
print("Imperfect matches:", sum(df.match_score < 100))

  df["matched_image"], df["match_score"] = zip(*process_map(get_matching_image, df.filename))


  0%|          | 0/2438 [00:00<?, ?it/s]

Perfect matches: 2054
Imperfect matches: 384


In [9]:
df = df.sort_values("match_score")
df

Unnamed: 0,filename,matched_image,match_score
30274,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/BayofPlenty/Opotiki/Opotiki_28FEB2023.tif,57.894737
31160,Gabrielle/Shorelines/Gisborne/TeAraroa/EastCape_18DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Onetangi_21DEC2022.tif,58.823529
30441,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_20DEC2021.shp,Gabrielle/Imagery/pre-storm/Waikato/Matarangi/Matarangi_24DEC2022.tif,59.459459
30213,Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/PNEO/OmahaPakiri_04APR2023.tif,61.538462
44135,Retrolens/Southland/Riverton/Shorelines/Riverton_Mosaic_STH13R_05FEB2014.shp,MaxarImagery/HighFreq/Southland/Riverton/Stack/Riverton_13MAR2021.tif,62.307692
...,...,...,...
3571,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_14OCT1980.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_14OCT1980_mosaic.jp2,100.000000
3576,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_25MAR1965.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_25MAR1965_mosaic.jp2,100.000000
3586,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_07SEP1944.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_07SEP1944_mosaic.jp2,100.000000
3367,Retrolens/HawkesBay/WhakakiLagoon/Shorelines/WhakakiLagoon_01NOV1942.shp,Retrolens/HawkesBay/WhakakiLagoon/Stack/WhakakiLagoon_01NOV1942_mosaic.jp2,100.000000


In [10]:
def get_shapefile_meta(tup):
    i, row = tup
    filename = row.filename
    source = "Unknown"
    try:
        shapefile = gpd.read_file(prefix+filename)
    except:
        print(f"Can't read {filename}")
        return {"Source": source, "n_lines": 0}
    if "Source" not in shapefile.columns:
        if filename.startswith("Retrolens"):
            source = "RL"
        elif filename.startswith("MaxarImagery/HighFreq"):
            source = "MAX"
    else:
        sources = shapefile.Source.unique()
        if len(sources) == 0 or not sources[0]:
            #print(f"{filename} has no sources")
            if filename.startswith("MaxarImagery/HighFreq"):
                source = "MAX"
        elif len(sources) == 1:
            source = sources[0]
        elif len(sources) > 1:
            print(f"{filename} has ambiguous sources: {sources}")
            source = sources[0]
    n_lines = len(shapefile.dropna(subset="geometry").explode(index_parts=False))
    return {
        "Source": source,
        "n_lines": n_lines,
        "CPS": "CPS" in shapefile.columns,
        "Photoscale": "Photoscale" in shapefile.columns,
        "Pixel_Er": "Pixel_Er" in shapefile.columns
    }

shp_meta = pd.DataFrame(process_map(get_shapefile_meta, df.iterrows(), total=len(df)), index=df.index)
df = pd.concat([df, shp_meta], axis=1)
df

  0%|          | 0/2438 [00:00<?, ?it/s]

Retrolens/HawkesBay/Awatoto/Shorelines/Awatoto_06MAR2019.shp has ambiguous sources: ['LDS' None]
Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp has ambiguous sources: ['LDS' None]
Retrolens/Taranaki/OpunakeBeach/Shorelines/OpunakeBeach_11FEB2017.shp has ambiguous sources: ['LDS' None]
Retrolens/Otago/Warrington/Shorelines/Warrington_17MAR2000.shp has ambiguous sources: ['RL' None]
Retrolens/Tasman/MoutereRiver/Shorelines/MoutereRiver_13SEP1985.shp has ambiguous sources: ['RL' None]
Retrolens/WestCoast/Westport/Shorelines/Westport_15JAN1988.shp has ambiguous sources: ['Rl' 'RL']
Retrolens/Southland/HaldaneBay/Shorelines/HaldaneBay_07OCT1985.shp has ambiguous sources: ['RL' None]
Retrolens/Auckland/PakiriBeach/Shorelines/PakiriBeach_24OCT1953.shp has ambiguous sources: ['RL' None]
MaxarImagery/HighFreq/Wellington/KapitiMid/Shorelines/KapitiMid_31OCT2019.shp has ambiguous sources: ['MAX' None]
MaxarImagery/HighFreq/Northland/Aranga/Shorelines/Aranga_09NOV2012.shp has ambigu

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
30274,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/BayofPlenty/Opotiki/Opotiki_28FEB2023.tif,57.894737,LDS,2,True,True,True
31160,Gabrielle/Shorelines/Gisborne/TeAraroa/EastCape_18DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Onetangi_21DEC2022.tif,58.823529,LDS,48,True,True,True
30441,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_20DEC2021.shp,Gabrielle/Imagery/pre-storm/Waikato/Matarangi/Matarangi_24DEC2022.tif,59.459459,LDS,34,True,True,True
30213,Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/PNEO/OmahaPakiri_04APR2023.tif,61.538462,LDS,6,True,True,True
44135,Retrolens/Southland/Riverton/Shorelines/Riverton_Mosaic_STH13R_05FEB2014.shp,MaxarImagery/HighFreq/Southland/Riverton/Stack/Riverton_13MAR2021.tif,62.307692,LDS,8,True,False,False
...,...,...,...,...,...,...,...,...
3571,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_14OCT1980.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_14OCT1980_mosaic.jp2,100.000000,RL,2,True,True,True
3576,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_25MAR1965.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_25MAR1965_mosaic.jp2,100.000000,RL,1,True,True,True
3586,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_07SEP1944.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_07SEP1944_mosaic.jp2,100.000000,RL,4,True,True,True
3367,Retrolens/HawkesBay/WhakakiLagoon/Shorelines/WhakakiLagoon_01NOV1942.shp,Retrolens/HawkesBay/WhakakiLagoon/Stack/WhakakiLagoon_01NOV1942_mosaic.jp2,100.000000,RL,3,False,False,False


In [11]:
df.sort_values(by="match_score")

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
30274,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/BayofPlenty/Opotiki/Opotiki_28FEB2023.tif,57.894737,LDS,2,True,True,True
31160,Gabrielle/Shorelines/Gisborne/TeAraroa/EastCape_18DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Onetangi_21DEC2022.tif,58.823529,LDS,48,True,True,True
30441,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_20DEC2021.shp,Gabrielle/Imagery/pre-storm/Waikato/Matarangi/Matarangi_24DEC2022.tif,59.459459,LDS,34,True,True,True
30213,Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/PNEO/OmahaPakiri_04APR2023.tif,61.538462,LDS,6,True,True,True
44135,Retrolens/Southland/Riverton/Shorelines/Riverton_Mosaic_STH13R_05FEB2014.shp,MaxarImagery/HighFreq/Southland/Riverton/Stack/Riverton_13MAR2021.tif,62.307692,LDS,8,True,False,False
...,...,...,...,...,...,...,...,...
86369,Retrolens/Northland/Rawara/Shorelines/Rawara_24NOV1979.shp,Retrolens/Northland/Rawara/Stack/Rawara_24NOV1979.tif,100.000000,RL,11,True,True,True
86366,Retrolens/Northland/Rawara/Shorelines/Rawara_26MAY1947.shp,Retrolens/Northland/Rawara/Stack/Rawara_26MAY1947.tif,100.000000,RL,14,True,True,True
86283,Retrolens/Northland/TokerauBeach/Shorelines/TokerauBeach_20FEB1984.shp,Retrolens/Northland/TokerauBeach/Stack/TokerauBeach_20FEB1984.tif,100.000000,RL,26,True,True,True
85998,Retrolens/Northland/TaupiriBay/Shorelines/TaupiriBay_03FEB1959.shp,Retrolens/Northland/TaupiriBay/Stack/TaupiriBay_03FEB1959.tif,100.000000,RL,15,True,True,False


In [12]:
failures = (df.match_score < 100) & (df.Source != "LDS") & (df.n_lines > 0)
print(sum(failures))
try:
  df[failures].to_csv(prefix+"Nick/failures.csv", index=False)
except:
  print("Can't write Nick/failures.csv")
df[failures]

1


Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
85771,Retrolens/Northland/MarsdenPoint/Shorelines/MarsdenPoint_02NOV1970.shp,Retrolens/Northland/MarsdenPoint/Stack/MarsdenPoint_21NOV1986_mosaic.jp2,87.804878,RL,4,False,True,False


In [22]:
gpd.read_file("Retrolens/Northland/MarsdenPoint/Shorelines/MarsdenPoint_02NOV1970.shp")

Unnamed: 0,OBJECTID_1,OBJECTID,Id,Digitiser,Scale,DSASdate,Region,Site,Date,Photoscale,Source,geometry
0,1471,1577,0,BJ,1000,02/11/1970,Northland,MarsdenPoint,1970-11-02,24000,RL,"LINESTRING (1735515.150 6032733.014, 1735510.388 6032766.881, 1735486.840 6032830.381, 1735451.915 6032903.935, 1735421.223 6032955.264, 1735387.885 6032996.275, 1735356.665 6033027.231, 1735337.8..."
1,1472,1578,0,BJ,1000,02/11/1970,Northland,MarsdenPoint,1970-11-02,24000,RL,"LINESTRING (1735323.856 6033046.281, 1735296.075 6033058.452, 1735253.212 6033070.623, 1735218.023 6033077.238, 1735166.958 6033086.233, 1735147.908 6033089.673)"
2,1473,1579,0,BJ,1000,02/11/1970,Northland,MarsdenPoint,1970-11-02,24000,RL,"LINESTRING (1735036.114 6033073.274, 1735079.505 6033080.153, 1735109.668 6033086.768, 1735135.333 6033092.059, 1735144.858 6033092.059, 1735169.199 6033086.768, 1735215.766 6033079.624, 1735257.5..."
3,1474,1580,0,BJ,1000,02/11/1970,Northland,MarsdenPoint,1970-11-02,24000,RL,"LINESTRING (1735338.533 6033040.201, 1735371.341 6033016.388, 1735409.441 6032971.938, 1735439.869 6032926.430, 1735450.187 6032902.088, 1735482.996 6032833.825, 1735502.310 6032792.550, 1735509.9..."


In [13]:
df.to_csv(prefix+"Nick/shoreline_image_matching.csv", index=False)

### For those perfect matches, get the metadata from the corresponding matched image

In [14]:
# Investigate metadata about the matched images

def get_meta(tup):
    i, row = tup
    try:
        image = rio.open(prefix + row.matched_image)
    except:
        print(f"Can't read {row.matched_image}")
        return {}
    try:
        gdf = gpd.read_file(prefix + row.filename)
        row = row.to_dict()
        row["n_lines"] = len(gdf.dropna(subset="geometry").explode(index_parts=False))
    except: 
        print(f"Can't read {row['filename']}")
        return {}
    
    row.update(image.profile)
    row["GCPs"] = len(image.gcps[0])
    row["res"] = image.res
    row["CPS"] = "CPS" in gdf.columns
    return row

metafile = "meta.csv"
if use_cache and os.path.isfile(metafile):
    meta = pd.read_csv(metafile)
else:
    meta = pd.DataFrame(process_map(get_meta, df[df.match_score == 100].iterrows(), total=sum(df.match_score == 100)))
    meta.to_csv(metafile, index=False)
meta

  0%|          | 0/2054 [00:00<?, ?it/s]

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er,driver,dtype,nodata,width,height,count,crs,transform,blockxsize,blockysize,tiled,interleave,GCPs,res,compress,photometric
0,Retrolens/Otago/AllansBeach/Shorelines/AllansBeach_18FEB1970_mosaic.shp,Retrolens/Otago/AllansBeach/Stack/AllansBeach_18FEB1970_mosaic.jp2,100.0,RLN,5,True,True,True,JP2OpenJPEG,uint16,256.0,5996,4025,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.7067293982912914, 0.0, 1419154.9678825692, 0.0, -0.7067293982913497, 4917998.07473455, 0.0, 0.0, 1.0)",1024.0,1024,True,pixel,0,"(0.7067293982912914, 0.7067293982913497)",,
1,Retrolens/Otago/AllansBeach/Shorelines/AllansBeach_15OCT1942_mosaic.shp,Retrolens/Otago/AllansBeach/Stack/AllansBeach_15OCT1942_mosaic.jp2,100.0,RLN,6,True,True,True,JP2OpenJPEG,uint16,256.0,8899,5738,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.48367299928392365, 0.0, 1419198.3424617096, 0.0, -0.4836729992839708, 4917975.849639777, 0.0, 0.0, 1.0)",1024.0,1024,True,pixel,0,"(0.48367299928392365, 0.4836729992839708)",,
2,Retrolens/Otago/CoalPoint_SmithsBeach/Shorelines/CoalPoint_SmithsBeach_20FEB1975.shp,Retrolens/Otago/CoalPoint_SmithsBeach/Stack/CoalPoint_SmithsBeach_20FEB1975_mosaic.jp2,100.0,RL,11,True,True,True,JP2OpenJPEG,uint16,256.0,9522,8052,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.7111884325875153, 0.0, 1360345.9266482175, 0.0, -0.7111884325875041, 4871576.50750212, 0.0, 0.0, 1.0)",1024.0,1024,True,pixel,0,"(0.7111884325875153, 0.7111884325875041)",,
3,Retrolens/Otago/AllansBeach/Shorelines/AllansBeach_7MAR1975_mosaic.shp,Retrolens/Otago/AllansBeach/Stack/AllansBeach_7MAR1975_mosaic.jp2,100.0,RLN,5,True,True,True,JP2OpenJPEG,uint16,256.0,2882,1673,1,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(1.425043057151705, 0.0, 1419230.659988, 0.0, -1.425043057151544, 4917827.506705294, 0.0, 0.0, 1.0)",1024.0,1024,True,,0,"(1.425043057151705, 1.425043057151544)",,
4,Retrolens/Otago/AllansBeach/Shorelines/AllansBeach_17MAR2000_mosaic.shp,Retrolens/Otago/AllansBeach/Stack/AllansBeach_17MAR2000_mosaic.jp2,100.0,RLN,5,True,True,True,JP2OpenJPEG,uint16,256.0,3480,2009,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(1.4190181270697362, 0.0, 1419105.5339317448, 0.0, -1.419018127069604, 4918049.109434993, 0.0, 0.0, 1.0)",1024.0,1024,True,pixel,0,"(1.4190181270697362, 1.419018127069604)",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2049,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_14OCT1980.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_14OCT1980_mosaic.jp2,100.0,RL,2,True,True,True,JP2OpenJPEG,uint16,256.0,11854,5503,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.7790386525610454, 0.0, 1913709.0950618898, 0.0, -0.7790386525610458, 5541837.029089412, 0.0, 0.0, 1.0)",1024.0,1024,True,pixel,0,"(0.7790386525610454, 0.7790386525610458)",,
2050,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_25MAR1965.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_25MAR1965_mosaic.jp2,100.0,RL,1,True,True,True,JP2OpenJPEG,uint16,256.0,7846,2808,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.7217192742558709, 0.0, 1916718.3694209547, 0.0, -0.7217192742558981, 5541691.466820562, 0.0, 0.0, 1.0)",1024.0,1024,True,pixel,0,"(0.7217192742558709, 0.7217192742558981)",,
2051,Retrolens/HawkesBay/Porangahau/Shorelines/Porangahau_07SEP1944.shp,Retrolens/HawkesBay/Porangahau/Stack/Porangahau_07SEP1944_mosaic.jp2,100.0,RL,4,True,True,True,JP2OpenJPEG,uint16,,4740,10760,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.4940602487347576, 0.0, 1910913.6982235978, 0.0, -0.4940602487347826, 5536328.318330653, 0.0, 0.0, 1.0)",1024.0,1024,True,pixel,0,"(0.4940602487347576, 0.4940602487347826)",,
2052,Retrolens/HawkesBay/WhakakiLagoon/Shorelines/WhakakiLagoon_01NOV1942.shp,Retrolens/HawkesBay/WhakakiLagoon/Stack/WhakakiLagoon_01NOV1942_mosaic.jp2,100.0,RL,3,False,False,False,JP2OpenJPEG,uint16,,26336,2924,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.5103823395016565, 0.0, 1985414.3429311933, 0.0, -0.5103823395016591, 5668213.539088913, 0.0, 0.0, 1.0)",,2924,False,pixel,0,"(0.5103823395016565, 0.5103823395016591)",,


## Read LDS index tiles

In [15]:
index_tiles = filelist[filelist.str.contains(".+index-tiles.+.shp$")]
index_tiles

4515     Gabrielle/Imagery/post_storm/LINZ/HawkesBay/hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index-tiles-Copy.shp
10077            Gabrielle/Imagery/post_storm/LINZ/BayofPlenty/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2023.shp
11954         Gabrielle/Imagery/post_storm/LINZ/Gisborne/gisborne-02m-cyclone-gabrielle-aerial-photos-index-tiles-202.shp
13934         Gabrielle/Imagery/pre-storm/Waikato/TairuaPauanui/waikato-03m-rural-aerial-photos-index-tiles-2021-2023.shp
14053              Gabrielle/Imagery/pre-storm/Waikato/LINZtemp/waikato-03m-rural-aerial-photos-index-tiles-2021-2023.shp
                                                               ...                                                       
35717             SpatialData/Mosaics/Bay of Plenty/Footprints/bay-of-plenty-03m-rural-aerial-photos-index-tiles-2019.shp
35722        SpatialData/Mosaics/Bay of Plenty/Footprints/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2018-2019.shp
43327                   

In [16]:
def read_index_tile(f):
    gdf = gpd.read_file(prefix+f).to_crs(2193)
    gdf["filename"] = f
    return gdf

index_tiles = pd.concat(process_map(read_index_tile, index_tiles))
len(index_tiles)

  0%|          | 0/79 [00:00<?, ?it/s]

147407

In [17]:
index_tiles.Captured = index_tiles.Captured.replace({"05, 06, 09, 10 Feb. 2014": "2014-02-05", "11 & 26 March 2014": "2014-03-11"})
index_tiles["parsed_date"] = pd.NA
for col in ['Date_Flown', 'Date_Suppl', 'DATE', 'DATE_DMY', 'FLOWN_DATE', 'FLY_DATE', 'ACQ_DATE', "FLYING_DAT", "FLOWN", "Captured"]:
    index_tiles["parsed_date"].update(pd.to_datetime(index_tiles[col].str.split(",|-|to").str[0], dayfirst=True, format="mixed", errors="ignore"))
index_tiles.parsed_date = index_tiles.parsed_date.astype("string")
index_tiles.parsed_date.value_counts(dropna=False)

  super().__setitem__(key, value)


parsed_date
2017                   23649
<NA>                   22679
2022                   11348
2023                    3607
24/02/2017              2685
                       ...  
9/11/2016                  1
2029-03-12 13:01:12        1
2005-05-12 12:05:22        1
2016-11-15 15:12:28        1
2028-11-15 16:03:27        1
Name: count, Length: 384, dtype: Int64

## LDS index tile matching

In [18]:
LDS = df[(df.Source == "LDS") & (df.match_score < 100)].sort_values("match_score")
LDS

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
30274,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/BayofPlenty/Opotiki/Opotiki_28FEB2023.tif,57.894737,LDS,2,True,True,True
31160,Gabrielle/Shorelines/Gisborne/TeAraroa/EastCape_18DEC2021.shp,Gabrielle/Imagery/pre-storm/Auckland/Waiheke/Onetangi_21DEC2022.tif,58.823529,LDS,48,True,True,True
30441,Gabrielle/Shorelines/BayofPlenty/EasternBoP/EasternBoP_20DEC2021.shp,Gabrielle/Imagery/pre-storm/Waikato/Matarangi/Matarangi_24DEC2022.tif,59.459459,LDS,34,True,True,True
30213,Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,Gabrielle/Imagery/post_storm/Region/Auckland/Omaha/PNEO/OmahaPakiri_04APR2023.tif,61.538462,LDS,6,True,True,True
44135,Retrolens/Southland/Riverton/Shorelines/Riverton_Mosaic_STH13R_05FEB2014.shp,MaxarImagery/HighFreq/Southland/Riverton/Stack/Riverton_13MAR2021.tif,62.307692,LDS,8,True,False,False
...,...,...,...,...,...,...,...,...
60306,Retrolens/Manawatu/CastleCliff/Shorelines/CastleCliff_10JUN1942_clifftop.shp,Retrolens/Manawatu/CastleCliff/Stack/CastleCliff_10JUN1942_mosaic.jp2,93.023256,LDS,1,True,False,False
77463,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Shorelines/Ryans_Pipikaretu_Penguin_TeRauoneBeach_01APR2006.shp,Retrolens/Otago/Ryans_Pipikaretu_Penguin_TeRauoneBeach/Stack/Ryans_Pipikaretu_Penguin_TeRauoneBeach_17MAR2000_mosaic.jp2,94.252874,LDS,8,True,False,False
39365,Retrolens/Southland/Tiwai_Point/Shorelines/TiwaiPoint_LDS_07FEB2016.shp,MaxarImagery/HighFreq/Southland/TiwaiPoint/Stack/TiwaiPoint_02FEB2016.tif,94.444444,LDS,1,True,False,False
47940,Retrolens/Taranaki/Oakura/Shorelines/Oakura_31OCT2016_beach.shp,MaxarImagery/HighFreq/Taranaki/Oakura/Stack/Oakura_03OCT2016.tif,96.551724,LDS,3,True,False,False


In [19]:
def get_resolution(filename):
  gdf = gpd.read_file(prefix+filename)
  if "LDS" not in gdf.Source.unique():
    return {"filename": filename}
  bounds = gdf.total_bounds
  intersecting_tiles = index_tiles[index_tiles.intersects(box(*bounds))]
  if filename.startswith("Gabrielle"):
    intersecting_tiles = intersecting_tiles[intersecting_tiles.filename.str.startswith("Gabrielle")]
  if len(intersecting_tiles) == 0:
    print(f"{filename} doesn't intersect any index tiles")
    return {"filename": filename}
  if "Date" not in gdf.columns:
    #print(f"{filename} missing Date column")
    date = gdf.DSASdate.unique()[0]
    DSASdate = gdf.DSASdate.unique()[0]
  elif "DSASDate" not in gdf.columns:
    #print(f"{filename} missing DSASDate column")
    date = gdf.Date.unique()[0]
    DSASdate = gdf.DSASdate.unique()[0]
  else:
    date = gdf.Date.unique()[0]
    DSASdate = gdf.DSASDate.unique()[0]
  if not DSASdate:
    DSASdate = date

  date_options = []
  date_to_col = {}
  for col in ['Date_Flown', 'Date_Suppl', 'DATE', 'DATE_DMY', 'FLOWN_DATE', 'FLY_DATE', 'ACQ_DATE', "FLYING_DAT", "FLOWN", "Captured"]:#, "parsed_date"]:
    options_for_col = intersecting_tiles[col].dropna().unique().tolist()
    date_options.extend(options_for_col)
    for date_option in options_for_col:
      date_to_col[date_option] = col
  display(date, DSASdate, gdf, intersecting_tiles.dropna(axis=1, how="all"), date_options, date_to_col)
  if not date_options:
    if "hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index" in intersecting_tiles.filename.unique()[0]:
      return {
        "filename": filename,
        "matched_index_tiles": intersecting_tiles.filename.unique(),
        "date": date,
        "matched_date": "SPECIAL_OVERRIDE",
        "match_score": 100,
        "Pixel_ER": .1
      }
    else:
      print(f"No date options in {intersecting_tiles.filename.unique()}")
      return {"filename": filename}
  else:
    match = False
    if DSASdate in date_options:
      match = DSASdate
      score = 100
    for option in date_options:
      if DSASdate in option or date in option:
        match = option
        score = 100
    if not match:
      match, score, index = rapidfuzz.process.extractOne(query=date, choices=date_options, processor=lambda s: s.replace("-", ""))
    col_for_match = date_to_col[match]
    tiles_from_this_date = intersecting_tiles[intersecting_tiles[col_for_match] == match]
    GSDM = []
    for col in ['GSDM', 'ORTHO_GSD', 'Ortho_GSD', 'GSDm', 'GSD', 'GSD_M', 'GSD_CM', 'gsdM']:
      GSDM.extend(tiles_from_this_date[col].dropna().astype(str).str.strip("m").unique())
    if len(GSDM) == 0:
      tilefile = tiles_from_this_date.filename.unique()[0]
      if "-04m" in tilefile:
        GSDM = .4
      elif "-0075m" in tilefile:
        GSDM = .075
    elif len(GSDM) == 1:
      GSDM = GSDM[0]
    elif len(GSDM) > 1:
      #print(f"Ambiguous GSDM: {GSDM}")
      GSDM = GSDM[0]
    return {
      "filename": filename,
      "matched_index_tiles": tiles_from_this_date.filename.unique().tolist(),
      "date": date,
      "DSASDate": DSASdate,
      "matched_date": match,
      "matched_date_col": col_for_match,
      "match_score": score,
      "Pixel_ER": GSDM
    }

#get_resolution("Retrolens/Taranaki/PariokariwaPoint/Shorelines/PariokariwaPoint_19MAR2017_Taranaki399_cliff.shp")
LDS = pd.DataFrame(process_map(get_resolution, LDS.filename)).sort_values("match_score")
print("Perfect matches:", sum(LDS.match_score == 100))
print("Imperfect matches:", sum(LDS.match_score < 100))
LDS

  0%|          | 0/381 [00:00<?, ?it/s]

Perfect matches: 272
Imperfect matches: 109


Unnamed: 0,filename,matched_index_tiles,date,DSASDate,matched_date,matched_date_col,match_score,Pixel_ER
43,Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp,[SpatialData/Mosaics/Footprint shapefiles/Kaikoura/kaikoura-030m-rural-aerial-photos-index-tiles-2016-2017.shp],2015-01-09,09/01/2015,"20/12/2016,21/12/2016,15/02/2017,16/02/2017,21/02/2017,24/02/2017",FLOWN_DATE,30.000000,0.3
162,Retrolens/Canterbury/KaitoreteSpitWest/Shorelines/KaitoreteSpitWest_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2003-04-22,22/04/2003,151125 151228,DATE,33.750000,0.3
129,Retrolens/Canterbury/KaitoreteSpitEast/Shorelines/KaitoreteSpitEast_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2023-04-22,22/04/2023,151228 151229,DATE,38.095238,0.3
319,Retrolens/Canterbury/WainononLagoon_PareoaRiver/Shorelines/WainonoLagoon_PareoraRiver_02DEC2017.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2017-12-02,02/12/2017,02/12/17 to 08/02/18,FLOWN,42.750000,0.3
341,Retrolens/Canterbury/Washdyke/Shorelines/Washdyke_14FEB2017.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2017-02-28,28/02/2017,14/02/17 to 09/03/17,FLOWN,42.750000,0.3
...,...,...,...,...,...,...,...,...
133,Retrolens/Northland/BayleysBeach/Shorelines/BayleysBeach_06NOV2015.shp,[SpatialData/Mosaics/Footprint shapefiles/Northland Footrpints/northland-04m-rural-aerial-photos-index-tiles-2014-2016.shp],2015-11-06,06/11/2015,"06/11/2015, 07/11/2015",DATE_DMY,100.000000,0.4
132,Retrolens/Taranaki/Oeo/Shorelines/Oeo_31OCT2016.shp,[SpatialData/Mosaics/Footprint shapefiles/Taranaki Footprints/taranaki-03m-rural-aerial-photos-index-tiles-2016-2018.shp],2016-10-31,31/10/2016,31/10/2016,FLOWN,100.000000,0.3
131,Retrolens/Waikato/Taharoa/Shorelines/Taharoa_10MAR2018.shp,[SpatialData/Mosaics/Footprint shapefiles/Waikato Footprints/Waikato 2016-19 Footprints/waikato-03m-rural-aerial-photos-index-tiles-2016-2019.shp],2018-03-10,10/03/2018,10/03/2018,FLOWN,100.000000,0.3
127,Gabrielle/Shorelines/Hawkes Bay/Napier/Heretaunga_20FEB2023.shp,[Gabrielle/Imagery/post_storm/LINZ/HawkesBay/hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index-tiles-Copy.shp],2023-02-20,,SPECIAL_OVERRIDE,,100.000000,0.1


In [20]:
with pd.option_context("display.max_rows", None):
  display(LDS[LDS.match_score < 100])

Unnamed: 0,filename,matched_index_tiles,date,DSASDate,matched_date,matched_date_col,match_score,Pixel_ER
43,Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp,[SpatialData/Mosaics/Footprint shapefiles/Kaikoura/kaikoura-030m-rural-aerial-photos-index-tiles-2016-2017.shp],2015-01-09,09/01/2015,"20/12/2016,21/12/2016,15/02/2017,16/02/2017,21/02/2017,24/02/2017",FLOWN_DATE,30.0,0.3
162,Retrolens/Canterbury/KaitoreteSpitWest/Shorelines/KaitoreteSpitWest_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2003-04-22,22/04/2003,151125 151228,DATE,33.75,0.3
129,Retrolens/Canterbury/KaitoreteSpitEast/Shorelines/KaitoreteSpitEast_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2023-04-22,22/04/2023,151228 151229,DATE,38.095238,0.3
319,Retrolens/Canterbury/WainononLagoon_PareoaRiver/Shorelines/WainonoLagoon_PareoraRiver_02DEC2017.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2017-12-02,02/12/2017,02/12/17 to 08/02/18,FLOWN,42.75,0.3
341,Retrolens/Canterbury/Washdyke/Shorelines/Washdyke_14FEB2017.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2017-02-28,28/02/2017,14/02/17 to 09/03/17,FLOWN,42.75,0.3
0,Gabrielle/Shorelines/BayofPlenty/Opotiki/BOPLINZ_Opotiki_05APR2023.shp,[Gabrielle/Imagery/post_storm/LINZ/BayofPlenty/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2023.shp],05/04/2023,05/04/2023,2023-04-05,FLOWN,44.444444,0.1
249,Retrolens/HawkesBay/OceanBeach/Shorelines/OceanBeach_06MAR2019.shp,[SpatialData/Mosaics/Footprint shapefiles/Hawkes Bay Footprints/hawkes-bay-03m-rural-aerial-photos-index-tiles-2019-2020.shp],2019-03-06,6/03/2019,05/03/2019,FLOWN,44.444444,0.3
293,Retrolens/Otago/Warrington/Shorelines/Warrington_30MAR2006.shp,[SpatialData/Mosaics/Footprint shapefiles/Otago footprints/lds-dunedin-01m-urban-aerial-photos-index-tiles-2018-2019-SHP/dunedin-01m-urban-aerial-photos-index-tiles-2018-2019.shp],1899-12-30,1899-12-30,"07/04/2019,08/02/2019,06/11/2018,14/01/18",FLOWN,45.0,0.1
376,Retrolens/Manawatu/CastleCliff/Shorelines/CastleCliff_10JUN1942_clifftop.shp,"[SpatialData/Mosaics/Footprint shapefiles/Manawatu/manawatu-whanganui-03m-rural-aerial-photos-index-tiles-2021-.shp, SpatialData/Mosaics/Manawatu 2021/manawatu-whanganui-03m-rural-aerial-photos-in...",1942-06-10,10/06/1942,"17/03/2021, 21/02/2021",FLOWN,45.0,0.3
175,Retrolens/Taranaki/UrenuiRiver_North_AOI/Shorelines/UrenuiRiver_North_19MARC2017_Taranaki399_beach.shp,[SpatialData/Mosaics/Footprint shapefiles/Taranaki Footprints/taranaki-03m-rural-aerial-photos-index-tiles-2016-2018.shp],2017-03-19,19/03/2017,19/3/2017,FLOWN,47.058824,0.3


In [21]:
# Stop Excel interpreting dates
for col in ["date", "DSASDate", "matched_date"]:
    LDS[col] = "'" + LDS[col]
LDS.to_csv(prefix + "Nick/LDS_matches.csv", index=False)