In [55]:
%load_ext autotime
import geopandas as gpd # Geospatial data operations
import rasterio as rio # Geospatial imagery manipulation
import rasterio.plot
import pandas as pd # Tabular data
import os
import re
import rapidfuzz # Fuzzy string matching
from tqdm.auto import tqdm # Progress bars
from tqdm.contrib.concurrent import thread_map, process_map # Parallel operations
import matplotlib # Plots
import matplotlib.pyplot as plt
import shapely # Polygon operations
#import solaris.tile as tile # Tile splitting
#import solaris.data.coco as coco
import contextlib
import io
import rasterio # Raster imagery operations
from rasterio.vrt import WarpedVRT
from rasterio import transform
from rasterio.merge import merge # Merging tiles into mosaics
from glob import glob # Finding files
from shapely.geometry import box # Bounding box operations
matplotlib.rcParams['figure.figsize'] = (20, 10)
tqdm.pandas()
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 200)
import platform
if platform.system() == "Windows":
  prefix = "Z:/"
else:
  prefix = "ressci201900060-RNC2-Coastal/"

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime


## Match shapefiles to images

In [56]:
# Set this to False to rebuild filelist.txt and meta.csv
use_cache = False

In [57]:
# This cell will read a list of files from Nick/filelist.txt if it exists, or build a list of files and save it to that location
# This is useful for quickly loading the list of files without having to search the entire directory structure
# You will need to delete the file and rerun this cell if files are created, renamed or moved

filename = prefix + "Nick/filelist.txt"
if use_cache and os.path.isfile(filename):
    filelist = pd.read_csv(filename, header=None).iloc[:,0]
else:
    def find_files(root):
        return pd.Series(glob(prefix + root + "**/**", recursive=True)).str.replace(prefix, "")
    filelist = pd.concat(thread_map(find_files, ["Archive/Gabrielle", "MaxarImagery", "Retrolens", "SpatialData/Mosaics", "skyvuw"]))
    if platform.system() == "Windows":
        filelist = filelist.str.replace("\\", "/", regex=False)
    filelist.to_csv(filename, index=False, header=False)
filelist

  0%|          | 0/5 [00:00<?, ?it/s]

0                                                     Archive/Gabrielle/
1                                               Archive/Gabrielle/Orders
2                                          Archive/Gabrielle/Orders/AOIs
3        Archive/Gabrielle/Orders/AOIs/Pauanui_Tairua_07JAN2023WGS84.sbn
4             Archive/Gabrielle/Orders/AOIs/Pauanui_Tairua_07JAN2023.sbx
                                      ...                               
73368                               skyvuw/SN2975/SN2975_TILE_LAYOUT.dbf
73369                             skyvuw/SN2975/SN2975_PHOTO_CENTRES.dbf
73370                             skyvuw/SN2975/SN2975_PHOTO_CENTRES.shx
73371                               skyvuw/SN2975/SN2975_TILE_LAYOUT.prj
73372                               skyvuw/SN2975/SN2975_TILE_LAYOUT.shp
Length: 321000, dtype: object

In [58]:
def check_filename(filename):
    # This regex only matches shapefiles that contain something date-like in their names
    match = re.search(r'/Shorelines/.+\d{4}\w*.shp$', filename)
    return bool(match)

shapefiles = filelist[filelist.apply(check_filename)]
df = shapefiles.to_frame(name="filename")
df

Unnamed: 0,filename
29910,Archive/Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Matarangi_18FEB2023.shp
29911,Archive/Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Matarangi_24DEC2022.shp
29914,Archive/Gabrielle/Shorelines/Waikato/Matarangi and surrounds/NewChums_18FEB2023.shp
29915,Archive/Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Whangapoua_24DEC2022.shp
29922,Archive/Gabrielle/Shorelines/Waikato/Matarangi and surrounds/Whangapoua_18FEB2023.shp
...,...
113077,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_02JAN1988.shp
113082,Retrolens/Wellington/KapitiSouth/Shorelines/KapitiSouth_06OCT1980.shp
114183,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_22AUG1961.shp
114184,Retrolens/Wellington/PukeruaBay/Shorelines/PukeruaBay_13FEB2021.shp


In [59]:
image_extensions = (".jpg", ".jp2", ".tif", ".TIF", ".png")

In [60]:
images = filelist[filelist.str.contains("Stack", case=False) & filelist.str.endswith(image_extensions)]
images

1171      MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_08NOV2019_2.tif
1172      MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_08NOV2019_1.tif
1174        MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_12MAR2018.tif
1177        MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_31AUG2005.tif
1191        MaxarImagery/HighFreq/HawkesBay/Mahanga/Imagery/Stack/Mahanga_25DEC2015.tif
                                              ...                                      
114416            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_19NOV1972_mosaic.jp2
114422            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_04APR1986_mosaic.jp2
114425            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_10NOV1977_mosaic.tif
114426            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_22AUG1961_mosaic.tif
114429            Retrolens/Wellington/PukeruaBay/Stack/PukeruaBay_01AUG1942_mosaic.jp2
Length: 2664, dtype: object

In [61]:
Gabrielle_images = filelist[filelist.str.contains("Gabrielle") & filelist.str.endswith(image_extensions)]
Gabrielle_images

289                                                                                                                                                    Archive/Gabrielle/Pressure sensors/Emma's field notes.jpg
314                                                                                                                  Archive/Gabrielle/Imagery/skyfiworking/SkyFi_4737_Northland_New-Zealand_2023-03-03_2250.png
315                                                                                                                  Archive/Gabrielle/Imagery/skyfiworking/SkyFi_4794_Northland_New-Zealand_2023-03-25_2229.tif
319                                                                                                                    Archive/Gabrielle/Imagery/skyfiworking/SkyFi_4794_Waikato_New-Zealand_2023-08-25_2133.tif
320                                                                                                                    Archive/Gabrielle/Imagery/skyfiworking/SkyFi_

In [62]:
# When fuzzy matching, ignore these strings
# _0 will ignore leading zeros in dates
strings_to_delete = ["_mosaic", "_mosiac", "_mosaid", ".mosaic", "_cliff", "_beach", "_beachcliffsegment", "_MF.shp", "_MT.shp", "_0", "_1.tif", "_2.tif", "_3.tif", "_LDS", "_", " "]

def fuzz_preprocess(filename):
    for s in strings_to_delete:
        filename = filename.replace(s, "")
    # Case-insensitive
    filename = filename.lower()
    # Ignore extension
    filename = os.path.splitext(filename)[0]
    # Basename only
    filename = os.path.basename(filename)
    return filename

def get_matching_image(filename):
    if filename.startswith("Gabrielle"):
        match, score, index = rapidfuzz.process.extractOne(query=filename, choices=Gabrielle_images, processor=fuzz_preprocess)
        return match, score
    else:
        match, score, index = rapidfuzz.process.extractOne(query=filename, choices=images, processor=fuzz_preprocess)
        return match, score

df["matched_image"], df["match_score"] = zip(*process_map(get_matching_image, df.filename))
print("Perfect matches:", sum(df.match_score == 100))
print("Imperfect matches:", sum(df.match_score < 100))

  df["matched_image"], df["match_score"] = zip(*process_map(get_matching_image, df.filename))


  0%|          | 0/3002 [00:00<?, ?it/s]

Perfect matches: 1919
Imperfect matches: 1083


In [63]:
df = df.sort_values("match_score")
df

Unnamed: 0,filename,matched_image,match_score
30915,Archive/Gabrielle/Shorelines/Northland/DoubtlessBay/DoubtlessBay_30AUG2022.shp,MaxarImagery/HighFreq/Tasman/Motueka/Imagery/Stack/Motueka_10AUG2003.tif,54.054054
30990,Archive/Gabrielle/Shorelines/Northland/Ruakaka/Ruakaka_27DEC2022.shp,MaxarImagery/HighFreq/WestCoast/Punakaiki/Stack/Punakaiki_12FEB2023.tif,58.823529
30828,Archive/Gabrielle/Shorelines/Northland/Great ExhibitionBay/Rarawa_11JULY2022.shp,MaxarImagery/HighFreq/Auckland/MuriwaiSouth/Stack/Muriwai_15JULY2017.tif,60.606061
33787,Retrolens/Waikato/Whitianga/Shorelines/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111
29974,Archive/Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111
...,...,...,...
62253,Retrolens/Manawatu/WhangaehuRiver/Shorelines/WhangaehuRiver_30AUG1942.shp,Retrolens/Manawatu/WhangaehuRiver/Stack/WhangaehuRiver_30AUG1942_mosaic.jp2,100.000000
62250,Retrolens/Manawatu/WhangaehuRiver/Shorelines/WhangaehuRiver_4FEB1982.shp,Retrolens/Manawatu/WhangaehuRiver/Stack/WhangaehuRiver_4FEB1982_mosaic.jp2,100.000000
31843,MaxarImagery/HighFreq/Otago/Aramoana/Shorelines/Aramoana_29JUNE2004.shp,MaxarImagery/HighFreq/Otago/Aramoana/Imagery/Stack/Aramoana_29JUNE2004.tif,100.000000
63119,Retrolens/Tasman/MoutereSpit/Shorelines/MoutereSpit_19MAY1958.shp,Retrolens/Tasman/MoutereSpit/Stack/MoutereSpit_19MAY1958_mosaic.jp2,100.000000


In [64]:
def get_shapefile_meta(tup):
    i, row = tup
    filename = row.filename
    source = "Unknown"
    try:
        shapefile = gpd.read_file(prefix+filename)
    except:
        print(f"Can't read {filename}")
        return {"Source": source, "n_lines": 0}
    if "Source" not in shapefile.columns:
        if filename.startswith("Retrolens"):
            source = "RL"
        elif filename.startswith("MaxarImagery/HighFreq"):
            source = "MAX"
    else:
        sources = shapefile.Source.unique()
        if len(sources) == 0 or not sources[0]:
            #print(f"{filename} has no sources")
            if filename.startswith("MaxarImagery/HighFreq"):
                source = "MAX"
        elif len(sources) == 1:
            source = sources[0]
        elif len(sources) > 1:
            print(f"{filename} has ambiguous sources: {sources}")
            source = sources[0]
    n_lines = len(shapefile.dropna(subset="geometry").explode(index_parts=False))
    CPS = ""
    if "CPS" not in shapefile.columns:
        CPS = "No CPS column"
    else:
        CPS = shapefile.CPS.values
        if len(CPS) == 0:
            CPS = "No CPS values"
        elif 9 in CPS:
            CPS = "Has CPS=9"
        elif 0 in CPS:
            CPS = "Has CPS=0"
        elif None in CPS:
            CPS = "Has None CPS"
        elif pd.isna(CPS).any():
            CPS = "Has NA CPS"
        else:
            CPS = "OK"
    return {
        "Source": source,
        "n_lines": n_lines,
        "CPS": CPS,
        "Photoscale": "Photoscale" in shapefile.columns,
        "Pixel_Er": "Pixel_Er" in shapefile.columns
    }

shp_meta = pd.DataFrame(process_map(get_shapefile_meta, df.iterrows(), total=len(df)), index=df.index)
df = pd.concat([df, shp_meta], axis=1)
df

  0%|          | 0/3002 [00:00<?, ?it/s]

Retrolens/Waikato/OpitoBay/Shorelines/Opito_OtamaBay_14FEB2023.shp has ambiguous sources: ['PLE' 'MAX']
Retrolens/HawkesBay/Awatoto/Shorelines/Awatoto_06MAR2019.shp has ambiguous sources: ['LDS' None]
Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp has ambiguous sources: ['LDS' None]
Retrolens/Waikato/Whangamata/Shorelines/Whangamata_14FEB2023.shp has ambiguous sources: ['MAX' 'PLE']
Retrolens/Taranaki/OpunakeBeach/Shorelines/OpunakeBeach_11FEB2017.shp has ambiguous sources: ['LDS' None]
Retrolens/Otago/Moeraki_HampdenBeach/Shorelines/Moeraki_HampdenBeach_24FEB2014.shp has ambiguous sources: ['LDS' None]
Can't read Retrolens/WestCoast/Sandy_MaoriBeach/Shorelines/Sandy_MaoriBeach_27FEB2017.shp
MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_30JAN2021.shp has ambiguous sources: ['MAX' None]
MaxarImagery/HighFreq/Canterbury/PareoraRiver_Timaru/Shorelines/PareoraRiver_Timaru_16MAR2012.shp has ambiguous sources: ['MAX' None]
MaxarImagery/Hig

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
30915,Archive/Gabrielle/Shorelines/Northland/DoubtlessBay/DoubtlessBay_30AUG2022.shp,MaxarImagery/HighFreq/Tasman/Motueka/Imagery/Stack/Motueka_10AUG2003.tif,54.054054,MAX,6,OK,True,True
30990,Archive/Gabrielle/Shorelines/Northland/Ruakaka/Ruakaka_27DEC2022.shp,MaxarImagery/HighFreq/WestCoast/Punakaiki/Stack/Punakaiki_12FEB2023.tif,58.823529,MAX,28,OK,True,True
30828,Archive/Gabrielle/Shorelines/Northland/Great ExhibitionBay/Rarawa_11JULY2022.shp,MaxarImagery/HighFreq/Auckland/MuriwaiSouth/Stack/Muriwai_15JULY2017.tif,60.606061,MAX,24,OK,True,True
33787,Retrolens/Waikato/Whitianga/Shorelines/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111,NEO,3,OK,True,True
29974,Archive/Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111,NEO,3,OK,True,True
...,...,...,...,...,...,...,...,...
62253,Retrolens/Manawatu/WhangaehuRiver/Shorelines/WhangaehuRiver_30AUG1942.shp,Retrolens/Manawatu/WhangaehuRiver/Stack/WhangaehuRiver_30AUG1942_mosaic.jp2,100.000000,RL,2,OK,True,True
62250,Retrolens/Manawatu/WhangaehuRiver/Shorelines/WhangaehuRiver_4FEB1982.shp,Retrolens/Manawatu/WhangaehuRiver/Stack/WhangaehuRiver_4FEB1982_mosaic.jp2,100.000000,RL,2,OK,True,True
31843,MaxarImagery/HighFreq/Otago/Aramoana/Shorelines/Aramoana_29JUNE2004.shp,MaxarImagery/HighFreq/Otago/Aramoana/Imagery/Stack/Aramoana_29JUNE2004.tif,100.000000,Max,8,OK,True,True
63119,Retrolens/Tasman/MoutereSpit/Shorelines/MoutereSpit_19MAY1958.shp,Retrolens/Tasman/MoutereSpit/Stack/MoutereSpit_19MAY1958_mosaic.jp2,100.000000,RL,3,OK,True,True


In [65]:
df.CPS.value_counts()

CPS
OK               2922
No CPS column      55
Has CPS=0          13
No CPS values      11
Name: count, dtype: int64

In [66]:
df[(df.CPS!="OK") & (df.n_lines > 0)][["filename", "Source", "n_lines", "CPS"]].sort_values(by=["CPS", "filename"]).to_csv(prefix+"Nick/CPS_issues.csv", index=False)

In [67]:
df.sort_values(by="match_score")

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
30915,Archive/Gabrielle/Shorelines/Northland/DoubtlessBay/DoubtlessBay_30AUG2022.shp,MaxarImagery/HighFreq/Tasman/Motueka/Imagery/Stack/Motueka_10AUG2003.tif,54.054054,MAX,6,OK,True,True
30990,Archive/Gabrielle/Shorelines/Northland/Ruakaka/Ruakaka_27DEC2022.shp,MaxarImagery/HighFreq/WestCoast/Punakaiki/Stack/Punakaiki_12FEB2023.tif,58.823529,MAX,28,OK,True,True
30828,Archive/Gabrielle/Shorelines/Northland/Great ExhibitionBay/Rarawa_11JULY2022.shp,MaxarImagery/HighFreq/Auckland/MuriwaiSouth/Stack/Muriwai_15JULY2017.tif,60.606061,MAX,24,OK,True,True
33787,Retrolens/Waikato/Whitianga/Shorelines/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111,NEO,3,OK,True,True
29974,Archive/Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111,NEO,3,OK,True,True
...,...,...,...,...,...,...,...,...
111891,Retrolens/Wellington/Otaki/Shorelines/Otaki_15APR1966.shp,Retrolens/Wellington/Otaki/Stack/Otaki_15APR1966_mosaic.jp2,100.000000,RL,9,OK,True,True
111902,Retrolens/Wellington/Otaki/Shorelines/Otaki_04OCT1939.shp,Retrolens/Wellington/Otaki/Stack/Otaki_04OCT1939_mosaic.tif,100.000000,RL,4,OK,True,True
111908,Retrolens/Wellington/Otaki/Shorelines/Otaki_06OCT1980.shp,Retrolens/Wellington/Otaki/Stack/Otaki_06OCT1980_mosaic.jp2,100.000000,RL,12,OK,True,True
40992,MaxarImagery/HighFreq/WestCoast/Ohinemaka/Shorelines/Ohinemaka_30OCT2016.shp,MaxarImagery/HighFreq/WestCoast/Ohinemaka/Imagery/Stack/Ohinemaka_30OCT2016.tif,100.000000,MAX,0,No CPS column,False,False


In [68]:
failures = df[(df.match_score < 100) & (df.Source != "LDS") & (df.n_lines > 0)]
try:
  failures.to_csv(prefix+"Nick/failures.csv", index=False)
except:
  print("Can't write Nick/failures.csv")
failures

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
30915,Archive/Gabrielle/Shorelines/Northland/DoubtlessBay/DoubtlessBay_30AUG2022.shp,MaxarImagery/HighFreq/Tasman/Motueka/Imagery/Stack/Motueka_10AUG2003.tif,54.054054,MAX,6,OK,True,True
30990,Archive/Gabrielle/Shorelines/Northland/Ruakaka/Ruakaka_27DEC2022.shp,MaxarImagery/HighFreq/WestCoast/Punakaiki/Stack/Punakaiki_12FEB2023.tif,58.823529,MAX,28,OK,True,True
30828,Archive/Gabrielle/Shorelines/Northland/Great ExhibitionBay/Rarawa_11JULY2022.shp,MaxarImagery/HighFreq/Auckland/MuriwaiSouth/Stack/Muriwai_15JULY2017.tif,60.606061,MAX,24,OK,True,True
33787,Retrolens/Waikato/Whitianga/Shorelines/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111,NEO,3,OK,True,True
29974,Archive/Gabrielle/Shorelines/Waikato/Whitianga/Whitianga_18FEB2023.shp,MaxarImagery/HighFreq/Tasman/WainuiBay/Imagery/Stack/WainuiBay_11FEB2021.tif,61.111111,NEO,3,OK,True,True
...,...,...,...,...,...,...,...,...
91194,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_12MAR1995_CF.shp,Retrolens/Northland/Mangawhai/Stack/Mangawhai_12MAR1995_mosaic.jp2,94.736842,RL,6,OK,False,False
91210,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_20MAY1963_CF.shp,Retrolens/Northland/Mangawhai/Stack/Mangawhai_20MAY1963_mosaic.jp2,94.736842,RL,4,OK,False,False
91181,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_10JAN1983_CF.shp,Retrolens/Northland/Mangawhai/Stack/Mangawhai_10JAN1983_mosaic.jp2,94.736842,RL,2,OK,False,False
35641,MaxarImagery/HighFreq/Northland/Mimiwhangata/Shorelines/Mimiwhangata_04JUN2008.shp,MaxarImagery/HighFreq/Northland/Mimiwhangata/Imagery/Stack/Mimiwhangata_04JUNE2008.tif,97.560976,MAX,7,OK,True,True


In [69]:
failures[(failures.Source == "RL") & ~failures.filename.str.contains("Waikato")]

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
61705,Retrolens/Manawatu/SantoftForest/Shorelines/SantoftForest_12MAR2016.shp,Retrolens/Manawatu/SantoftForest/STACKS/SantoftForest_23May1949_mosaic.jp2,77.272727,RL,5,OK,True,True
100335,Retrolens/Gisborne/Tolaga_KaiauaBay/Shorelines/Tolaga_KaiauaBay_14MAR1943.shp,Retrolens/Gisborne/Tolaga_KaiauaBay/Stack/Tolaga_KaiauaBay_04MAR1979_mosaic.jp2,89.361702,RL,2,OK,True,True
91225,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_06JAN1978_CF.shp,Retrolens/Northland/Mangawhai/Stack/Unclipped/Mangawhai_06JAN1978_mosaic.jp2,94.444444,RL,1,OK,False,False
91194,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_12MAR1995_CF.shp,Retrolens/Northland/Mangawhai/Stack/Mangawhai_12MAR1995_mosaic.jp2,94.736842,RL,6,OK,False,False
91210,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_20MAY1963_CF.shp,Retrolens/Northland/Mangawhai/Stack/Mangawhai_20MAY1963_mosaic.jp2,94.736842,RL,4,OK,False,False
91181,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_10JAN1983_CF.shp,Retrolens/Northland/Mangawhai/Stack/Mangawhai_10JAN1983_mosaic.jp2,94.736842,RL,2,OK,False,False
81776,Retrolens/Otago/Moeraki_HampdenBeach/Shorelines/Moeraki_HampdenBeach_8MAR1982.shp,Retrolens/Otago/Moeraki_HampdenBeach/Stack/Moeraki_HampdenBeah_8MAR1982_mosaic.jp2,98.113208,RL,20,No CPS column,False,False


In [84]:
# https://www.waikatocoastaldatabase.org.nz/CoastalDataSet/Details/58
WRAPS = df.filename[df.filename.str.contains("01JAN2002")].to_frame()
WRAPS["Pixel_ER"] = .5
WRAPS["Photoscale"] = 40000
WRAPS.to_csv(prefix+"Nick/WRAPS.csv", index=False)
WRAPS

Unnamed: 0,filename,Pixel_ER,Photoscale
34698,Retrolens/Waikato/CrayfishPoint/Shorelines/CrayfishPoint_01JAN2002.shp,0.5,40000
33762,Retrolens/Waikato/Whitianga/Shorelines/Whitianga_01JAN2002.shp,0.5,40000
35501,Retrolens/Waikato/Miranda/Shorelines/Miranda_01JAN2002.shp,0.5,40000
35043,Retrolens/Waikato/TekahaPoint/Shorelines/TeKahaPoint_01JAN2002.shp,0.5,40000
33396,Retrolens/Waikato/RuapukeBeach/Shorelines/RuapukeBeach_01JAN2002.shp,0.5,40000
37436,Retrolens/Waikato/AwakinoNorth/Shorelines/AwakinoNorth_01JAN2002.shp,0.5,40000
36869,Retrolens/Waikato/Aotea/Shorelines/Aotea_01JAN2002.shp,0.5,40000
34798,Retrolens/Waikato/NukuhakariBay/Shorelines/NukuhakariBay_01JAN2002.shp,0.5,40000
35381,Retrolens/Waikato/Awakino/Shorelines/Awakino_01JAN2002.shp,0.5,40000
32092,Retrolens/Waikato/PotahiPoint/Shorelines/PotahiPoint_01JAN2002.shp,0.5,40000


In [87]:
RL_Waikato = failures[failures.filename.str.contains("Waikato") & (failures.Source == "RL") & ~failures.filename.isin(WRAPS.filename)]
RL_Waikato

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
34587,Retrolens/Waikato/PortJackson/Shorelines/PortJackson_23AUG1971.shp,Retrolens/Northland/TaupiriBay/Stack/TaupiriBay_23AUG1971.tif,61.538462,RL,2,OK,False,False
37987,Retrolens/Waikato/Thames/Shorelines/Thames_25JAN1973.shp,Retrolens/Auckland/PakiriBeach_North/Stack/PakiriBeach_North_15JAN1977_mosaic.jp2,62.307692,RL,5,OK,False,False
37116,Retrolens/Waikato/Hahei_CooksBeach/Shorelines/Hahei_CooksBeach_22MAY1944.shp,Retrolens/Manawatu/Akitio/Stack/Akitio_25MAY1944_mosaic.jp2,62.608696,RL,9,OK,False,False
34607,Retrolens/Waikato/PortJackson/Shorelines/PortJackson_15OCT1966.shp,Retrolens/Taranaki/Oeo/Stack/Oeo_11OCT1967_mosaic.jp2,62.608696,RL,4,OK,False,False
36181,Retrolens/Waikato/KennedyBay/Shorelines/KennedyBay_23AUG1971.shp,Retrolens/Northland/TaupiriBay/Stack/TaupiriBay_23AUG1971.tif,63.157895,RL,4,OK,False,False
...,...,...,...,...,...,...,...,...
34193,Retrolens/Waikato/TeMataBay/Shorelines/TeMataBay_09FEB1971.shp,Retrolens/Northland/MataiBay/Stack/MataiBay_11FEB1971.tif,82.352941,RL,3,OK,False,False
36820,Retrolens/Waikato/Aotea/Shorelines/Aotea_12SEP1961.shp,Retrolens/Auckland/Orewa/Stack/Orewa_12SEP1961.jp2,85.714286,RL,1,OK,False,False
36013,Retrolens/Waikato/Pauanui_Tairua/Shorelines/Pauanui_Tairua_09FEB1971.shp,MaxarImagery/HighFreq/Waikato/Pauanui_Tairua/Imagery/Stack/Pauanui_Tairua_09FEB2019.tif,90.476190,RL,4,OK,False,False
34429,Retrolens/Waikato/PortWaikato/Shorelines/PortWaikato_29MAR2012.shp,MaxarImagery/HighFreq/Waikato/PortWaikato/Imagery/Stack/PortWaikato_09MAR2016.tif,92.307692,RL,1,OK,False,False


In [88]:
def get_survey_number(filename, DSASDate, year):
    # Find CSV for AOI
    bits = filename.split("/")
    terminator = min(
        bits.index("Stack") if "Stack" in bits else 1024,
        bits.index("Shorelines") if "Shorelines" in bits else 1024,
    )
    bits = bits[:terminator]
    csv_path_pattern = f"{prefix}{'/'.join(bits)}/*.csv"
    csv_candidates = glob(csv_path_pattern)
    if len(csv_candidates) == 0:
        raise ValueError(f"No CSV found for {csv_path_pattern}")
    elif len(csv_candidates) > 1:
        print(f"Ambiguous CSVs: {csv_candidates}")
        csv_candidates = [csv_candidates[0]]
    assert len(csv_candidates) == 1
    csv_filename = csv_candidates[0]
    try:
        csv = pd.read_csv(csv_filename, encoding="cp1252")
    except UnicodeDecodeError:
        # Excel file saved with .csv extension
        print(f"{csv_filename} is actually an Excel file")
        csv = pd.read_excel(csv_filename)
        csv.Date = csv.Date.astype(str)

    # Fuzzy string match dates. Lots of typos.
    matched_date, score, index = rapidfuzz.process.extractOne(query=DSASDate, choices=csv.Date.unique())
    if score < 80:
        # Terrible match score, just use year
        matched_date, score, index = rapidfuzz.process.extractOne(query=year, choices=csv.Date.unique())
        print(f"Matching {year} to {matched_date} with score {score} for {csv_filename}")
    elif DSASDate != matched_date:
        print(f"Matching {DSASDate} to {matched_date} with score {score} for {csv_filename}")
    if "RMSE" in csv.columns:
        filtered_csv = csv[(csv.Date == matched_date) & ~csv.RMSE.isna()]
    else:
        filtered_csv = csv[(csv.Date == matched_date)]
    surveys = filtered_csv.Survey.unique()
    if len(surveys) == 0:
        print(f"No surveys found for {csv_filename} for date {DSASDate}, removing RMSE filter")
        filtered_csv = csv[(csv.Date.str.contains(matched_date))]
        surveys = filtered_csv.Survey.unique()
        print(f"Survey now: {surveys}")
        if len(surveys) == 0:
            print(f"Still no surveys found for {csv_filename} for date {matched_date}, reducing date filter just to year from filename {year}")
            filtered_csv = csv[(csv.Date.str.contains(year))]
            surveys = filtered_csv.Survey.unique()
            print(f"Survey now: {surveys}")
    if len(surveys) > 1:
        #print(f"Ambiguous surveys found for {csv_candidates[0]} for date {DSASDate}: {surveys}")
        pass
    if len(surveys) == 0:
        print(f"Can't find a survey for {filename}")
    return surveys

def process_shapefile(filename):
    year = re.search(r'(\d{4})', filename).group(1)
    shapefile = gpd.read_file(prefix + filename)
    date = shapefile.Date.unique()[0]
    DSASDate = pd.to_datetime(date).strftime("%d/%m/%Y")
    survey_numbers = get_survey_number(filename, DSASDate, year)
    #print(filename, survey_numbers)
    for survey_number in survey_numbers:
        try:
            survey_tiles = gpd.read_file(f"{prefix}/skyvuw/SN{survey_number}/SN{survey_number}_TILE_LAYOUT.shp")
            GSD = float(survey_tiles.GSD.dropna().astype(str).str.strip("m").unique()[0])
            #print(f"{survey_number}={GSD}")
            return GSD
        except Exception as e:
            print(f"{survey_number} {e}")
    return None

RL_Waikato["Pixel_ER"] = RL_Waikato.filename.progress_apply(process_shapefile)
RL_Waikato

  0%|          | 0/194 [00:00<?, ?it/s]

Matching 2012 to 21/02/1969 with score 60.00000000000001 for ressci201900060-RNC2-Coastal/Retrolens/Waikato/CrayfishPoint/CrayfishPoint.csv
Matching 2012 to 4/01/2001 with score 67.5 for ressci201900060-RNC2-Coastal/Retrolens/Waikato/UrawhitikiPoint/UrawhitikiPoint.csv
Matching 01/05/1974 to 1/05/1974 with score 94.73684210526316 for ressci201900060-RNC2-Coastal/Retrolens/Waikato/CrayfishPoint/CrayfishPoint.csv
253 'GeoDataFrame' object has no attribute 'GSD'
Matching 09/02/1971 to 9/02/1971 with score 94.73684210526316 for ressci201900060-RNC2-Coastal/Retrolens/Waikato/TePuru/TePuru.csv
Matching 09/02/1971 to 9/02/1971 with score 94.73684210526316 for ressci201900060-RNC2-Coastal/Retrolens/Waikato/Whitianga/Whitianga.csv
5129 ressci201900060-RNC2-Coastal//skyvuw/SN5129/SN5129_TILE_LAYOUT.shp: No such file or directory
Matching 2013 to 22/01/1942 with score 67.5 for ressci201900060-RNC2-Coastal/Retrolens/Waikato/KaawaStream_North/KaawaStream_North.csv
Matching 2012 to 12/05/1967 with s

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RL_Waikato["Pixel_ER"] = RL_Waikato.filename.progress_apply(process_shapefile)


Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er,Pixel_ER
34587,Retrolens/Waikato/PortJackson/Shorelines/PortJackson_23AUG1971.shp,Retrolens/Northland/TaupiriBay/Stack/TaupiriBay_23AUG1971.tif,61.538462,RL,2,OK,False,False,0.4
37987,Retrolens/Waikato/Thames/Shorelines/Thames_25JAN1973.shp,Retrolens/Auckland/PakiriBeach_North/Stack/PakiriBeach_North_15JAN1977_mosaic.jp2,62.307692,RL,5,OK,False,False,0.4
37116,Retrolens/Waikato/Hahei_CooksBeach/Shorelines/Hahei_CooksBeach_22MAY1944.shp,Retrolens/Manawatu/Akitio/Stack/Akitio_25MAY1944_mosaic.jp2,62.608696,RL,9,OK,False,False,0.4
34607,Retrolens/Waikato/PortJackson/Shorelines/PortJackson_15OCT1966.shp,Retrolens/Taranaki/Oeo/Stack/Oeo_11OCT1967_mosaic.jp2,62.608696,RL,4,OK,False,False,1.0
36181,Retrolens/Waikato/KennedyBay/Shorelines/KennedyBay_23AUG1971.shp,Retrolens/Northland/TaupiriBay/Stack/TaupiriBay_23AUG1971.tif,63.157895,RL,4,OK,False,False,0.4
...,...,...,...,...,...,...,...,...,...
34193,Retrolens/Waikato/TeMataBay/Shorelines/TeMataBay_09FEB1971.shp,Retrolens/Northland/MataiBay/Stack/MataiBay_11FEB1971.tif,82.352941,RL,3,OK,False,False,0.4
36820,Retrolens/Waikato/Aotea/Shorelines/Aotea_12SEP1961.shp,Retrolens/Auckland/Orewa/Stack/Orewa_12SEP1961.jp2,85.714286,RL,1,OK,False,False,0.6
36013,Retrolens/Waikato/Pauanui_Tairua/Shorelines/Pauanui_Tairua_09FEB1971.shp,MaxarImagery/HighFreq/Waikato/Pauanui_Tairua/Imagery/Stack/Pauanui_Tairua_09FEB2019.tif,90.476190,RL,4,OK,False,False,0.4
34429,Retrolens/Waikato/PortWaikato/Shorelines/PortWaikato_29MAR2012.shp,MaxarImagery/HighFreq/Waikato/PortWaikato/Imagery/Stack/PortWaikato_09MAR2016.tif,92.307692,RL,1,OK,False,False,


In [91]:
RL_Waikato.Pixel_ER[RL_Waikato.Pixel_ER.isna()] = RL_Waikato.Pixel_ER.mean()
RL_Waikato

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  RL_Waikato.Pixel_ER[RL_Waikato.Pixel_ER.isna()] = RL_Waikato.Pixel_ER.mean()


Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er,Pixel_ER
34587,Retrolens/Waikato/PortJackson/Shorelines/PortJackson_23AUG1971.shp,Retrolens/Northland/TaupiriBay/Stack/TaupiriBay_23AUG1971.tif,61.538462,RL,2,OK,False,False,0.400000
37987,Retrolens/Waikato/Thames/Shorelines/Thames_25JAN1973.shp,Retrolens/Auckland/PakiriBeach_North/Stack/PakiriBeach_North_15JAN1977_mosaic.jp2,62.307692,RL,5,OK,False,False,0.400000
37116,Retrolens/Waikato/Hahei_CooksBeach/Shorelines/Hahei_CooksBeach_22MAY1944.shp,Retrolens/Manawatu/Akitio/Stack/Akitio_25MAY1944_mosaic.jp2,62.608696,RL,9,OK,False,False,0.400000
34607,Retrolens/Waikato/PortJackson/Shorelines/PortJackson_15OCT1966.shp,Retrolens/Taranaki/Oeo/Stack/Oeo_11OCT1967_mosaic.jp2,62.608696,RL,4,OK,False,False,1.000000
36181,Retrolens/Waikato/KennedyBay/Shorelines/KennedyBay_23AUG1971.shp,Retrolens/Northland/TaupiriBay/Stack/TaupiriBay_23AUG1971.tif,63.157895,RL,4,OK,False,False,0.400000
...,...,...,...,...,...,...,...,...,...
34193,Retrolens/Waikato/TeMataBay/Shorelines/TeMataBay_09FEB1971.shp,Retrolens/Northland/MataiBay/Stack/MataiBay_11FEB1971.tif,82.352941,RL,3,OK,False,False,0.400000
36820,Retrolens/Waikato/Aotea/Shorelines/Aotea_12SEP1961.shp,Retrolens/Auckland/Orewa/Stack/Orewa_12SEP1961.jp2,85.714286,RL,1,OK,False,False,0.600000
36013,Retrolens/Waikato/Pauanui_Tairua/Shorelines/Pauanui_Tairua_09FEB1971.shp,MaxarImagery/HighFreq/Waikato/Pauanui_Tairua/Imagery/Stack/Pauanui_Tairua_09FEB2019.tif,90.476190,RL,4,OK,False,False,0.400000
34429,Retrolens/Waikato/PortWaikato/Shorelines/PortWaikato_29MAR2012.shp,MaxarImagery/HighFreq/Waikato/PortWaikato/Imagery/Stack/PortWaikato_09MAR2016.tif,92.307692,RL,1,OK,False,False,0.539617


In [92]:
RL_Waikato.to_csv(prefix+"Nick/RL_Waikato.csv", index=False)

In [74]:
df.to_csv(prefix+"Nick/shoreline_image_matching.csv", index=False)

### For those perfect matches, get the metadata from the corresponding matched image

In [75]:
# Investigate metadata about the matched images

def get_meta(tup):
    i, row = tup
    try:
        image = rio.open(prefix + row.matched_image)
    except:
        print(f"Can't read {row.matched_image}")
        return {}
    try:
        gdf = gpd.read_file(prefix + row.filename)
        row = row.to_dict()
        row["n_lines"] = len(gdf.dropna(subset="geometry").explode(index_parts=False))
    except: 
        print(f"Can't read {row['filename']}")
        return {}
    
    row.update(image.profile)
    row["GCPs"] = len(image.gcps[0])
    row["res"] = image.res
    row["CPS"] = "CPS" in gdf.columns
    return row

metafile = "meta.csv"
if use_cache and os.path.isfile(metafile):
    meta = pd.read_csv(metafile)
else:
    meta = pd.DataFrame(process_map(get_meta, df[df.match_score == 100].iterrows(), total=sum(df.match_score == 100)))
    meta.to_csv(metafile, index=False)
meta

  0%|          | 0/1919 [00:00<?, ?it/s]

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er,driver,dtype,nodata,width,height,count,crs,transform,blockxsize,blockysize,tiled,compress,interleave,GCPs,res,photometric
0,Retrolens/Northland/NinetyMile1/Shorelines/NinetyMile1_31JAN1985.shp,Retrolens/Northland/NinetyMile1/Stack/NinetyMile1_31JAN1985.tif,100.0,RL,43,True,True,True,GTiff,uint16,65535.0,24007,30409,1,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.6331355511999924, 0.0, 1576131.610602493, 0.0, -0.6331355511999924, 6180150.51818726, 0.0, 0.0, 1.0)",128.0,128,True,lzw,band,0,"(0.6331355511999924, 0.6331355511999924)",
1,MaxarImagery/HighFreq/Tasman/PortPuponga/Shorelines/PortPuponga_25APR2014.shp,MaxarImagery/HighFreq/Tasman/PortPuponga/Imagery/Stack/PortPuponga_25APR2014.tif,100.0,MAX,2,True,True,True,GTiff,uint8,,9481,13333,3,,"(0.5, 0.0, 1573134.8066450255, 0.0, -0.5, 5513988.750861768, 0.0, 0.0, 1.0)",128.0,128,True,lzw,pixel,4,"(0.5, 0.5)",
2,Retrolens/Northland/NorthHeadKaipara/Shorelines/NorthHeadKaipara_09FEB1982.shp,Retrolens/Northland/NorthHeadKaipara/Stack/NorthHeadKaipara_09FEB1982.tif,100.0,RL,6,True,True,True,GTiff,uint8,255.0,62963,39175,1,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.35828864559998, 0.0, 1685314.4761735941, 0.0, -0.35828864559998, 5993107.457954215, 0.0, 0.0, 1.0)",128.0,128,True,lzw,band,0,"(0.35828864559998, 0.35828864559998)",
3,Retrolens/Northland/CapeReinga/Shorelines/CapeReinga_24DEC2021.shp,MaxarImagery/HighFreq/Northland/CapeReinga/Stack/CapeReinga_24DEC2021.tif,100.0,MAX,5,True,True,True,GTiff,uint16,256.0,11051,3926,4,,"(0.5, 0.0, 1593745.2764935745, 0.0, -0.5, 6192128.372511847, 0.0, 0.0, 1.0)",128.0,128,True,,pixel,6,"(0.5, 0.5)",
4,Retrolens/Northland/CapeReinga/Shorelines/CapeReinga_12APR2023.shp,MaxarImagery/HighFreq/Northland/CapeReinga/Stack/CapeReinga_12APR2023.tif,100.0,LDS,4,True,True,True,GTiff,uint16,256.0,5626,3711,3,,"(0.5, 0.0, 1571484.4210365056, 0.0, -0.5, 6190730.603749393, 0.0, 0.0, 1.0)",128.0,128,True,,pixel,5,"(0.5, 0.5)",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1914,Retrolens/Manawatu/WhangaehuRiver/Shorelines/WhangaehuRiver_30AUG1942.shp,Retrolens/Manawatu/WhangaehuRiver/Stack/WhangaehuRiver_30AUG1942_mosaic.jp2,100.0,RL,2,True,True,True,JP2OpenJPEG,uint16,256.0,15536,19362,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.5, 0.0, 1775206.0901533046, 0.0, -0.5, 5571000.7009726055, 0.0, 0.0, 1.0)",1024.0,1024,True,,pixel,0,"(0.5, 0.5)",
1915,Retrolens/Manawatu/WhangaehuRiver/Shorelines/WhangaehuRiver_4FEB1982.shp,Retrolens/Manawatu/WhangaehuRiver/Stack/WhangaehuRiver_4FEB1982_mosaic.jp2,100.0,RL,2,True,True,True,JP2OpenJPEG,uint16,256.0,6270,7842,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(1.3, 0.0, 1774943.0731330027, 0.0, -1.2999999999999525, 5571274.874077858, 0.0, 0.0, 1.0)",1024.0,1024,True,,pixel,0,"(1.3, 1.2999999999999525)",
1916,MaxarImagery/HighFreq/Otago/Aramoana/Shorelines/Aramoana_29JUNE2004.shp,MaxarImagery/HighFreq/Otago/Aramoana/Imagery/Stack/Aramoana_29JUNE2004.tif,100.0,Max,8,True,True,True,GTiff,uint8,,7173,5399,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(0.6000000000000065, 0.0, 1418391.3259597044, 0.0, -0.600000000000069, 4930131.53388546, 0.0, 0.0, 1.0)",128.0,128,True,lzw,pixel,0,"(0.6000000000000065, 0.600000000000069)",
1917,Retrolens/Tasman/MoutereSpit/Shorelines/MoutereSpit_19MAY1958.shp,Retrolens/Tasman/MoutereSpit/Stack/MoutereSpit_19MAY1958_mosaic.jp2,100.0,RL,3,True,True,True,JP2OpenJPEG,uint16,256.0,1416,3769,3,"(proj, lat_0, lon_0, k, x_0, y_0, ellps, towgs84, units, no_defs)","(1.2343198958337753, 0.0, 1601525.7875725902, 0.0, -1.2343198958338077, 5450485.411790903, 0.0, 0.0, 1.0)",1024.0,1024,True,,pixel,0,"(1.2343198958337753, 1.2343198958338077)",


## Read LDS index tiles

In [76]:
index_tiles = filelist[filelist.str.contains(".+index-tiles.+.shp$")]
index_tiles

4525     Archive/Gabrielle/Imagery/post_storm/LINZ/HawkesBay/hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index-tiles-Copy.shp
10086            Archive/Gabrielle/Imagery/post_storm/LINZ/BayofPlenty/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2023.shp
11963         Archive/Gabrielle/Imagery/post_storm/LINZ/Gisborne/gisborne-02m-cyclone-gabrielle-aerial-photos-index-tiles-202.shp
13943         Archive/Gabrielle/Imagery/pre-storm/Waikato/TairuaPauanui/waikato-03m-rural-aerial-photos-index-tiles-2021-2023.shp
14062              Archive/Gabrielle/Imagery/pre-storm/Waikato/LINZtemp/waikato-03m-rural-aerial-photos-index-tiles-2021-2023.shp
                                                                   ...                                                           
36395                     SpatialData/Mosaics/Bay of Plenty/Footprints/bay-of-plenty-03m-rural-aerial-photos-index-tiles-2019.shp
36401                SpatialData/Mosaics/Bay of Plenty/Footprints/bay-of-plenty-01m-urban-

In [77]:
def read_index_tile(f):
    gdf = gpd.read_file(prefix+f).to_crs(2193)
    gdf["filename"] = f
    return gdf

index_tiles = pd.concat(process_map(read_index_tile, index_tiles))
len(index_tiles)

  0%|          | 0/79 [00:00<?, ?it/s]

147407

In [78]:
index_tiles.Captured = index_tiles.Captured.replace({"05, 06, 09, 10 Feb. 2014": "2014-02-05", "11 & 26 March 2014": "2014-03-11"})
index_tiles["parsed_date"] = pd.NA
for col in ['Date_Flown', 'Date_Suppl', 'DATE', 'DATE_DMY', 'FLOWN_DATE', 'FLY_DATE', 'ACQ_DATE', "FLYING_DAT", "FLOWN", "Captured"]:
    index_tiles["parsed_date"].update(pd.to_datetime(index_tiles[col].str.split(",|-|to").str[0], dayfirst=True, format="mixed", errors="ignore"))
index_tiles.parsed_date = index_tiles.parsed_date.astype("string")
index_tiles.parsed_date.value_counts(dropna=False)

  super().__setitem__(key, value)


parsed_date
2017                   23649
<NA>                   22679
2022                   11348
2023                    3607
24/02/2017              2685
                       ...  
9/11/2016                  1
2029-03-12 13:01:12        1
2005-05-12 12:05:22        1
2016-11-15 15:12:28        1
2028-11-15 16:03:27        1
Name: count, Length: 384, dtype: Int64

## LDS index tile matching

In [79]:
LDS = df[(df.Source == "LDS") & (df.match_score < 100)].sort_values("match_score")
LDS

Unnamed: 0,filename,matched_image,match_score,Source,n_lines,CPS,Photoscale,Pixel_Er
30736,Archive/Gabrielle/Shorelines/Hawkes Bay/Napier/Heretaunga_20FEB2023.shp,Retrolens/WestCoast/WhareateaRiver/Stack/WhareateaRiver_23FEB2023.tif,61.904762,LDS,9,OK,True,True
101748,Retrolens/Gisborne/Tuparoa/Shorelines/Tuparoa_08FEB2023.shp,MaxarImagery/HighFreq/Gisborne/TeAraroa/Imagery/Stack/TeAraroa_20FEB2016.tif,62.500000,LDS,11,OK,True,True
30205,Archive/Gabrielle/Shorelines/BayofPlenty/Waihi/BOPLINZ_Waihi_05APR2023.shp,MaxarImagery/HighFreq/BayOfPlenty/Ohiwa/Imagery/Stack/Ohiwa_08APR2013.tif,62.608696,LDS,6,OK,True,True
30277,Archive/Gabrielle/Shorelines/BayofPlenty/Papamoa/BOPLINZ_Papamoa_04APR2023.shp,MaxarImagery/HighFreq/BayOfPlenty/Ohiwa/Imagery/Stack/Ohiwa_08APR2013.tif,62.608696,LDS,7,OK,True,True
103636,Retrolens/Gisborne/Waiparapara/Shorelines/Waiparapara_07MAR2023.shp,MaxarImagery/HighFreq/Gisborne/WaipiroBay/Imagery/Stack/WaipiroBay_22MAR2003.tif,63.157895,LDS,6,OK,True,True
...,...,...,...,...,...,...,...,...
41381,Retrolens/Southland/Tiwai_Point/Shorelines/TiwaiPoint_07FEB2016.shp,MaxarImagery/HighFreq/Southland/TiwaiPoint/Stack/TiwaiPoint_02FEB2016.tif,94.444444,LDS,1,OK,True,True
91178,Retrolens/Northland/Mangawhai/Shorelines/Mangawhai_09NOV2015_CF.shp,Retrolens/Northland/Mangawhai/Stack/Mangawhai_09NOV2015.tif,94.444444,LDS,2,OK,False,False
36009,Retrolens/Waikato/Pauanui_Tairua/Shorelines/Pauanui_Tairua_04FEB2019.shp,MaxarImagery/HighFreq/Waikato/Pauanui_Tairua/Imagery/Stack/Pauanui_Tairua_09FEB2019.tif,95.238095,LDS,4,OK,True,True
50093,Retrolens/Taranaki/Oakura/Shorelines/Oakura_31OCT2016_beach.shp,MaxarImagery/HighFreq/Taranaki/Oakura/Stack/Oakura_03OCT2016.tif,96.551724,LDS,3,OK,True,True


In [80]:
def get_resolution(filename):
  gdf = gpd.read_file(prefix+filename)
  if "LDS" not in gdf.Source.unique():
    return {"filename": filename}
  bounds = gdf.total_bounds
  intersecting_tiles = index_tiles[index_tiles.intersects(box(*bounds))]
  if filename.startswith("Gabrielle"):
    intersecting_tiles = intersecting_tiles[intersecting_tiles.filename.str.startswith("Gabrielle")]
  if len(intersecting_tiles) == 0:
    print(f"{filename} doesn't intersect any index tiles")
    return {"filename": filename}
  if "Date" not in gdf.columns:
    #print(f"{filename} missing Date column")
    date = gdf.DSASdate.unique()[0]
    DSASdate = gdf.DSASdate.unique()[0]
  elif "DSASDate" not in gdf.columns:
    #print(f"{filename} missing DSASDate column")
    date = gdf.Date.unique()[0]
    DSASdate = gdf.DSASdate.unique()[0]
  else:
    date = gdf.Date.unique()[0]
    DSASdate = gdf.DSASDate.unique()[0]
  if not DSASdate:
    DSASdate = date

  date_options = []
  date_to_col = {}
  for col in ['Date_Flown', 'Date_Suppl', 'DATE', 'DATE_DMY', 'FLOWN_DATE', 'FLY_DATE', 'ACQ_DATE', "FLYING_DAT", "FLOWN", "Captured", "FLOWN1", "FLOWN2"]:#, "parsed_date"]:
    options_for_col = intersecting_tiles[col].dropna().unique().tolist()
    date_options.extend(options_for_col)
    for date_option in options_for_col:
      date_to_col[date_option] = col
  display(date, DSASdate, gdf, intersecting_tiles.dropna(axis=1, how="all"), date_options, date_to_col)
  if not date_options:
    if "hawkes-bay-010m-cyclone-gabrielle-aerial-photos-index" in intersecting_tiles.filename.unique()[0]:
      return {
        "filename": filename,
        "matched_index_tiles": intersecting_tiles.filename.unique(),
        "date": date,
        "matched_date": "SPECIAL_OVERRIDE",
        "match_score": 100,
        "Pixel_ER": .1
      }
    elif "southland-075m-rural-aerial-photos-index-tiles-2005-2011.shp" in intersecting_tiles.filename.unique()[0]:
      return {
        "filename": filename,
        "matched_index_tiles": intersecting_tiles.filename.unique(),
        "date": date,
        "matched_date": "SPECIAL_OVERRIDE",
        "match_score": 100,
        "Pixel_ER": .75
      }
    else:
      print(f"No date options in {intersecting_tiles.filename.unique()}")
      return {"filename": filename}
  else:
    match = False
    if DSASdate in date_options:
      match = DSASdate
      score = 100
    for option in date_options:
      if DSASdate in option or date in option:
        match = option
        score = 100
    if not match:
      match, score, index = rapidfuzz.process.extractOne(query=date, choices=date_options, processor=lambda s: s.replace("-", ""))
    col_for_match = date_to_col[match]
    tiles_from_this_date = intersecting_tiles[intersecting_tiles[col_for_match] == match]
    GSDM = []
    for col in ['GSDM', 'ORTHO_GSD', 'Ortho_GSD', 'GSDm', 'GSD', 'GSD_M', 'GSD_CM', 'gsdM']:
      GSDM.extend(tiles_from_this_date[col].dropna().astype(str).str.strip("m").unique())
    if len(GSDM) == 0:
      tilefile = tiles_from_this_date.filename.unique()[0]
      if "-04m" in tilefile:
        GSDM = .4
      elif "-0075m" in tilefile:
        GSDM = .075
    elif len(GSDM) == 1:
      GSDM = GSDM[0]
    elif len(GSDM) > 1:
      #print(f"Ambiguous GSDM: {GSDM}")
      GSDM = GSDM[0]
    return {
      "filename": filename,
      "matched_index_tiles": tiles_from_this_date.filename.unique().tolist(),
      "date": date,
      "DSASDate": DSASdate,
      "matched_date": match,
      "matched_date_col": col_for_match,
      "match_score": score,
      "Pixel_ER": GSDM
    }

LDS = pd.DataFrame(process_map(get_resolution, LDS.filename)).sort_values("match_score")
print("Perfect matches:", sum(LDS.match_score == 100))
print("Imperfect matches:", sum(LDS.match_score < 100))
print("Has Pixel_ER: ", sum(~LDS.Pixel_ER.isna()))
print("No Pixel_ER: ", sum(LDS.Pixel_ER.isna()))
LDS

  0%|          | 0/514 [00:00<?, ?it/s]

Perfect matches: 366
Imperfect matches: 148
Has Pixel_ER:  514
No Pixel_ER:  0


Unnamed: 0,filename,matched_index_tiles,date,DSASDate,matched_date,matched_date_col,match_score,Pixel_ER
69,Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp,[SpatialData/Mosaics/Footprint shapefiles/Kaikoura/kaikoura-030m-rural-aerial-photos-index-tiles-2016-2017.shp],2015-01-09,09/01/2015,"20/12/2016,21/12/2016,15/02/2017,16/02/2017,21/02/2017,24/02/2017",FLOWN_DATE,30.000000,0.3
235,Retrolens/Canterbury/KaitoreteSpitWest/Shorelines/KaitoreteSpitWest_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2003-04-22,22/04/2003,151125 151228,DATE,33.750000,0.3
180,Retrolens/Canterbury/KaitoreteSpitEast/Shorelines/KaitoreteSpitEast_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2023-04-22,22/04/2023,151228 151229,DATE,38.095238,0.3
292,Retrolens/Canterbury/RakaiaSouth/Shorelines/RakaiaSouth_02DEC2020.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2020-12-02,02/12/2020,14/02/17 to 09/03/17,FLOWN,41.538462,0.3
130,Retrolens/Canterbury/OpihiRiver_South/Shorelines/OpihiRiver_South_02DEC2020.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2020-12-02,02/12/2020,14/02/17 to 09/03/17,FLOWN,41.538462,0.3
...,...,...,...,...,...,...,...,...
186,Retrolens/Gisborne/WaiapuRiver/Shorelines/WaiapuRiver_16FEB2018.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-gisborne-03m-rural-aerial-photos-index-tiles-2017-2019-SHP (1)/gisborne-03m-rural-aerial-photos-index-tiles-2017-2019.shp],2018-02-16,16/02/2018,16/02/2018,FLOWN,100.000000,0.3
185,Retrolens/Gisborne/HuataiBeach/Shorelines/HuataiBeach_22MAR2023.shp,[Archive/Gabrielle/Imagery/post_storm/LINZ/Gisborne/gisborne-02m-cyclone-gabrielle-aerial-photos-index-tiles-202.shp],2023-03-22,22/03/2023,22/03/2023,FLOWN,100.000000,0.2
184,Retrolens/Northland/BayleysBeach/Shorelines/BayleysBeach_06NOV2015.shp,[SpatialData/Mosaics/Footprint shapefiles/Northland Footrpints/northland-04m-rural-aerial-photos-index-tiles-2014-2016.shp],2015-11-06,06/11/2015,"06/11/2015, 07/11/2015",DATE_DMY,100.000000,0.4
198,Retrolens/Bay of Plenty/OhopeBeach/Shorelines/OhopeBeach_20MAR2023.shp,[Archive/Gabrielle/Imagery/post_storm/LINZ/BayofPlenty/bay-of-plenty-01m-urban-aerial-photos-index-tiles-2023.shp],2023-03-20,20/03/2023,2023-03-20,FLOWN,100.000000,0.10


In [81]:
with pd.option_context("display.max_rows", None):
  display(LDS[LDS.match_score < 100])

Unnamed: 0,filename,matched_index_tiles,date,DSASDate,matched_date,matched_date_col,match_score,Pixel_ER
69,Retrolens/Canterbury/Motunau/Shorelines/Motunau_09JAN2015.shp,[SpatialData/Mosaics/Footprint shapefiles/Kaikoura/kaikoura-030m-rural-aerial-photos-index-tiles-2016-2017.shp],2015-01-09,09/01/2015,"20/12/2016,21/12/2016,15/02/2017,16/02/2017,21/02/2017,24/02/2017",FLOWN_DATE,30.0,0.3
235,Retrolens/Canterbury/KaitoreteSpitWest/Shorelines/KaitoreteSpitWest_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2003-04-22,22/04/2003,151125 151228,DATE,33.75,0.3
180,Retrolens/Canterbury/KaitoreteSpitEast/Shorelines/KaitoreteSpitEast_22APR2023.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2015-2016-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2015-2016.shp],2023-04-22,22/04/2023,151228 151229,DATE,38.095238,0.3
292,Retrolens/Canterbury/RakaiaSouth/Shorelines/RakaiaSouth_02DEC2020.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2020-12-02,02/12/2020,14/02/17 to 09/03/17,FLOWN,41.538462,0.3
130,Retrolens/Canterbury/OpihiRiver_South/Shorelines/OpihiRiver_South_02DEC2020.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2020-12-02,02/12/2020,14/02/17 to 09/03/17,FLOWN,41.538462,0.3
321,Retrolens/Canterbury/OpihiRiver_RangitataRiver/Shorelines/OpihiRiver_RangitataRiver_02DEC2020.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2020-12-02,02/12/2020,14/02/17 to 09/03/17,FLOWN,41.538462,0.3
247,Retrolens/Canterbury/OpihiRiver_OrariRiver/Shorelines/OpihiRiver_OrariRiver_02DEC2020.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2020-12-02,02/12/2020,14/02/17 to 09/03/17,FLOWN,41.538462,0.3
461,Retrolens/Canterbury/Washdyke/Shorelines/Washdyke_14FEB2017.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2017-02-28,28/02/2017,14/02/17 to 09/03/17,FLOWN,42.75,0.3
433,Retrolens/Canterbury/WainononLagoon_PareoaRiver/Shorelines/WainonoLagoon_PareoraRiver_02DEC2017.shp,[SpatialData/Mosaics/Footprint shapefiles/lds-canterbury-03m-rural-aerial-photos-index-tiles-2017-2018-SHP/canterbury-03m-rural-aerial-photos-index-tiles-2017-2018.shp],2017-12-02,02/12/2017,02/12/17 to 08/02/18,FLOWN,42.75,0.3
340,Retrolens/HawkesBay/OceanBeach/Shorelines/OceanBeach_06MAR2019.shp,[SpatialData/Mosaics/Footprint shapefiles/Hawkes Bay Footprints/hawkes-bay-03m-rural-aerial-photos-index-tiles-2019-2020.shp],2019-03-06,6/03/2019,05/03/2019,FLOWN,44.444444,0.3


In [82]:
# Stop Excel interpreting dates
for col in ["date", "DSASDate", "matched_date"]:
    LDS[col] = "'" + LDS[col]
LDS.to_csv(prefix + "Nick/LDS_matches.csv", index=False)