# Machine Learning for Seagrass Extent Prediction

In [35]:
from pystac.client import Client
from odc.stac import load
import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import joblib
from matplotlib import colors
from sklearn.ensemble import RandomForestClassifier 


from utils import scale, apply_masks, do_prediction, calculate_band_indices

## Loading Sentinel-2 GeoMAD

Load data, then create band indices and mask out areas we don't want to include.

In [36]:
catalog = "https://stac.digitalearthpacific.org"
client = Client.open(catalog)

In [5]:
# Ba Estuary
# bbox = [177.51971, -17.49416, 177.68452, -17.34430]
# datetime="2024"

In [6]:
# Komave
# bbox = [177.72578562645654, -18.240565454997043, 177.8126842310554, -18.217513820319642]
# datetime="2024"

In [66]:
gdf = gpd.read_file("training-data/PNG_fieldpoints25.geojson")
gdf = gdf.to_crs("EPSG:4326")
min_lon, min_lat, max_lon, max_lat = gdf.total_bounds
bbox = [min_lon, min_lat, max_lon, max_lat]
datetime="2024"

In [67]:
items = client.search(
    collections=["dep_s2_geomad"], datetime=datetime, bbox=bbox
).item_collection()

print(f"Found {len(items)} items")

Found 2 items


In [68]:
data = load(
    items,
    bbox=bbox,
    measurements=[
        "nir",
        "red",
        "blue",
        "green",
        "emad",
        "smad",
        "bcmad",
        "count",
        "green",
        "nir08",
        "nir09",
        "swir16",
        "swir22",
        "coastal",
        "rededge1",
        "rededge2",
        "rededge3",
    ],
    chunks={"x": 2048, "y": 2048},
)

data

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8.01 MiB 8.01 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8.01 MiB 8.01 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8.01 MiB 8.01 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type float32 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 4.00 MiB 4.00 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 3 graph layers Data type uint16 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,4.00 MiB,4.00 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


In [69]:
scaled_data = scale(data)
indices = calculate_band_indices(scaled_data)

# TODO: split masks into separate functions
masked_data = apply_masks(scaled_data)

masked_data


# Suppose ds is your xarray Dataset
masked_data = masked_data.drop_vars(['stumpf', 'ln_bg'])

In [70]:
masked_data

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 34 graph layers,1 chunks in 34 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 34 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 34 graph layers,1 chunks in 34 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 35 graph layers,1 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 35 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 35 graph layers,1 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 35 graph layers,1 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 35 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 35 graph layers,1 chunks in 35 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8.01 MiB 8.01 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float32 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8.01 MiB 8.01 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float32 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 8.01 MiB 8.01 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float32 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,8.01 MiB,8.01 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 36 graph layers,1 chunks in 36 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 36 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 36 graph layers,1 chunks in 36 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 41 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 41 graph layers,1 chunks in 41 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 36 graph layers,1 chunks in 36 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 36 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 36 graph layers,1 chunks in 36 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 39 graph layers,1 chunks in 39 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 39 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 39 graph layers,1 chunks in 39 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 49 graph layers,1 chunks in 49 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 49 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 49 graph layers,1 chunks in 49 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 39 graph layers,1 chunks in 39 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 39 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 39 graph layers,1 chunks in 39 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 39 graph layers,1 chunks in 39 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 39 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 39 graph layers,1 chunks in 39 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 42 graph layers,1 chunks in 42 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 42 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 42 graph layers,1 chunks in 42 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 42 graph layers,1 chunks in 42 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 42 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 42 graph layers,1 chunks in 42 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 42 graph layers,1 chunks in 42 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 42 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 42 graph layers,1 chunks in 42 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 44 graph layers,1 chunks in 44 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16.02 MiB 16.02 MiB Shape (1, 1459, 1439) (1, 1459, 1439) Dask graph 1 chunks in 44 graph layers Data type float64 numpy.ndarray",1439  1459  1,

Unnamed: 0,Array,Chunk
Bytes,16.02 MiB,16.02 MiB
Shape,"(1, 1459, 1439)","(1, 1459, 1439)"
Dask graph,1 chunks in 44 graph layers,1 chunks in 44 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [71]:
masked_data.odc.explore(bands=["red", "green", "blue"], vmin=0, vmax=0.3)

## Run predictions over our region

In [72]:
model = joblib.load("models/model-geomad-joined-data-rf-11042025.model")

In [73]:
loaded = masked_data.compute()

In [74]:
loaded

In [75]:
predicted_da = do_prediction(loaded, model).astype(np.float32)
predicted_da

In [48]:
# gdf = gpd.read_file("training-data/Alluvium_vu_malekula_13022025.geojson")
# gdf = gdf.to_crs("EPSG:4326")
# min_lon, min_lat, max_lon, max_lat = gdf.total_bounds
# bbox = [min_lon, min_lat, max_lon, max_lat]
# datetime="2024"

In [16]:
# Define training data
#gdf = gpd.read_file("training-data/Alluvium_vu_malekula_13022025.geojson")
#gdf = gdf.to_crs("EPSG:4326")
# gdf.explore(column="cc_id", legend=True)

In [85]:
postcard =data #.where(all_masks)
# postcard =clipped_ds.to_array(dim="band")
# First transform the training points to the same CRS as the data
training = gdf.to_crs(postcard.odc.geobox.crs)

In [86]:
print(training.columns)
#training=training.drop(columns=['date', 'uuid'])

Index(['fid', 'Major_Ecosystem', 'Percentage_Cover1', 'Minor_Ecosystem',
       'Percentage_Cover2', 'Photo', 'Notes', 'Date', 'Surveyor', 'geometry'],
      dtype='object')


In [89]:
training

Unnamed: 0,fid,Major_Ecosystem,Percentage_Cover1,Minor_Ecosystem,Percentage_Cover2,Photo,Notes,Date,Surveyor,geometry
0,1,mangrove,100,,,,,2025-03-19 09:37:04.150000+00:00,,POINT (-301531.722 -1056410.427)
1,2,mangrove,100,mangrove,,,,2025-03-19 09:37:04.150000+00:00,Waqa,POINT (-302035.169 -1056172.465)
2,3,,,,,,,2025-03-19 09:37:04.150000+00:00,Waqa,POINT (-302035.622 -1056163.948)
3,4,mangrove,90,mangrove,,,,2025-03-19 09:37:04.150000+00:00,Waqa,POINT (-301678.701 -1055570.345)
4,5,mangrove,90,,,,,2025-03-19 09:37:04.150000+00:00,Waqa,POINT (-301676.178 -1055605.274)
...,...,...,...,...,...,...,...,...,...,...
4647,4648,sand,,seagrass,,DCIM/geotag-macbluepng_20250319141225512.jpg,,2025-03-19 01:28:11.744000+00:00,Max Andrew,POINT (-302189.535 -1057825.943)
4648,4649,seagrass,,sand,,,,2025-03-19 01:28:11.744000+00:00,Max Andrew,
4649,4650,seagrass,,seagrass,,DCIM/geotag-macbluepng_20250319143359132.jpg,,2025-03-19 01:28:11.744000+00:00,Max Andrew,
4650,4651,seagrass,,,,DCIM/geotag-macbluepng_20250319143523650.jpg,,2025-03-19 01:28:11.744000+00:00,Max Andrew,


In [87]:
training_da = training.assign(x=training.geometry.x, y=training.geometry.y).to_xarray()

In [88]:
training_values = (
    postcard.sel(training_da[["x", "y"]], method="nearest").squeeze().compute().to_pandas()
)
training_values

Unnamed: 0_level_0,y,x,spatial_ref,time,nir,red,blue,green,emad,smad,bcmad,count,nir08,nir09,swir16,swir22,coastal,rededge1,rededge2,rededge3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,-1056415.0,-301535.0,3832,2024-01-01,2619,528,397,737,401.137695,0.000659,0.028749,31,2923,2881,1511,892,315,1206,2361,2690
1,-1056175.0,-302035.0,3832,2024-01-01,1183,540,536,696,960.667786,0.008482,0.143958,21,1091,923,933,701,724,907,1003,1040
2,-1056165.0,-302035.0,3832,2024-01-01,1361,750,656,821,911.694824,0.012277,0.150911,21,1074,906,946,760,715,883,985,1055
3,-1055575.0,-301675.0,3832,2024-01-01,2364,271,285,432,794.167114,0.006631,0.086733,21,2383,1801,805,522,770,739,1827,2219
4,-1055605.0,-301675.0,3832,2024-01-01,2229,287,302,424,906.952942,0.008708,0.102485,20,2177,1678,799,513,799,751,1700,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4647,-1057825.0,-302185.0,3832,2024-01-01,1809,1974,1539,1876,1476.850342,0.010534,0.115055,29,1748,1083,1267,864,872,1811,1628,1753
4648,-1049325.0,-299305.0,3832,2024-01-01,2291,413,378,595,1323.421509,0.005048,0.099559,11,2937,3371,1848,968,350,1004,2214,2617
4649,-1049325.0,-299305.0,3832,2024-01-01,2291,413,378,595,1323.421509,0.005048,0.099559,11,2937,3371,1848,968,350,1004,2214,2617
4650,-1049325.0,-299305.0,3832,2024-01-01,2291,413,378,595,1323.421509,0.005048,0.099559,11,2937,3371,1848,968,350,1004,2214,2617


In [84]:
# Join the training data with the extracted values and remove unnecessary columns
training_array = pd.concat([training["cc_id"], training_values], axis=1)
# Drop rows where there was no data available
training_array = training_array.dropna()
# Preview our resulting training array
training_array.head()

KeyError: 'cc_id'

In [None]:
postcard_df = training_array
postcard_df.columns

In [None]:
postcard_df.columns

In [None]:
# postcard_df.columns
# postcard_df = postcard_df.drop(columns=["spatial_ref", "time", "field_1", "y", "x"])
# postcard_df = postcard_df.drop(columns=["field_1"])
# postcard_df = postcard_df.drop(columns=["y", "x", "spatial_ref"])

postcard_df = postcard_df.drop(columns=["time"])
#postcard_df

In [None]:
postcard_df

In [None]:
len(postcard_df.columns)

In [60]:
# The classes are the first column
classes = np.array(postcard_df)[:, 0]

# The observation data is everything after the second column
observations = np.array(postcard_df)[:, 1:]

# Create a model...
classifier = RandomForestClassifier()

# ...and fit it to the data
model = classifier.fit(observations, classes)

In [62]:
version = "Png_joined_model"

In [63]:
postcard_params = postcard_df.columns[1:]
print(postcard_params)

Index(['y', 'x', 'spatial_ref', 'nir', 'red', 'blue', 'green', 'emad', 'smad',
       'bcmad', 'count', 'nir08', 'nir09', 'swir16', 'swir22', 'coastal',
       'rededge1', 'rededge2', 'rededge3'],
      dtype='object')


In [64]:


importances = classifier.feature_importances_

# Create the DataFrame
data = {
    'param': postcard_params,  # Use the list directly
    'importance': importances,  # Use the list directly
}
importance_df = pd.DataFrame(data)

importance_df["%"] = (importance_df["importance"]*100)

sorted_df = importance_df.sort_values(by='importance', ascending=False)
sorted_df.to_csv(f"{version}importance_df.csv")

sorted_df
# sorted_df.to_csv(.csv', index=False)

Unnamed: 0,param,importance,%
0,y,0.11071,11.07096
13,swir16,0.10435,10.434962
14,swir22,0.092492,9.249163
3,nir,0.08121,8.120965
1,x,0.060902,6.090236
17,rededge2,0.059974,5.997446
12,nir09,0.056494,5.649439
16,rededge1,0.054797,5.479676
11,nir08,0.05446,5.44604
18,rededge3,0.054371,5.437092


In [65]:
sorted_df


Unnamed: 0,param,importance,%
0,y,0.11071,11.07096
13,swir16,0.10435,10.434962
14,swir22,0.092492,9.249163
3,nir,0.08121,8.120965
1,x,0.060902,6.090236
17,rededge2,0.059974,5.997446
12,nir09,0.056494,5.649439
16,rededge1,0.054797,5.479676
11,nir08,0.05446,5.44604
18,rededge3,0.054371,5.437092


## Visualisation
Visualise the prediction with the 8 class colour map or the 10 class colour map below, respectively

In [None]:
classes = [
    [1, "sediment", "#8c8c8c"],
    [2, "sand", "#fedd24"],
    [3, "rubble", "#f8ffb4"],
    [4, "seagrass", "#6df7dc"],
    [5, "seaweed", "#b9df6f"],
    [6, "coral", "#a011c3"],
    [7, "rock", "#804600"],
    [8, "deeps", "#011b61"],
]

values_list = [c[0] for c in classes]
color_list = [c[2] for c in classes]

# Build a listed colormap.
c_map = colors.ListedColormap(color_list)
bounds = values_list + [8]
norm = colors.BoundaryNorm(bounds, c_map.N)
predicted_da.odc.explore(cmap=c_map)

In [None]:
classes = [
    [1, "sediment", "#8c8c8c"],
    [2, "sand", "#fedd24"],
    [3, "rubble", "#f8ffb4"],
    [4, "seagrass", "#6df7dc"],
    [5, "seaweed", "#b9df6f"],
    [6, "coral", "#a011c3"],
    [7, "rock", "#804600"],
    [8, "deeps", "#011b61"],
    [9, "mangrove", "#086a39"],
    [10, "land", "#00FFFFFF"],
]

values_list = [c[0] for c in classes]
color_list = [c[2] for c in classes]

# Build a listed colormap.
c_map = colors.ListedColormap(color_list)
bounds = values_list + [14]
norm = colors.BoundaryNorm(bounds, c_map.N)

In [None]:
predicted_da.odc.explore(cmap=c_map)

In [None]:
# predicted_da.odc.write_cog("predictions/predicted_ba_estuary_joined_data_postcard_svm_22042025.tiff", overwrite=True)
predicted_da.odc.write_cog("predictions/predicted_komave_postcard_rf_24042025.tiff", overwrite=True)

### Quantitative validation (to be completed)

In [None]:
test_data = gpd.read_file("testing-data/utanglang_postcard.geojson")
test_data.explore()

In [None]:
print("stop here")

In [None]:
stop_here


In [None]:
# First transform the training points to the same CRS as the data
test = test_data.to_crs(postcard.odc.geobox.crs)
# Next get the X and Y values out of the point geometries
training_da = test.assign(x=test.geometry.x, y=test.geometry.y).to_xarray()
# Now we can use the x and y values (lon, lat) to extract values from the median composite
training_values = (
    postcard.sel(training_da[["x", "y"]], method="nearest").squeeze().compute().to_pandas()
)
len(training_values)

# Join the training data with the extracted values and remove unnecessary columns
training_array = pd.concat([test["observed_id"], training_values], axis=1)
training_array = training_array.drop(
    columns=[
        "y",
        "x",
        "spatial_ref",
    ]
)
# # Drop rows where there was no data available
# training_array = training_array.dropna()
# Preview our resulting training array
training_array.head()

In [None]:
# Join the training data with the extracted values and remove unnecessary columns
training_array = pd.concat([test["observed_id"], training_values], axis=1)
training_array = training_array.drop(
    columns=[
        "y",
        "x",
        "spatial_ref",
    ]
)
# # Drop rows where there was no data available
# training_array = training_array.dropna()
# Preview our resulting training array
training_array.head()

In [None]:
print(predicted_da.dtype)  # Check the dtype of your DataArray
predicted_da = predicted_da.astype('float32')  # Convert to float32

In [None]:
print(len(training_array), len(test))  # Check the lengths of both arrays

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# Sample data
np.random.seed(42)
training_array = np.random.rand(100, 5)
test = pd.DataFrame({"observed_id": np.random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 100)})

# Split the data
X_train, X_test, y_train, y_test = train_test_split(training_array, test.observed_id, test_size=0.9, random_state=42)

# Replace None values with a default value, e.g., 0 or the most frequent value
y_train = y_train.fillna(0)  # or y_train.fillna(y_train.mode()[0])
y_test = y_test.fillna(0)    # Ensure y_test also has no None values
y_train = y_train.astype(int)
y_test = y_test.astype(int)

# Train your model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Generate and display confusion matrix
cm = confusion_matrix(y_test, y_pred)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm)
plt.figure(figsize=(12, 10))  # Adjust width and height for larger plot
cm_display.plot(cmap=plt.cm.Blues)
plt.show()

In [None]:
cm

In [None]:
from sklearn.metrics import accuracy_score

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')

In [None]:
from sklearn.metrics import classification_report

# report = classification_report(true_labels, predicted_labels, target_names=class_labels)
# print(report)

report = classification_report(y_test, y_pred, target_names=['sediment', 'sand', 'rubble', 'seagrass', 'seaweed', 'coral', 'rock', 'deeps', 'mangrove', 'land'])
print(report)

In [None]:
print("Unique labels in y_test:", np.unique(y_test))
print("Unique labels in y_pred:", np.unique(y_pred))

In [None]:
print("Length of y_test:", len(y_test))
print("Length of y_pred:", len(y_pred))