<img src="https://github.com/nicholasmetherall/digital-earth-pacific-macblue-activities/blob/main/attachments/images/DE_Pacific_banner.JPG?raw=true" width="900"/>
Figure 1.1.a. Jupyter environment + Python notebooks

### Digital Earth Pacific Notebook 1 prepare postcard and load data to csv

<font color='green'>The objective of this notebook is to prepare a geomad postcard for your AOI (masking, scaling and loading additional band ratios and spectral indices) and sampling all the datasets into a csv based on your training data geodataframe. </font>

<font color='blue'>Step 1.1</font>

In [50]:
from pystac_client import Client
from dask.distributed import Client as DaskClient
from odc.stac import load, configure_s3_access
import rasterio as rio
import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import folium
import postcards
from postcards import load_data
from postcards import mask_and_scale
from sklearn.ensemble import RandomForestClassifier
import odc.geo.xr
import rioxarray
import matplotlib.pyplot as plt

<font color='blue'>Define catalogue</font>

In [51]:
catalog = "https://stac.digitalearthpacific.org"
client = Client.open(catalog)

<font color='red'>Define your area of interest - copy and paste the bottom left latitude (min_lat) and the bottem left longitude (min_lon) and the top right latitude (max_lat) and the top right longitude (max_lon)

<font color='red'>In this activity you can use the following examples: 
`-18.1313, -18.1553, 177.4347, 177.4032`
  
<font color='red'>  Note we have reordered these into the wrong order so you will have to define them correctly below:</font>

In [52]:
min_lat = -18.20459
min_lon = 178.33041
max_lat = -18.10000 
max_lon = 178.53506
bbox = [min_lon, min_lat, max_lon, max_lat]

<font color='red'>Define your time of interest - choose a range of a few months in 2024 using the syntax `datetime="YYYY-MM/YYYY-MM"`</font>

In [53]:
datetime="2024"
items = list(client.search(collections=["dep_s2_geomad"], datetime=datetime, bbox=bbox).items())

In [54]:
from pystac import Collection

In [55]:
collection = Collection.from_file("https://stac.digitalearthpacific.org/collections/dep_s2_geomad")

In [56]:
data = load(
        items,
        measurements=[
            "nir", "red", "blue", "green", "emad", "smad", 
            "bcmad", "count", "green", "nir08", 
            "nir09", "swir16", "swir22", "coastal",
            "rededge1", "rededge2", "rededge3", 
        ],
        bbox=bbox,
        chunks={"x": 2048, "y": 2048},
        groupby="solar_day",
    )

In [57]:
# dask_client = DaskClient(n_workers=1, threads_per_worker=16, memory_limit='16GB')
# configure_s3_access(cloud_defaults=True, requester_pays=True)

In [58]:
scaled = (data.where(data != 0) * 0.0001).clip(0, 1)

In [59]:
mndwi = (scaled["green"]-scaled["swir16"])/(scaled["green"]+scaled["swir16"])
land_mask = mndwi > 0
masked_raster = np.ma.masked_where(~land_mask, mndwi)
masked_raster_da = xr.DataArray(masked_raster)
masked_raster_da = masked_raster_da.rename({"dim_0": "y", "dim_1": "x"})
clipped_ds = scaled.where(land_mask)

In [60]:
ndti = (clipped_ds["red"]-clipped_ds["green"])/(clipped_ds["red"]+clipped_ds["green"])
ndti_mask = ndti < 0.2
ndti_masked_raster = np.ma.masked_where(~ndti_mask, ndti)
ndti_masked_raster_da = xr.DataArray(ndti_masked_raster)
ndti_masked_raster_da = ndti_masked_raster_da.rename({"dim_0": "y", "dim_1": "x"})
clipped_ds = clipped_ds.where(ndti_mask)

In [61]:
nir = clipped_ds['nir']
nir_mask = nir < 0.085
nir_masked_raster = np.ma.masked_where(~nir_mask, nir)
nir_masked_raster_da = xr.DataArray(nir_masked_raster)
nir_masked_raster_da = nir_masked_raster_da.rename({"dim_0": "y", "dim_1": "x"})
clipped_ds = clipped_ds.where(nir_mask)

In [62]:
# Incorporate other band ratios and indices
cai = (clipped_ds["coastal"]-clipped_ds["blue"])/( clipped_ds["coastal"]+ clipped_ds["blue"]) #coastal aerosol index
ndvi = (clipped_ds["nir"]-clipped_ds["red"])/( clipped_ds["nir"]+ clipped_ds["red"]) #vegetation index (NDVI)
ndwi = (clipped_ds["green"]-clipped_ds["nir"])/(clipped_ds["green"]+clipped_ds["nir"]) #water index (NDWI)
b_g = (clipped_ds["blue"])/(clipped_ds["green"]) #blue to green ratio
b_r = (clipped_ds["blue"])/(clipped_ds["red"]) #blue to red ratio
mci = (clipped_ds["nir"])/(clipped_ds["rededge1"]) # max chlorophlyll index (MCI)
ndci = (clipped_ds["rededge1"]-clipped_ds["red"])/(clipped_ds["rededge1"]+clipped_ds["red"]) # normalised difference chlorophyll index (NDCI)


In [63]:
clipped_ds['cai'] = cai
clipped_ds['ndvi'] = ndvi
clipped_ds['ndwi'] = ndwi
clipped_ds['mndwi'] = mndwi
clipped_ds['ndti'] = ndti
clipped_ds['b_g'] = b_g
clipped_ds['b_r'] = b_r
clipped_ds['mci'] = mci
clipped_ds['ndci'] = ndci
clipped_ds

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 46 graph layers,2 chunks in 46 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 46 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 46 graph layers,2 chunks in 46 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 47 graph layers,2 chunks in 47 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 47 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 47 graph layers,2 chunks in 47 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 47 graph layers,2 chunks in 47 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 47 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 47 graph layers,2 chunks in 47 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 55 graph layers,2 chunks in 55 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 55 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 55 graph layers,2 chunks in 55 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 55 graph layers,2 chunks in 55 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 55 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 55 graph layers,2 chunks in 55 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 55 graph layers,2 chunks in 55 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 55 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 55 graph layers,2 chunks in 55 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 48 graph layers,2 chunks in 48 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 48 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 48 graph layers,2 chunks in 48 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 56 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 56 graph layers,2 chunks in 56 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 70 graph layers,2 chunks in 70 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 70 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 70 graph layers,2 chunks in 70 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 51 graph layers,2 chunks in 51 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 51 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 51 graph layers,2 chunks in 51 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 51 graph layers,2 chunks in 51 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 51 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 51 graph layers,2 chunks in 51 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 19 graph layers,2 chunks in 19 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 19 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 19 graph layers,2 chunks in 19 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 33 graph layers,2 chunks in 33 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 33 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 33 graph layers,2 chunks in 33 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 59 graph layers,2 chunks in 59 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 59 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 59 graph layers,2 chunks in 59 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 59 graph layers,2 chunks in 59 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 59 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 59 graph layers,2 chunks in 59 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 58 graph layers,2 chunks in 58 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 58 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 58 graph layers,2 chunks in 58 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 61 graph layers,2 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.60 MiB 9.52 MiB Shape (1, 1219, 2279) (1, 1219, 2048) Dask graph 2 chunks in 61 graph layers Data type float32 numpy.ndarray",2279  1219  1,

Unnamed: 0,Array,Chunk
Bytes,10.60 MiB,9.52 MiB
Shape,"(1, 1219, 2279)","(1, 1219, 2048)"
Dask graph,2 chunks in 61 graph layers,2 chunks in 61 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Postcard csv

<font color='green'>The objective of this notebook was to train the machine learning model that will allow us to classify an area with land cover classes defined through the training data. </font>

<font color='blue'>Step 1.2. Input the training data to sample geomad data from the postcard</font>

In [64]:
# Define training data
suva_gdf = gpd.read_file("training_data/south_suva_tdata_14022025.geojson")
suva_gdf = suva_gdf.to_crs("EPSG:4326")
# suva_gdf.explore(column="cc_id", legend=True)

In [65]:
suva_postcard =clipped_ds.to_array(dim="band")
# First transform the training points to the same CRS as the data
suva_training = suva_gdf.to_crs(suva_postcard.odc.geobox.crs)

In [66]:
suva_training_da = suva_training.assign(x=suva_training.geometry.x, y=suva_training.geometry.y).to_xarray()

In [67]:
suva_training_values = (
    suva_postcard.sel(suva_training_da[["x", "y"]], method="nearest").squeeze().compute().to_pandas()
)
# suva_training_values

In [68]:
# Join the training data with the extracted values and remove unnecessary columns
suva_training_array = pd.concat([suva_training["cc_id"], suva_training_values.transpose()], axis=1)
# Drop rows where there was no data available
suva_training_array = suva_training_array.dropna()
# Preview our resulting training array
# suva_training_array.head()

In [69]:
suva_training_array.to_csv("training_data/postcard_suva_dataframe.csv")

### Notebook 2 - Train Random Forest Machine Learning (ML) Model

<font color='green'>Combine the csv geodataframes from notebook 1 into a single csv to train the machine learning model</font>

<font color='blue'>Step 2.1. Concatenating all postcard dataframes</font>

In [70]:
postcard_suva_df = gpd.read_file("training_data/postcard_suva_dataframe.csv")
postcard_suva_df

Unnamed: 0,field_1,cc_id,nir,red,blue,green,emad,smad,bcmad,count,...,rededge3,cai,ndvi,ndwi,mndwi,ndti,b_g,b_r,mci,ndci
0,0,4,0.0368,0.0519,0.0609,0.0689,0.08751858,6.9816947e-06,2.5805362e-05,0.0029,...,0.042099997,0.0,-0.17023674,0.30368966,0.4961998,-0.14072846,0.88388973,1.1734104,0.60726076,0.077333316
1,1,4,0.037499998,0.058399998,0.0657,0.0764,0.088531576,6.3987345e-06,2.3560617e-05,0.0028,...,0.0436,-0.04701198,-0.21793535,0.34152767,0.54969573,-0.13353117,0.8599477,1.1250001,0.5868544,0.044971414
2,2,4,0.0378,0.060399998,0.0661,0.0784,0.083143786,6.3806383e-06,2.1550335e-05,0.0026,...,0.041699998,-0.06014438,-0.23014256,0.3493976,0.5902637,-0.12968302,0.84311223,1.094371,0.58243454,0.03591379
3,3,4,0.032899998,0.0519,0.059,0.0689,0.06880829,5.977988e-06,2.3465267e-05,0.0026,...,0.0363,-0.035996508,-0.22405663,0.35363457,0.5676905,-0.14072846,0.8563135,1.1368016,0.5833333,0.04155123
4,4,4,0.0316,0.048299998,0.0585,0.0668,0.060051117,5.3245544e-06,2.2541633e-05,0.0025,...,0.0326,-0.050269317,-0.20901127,0.3577236,0.5680751,-0.16072981,0.8757485,1.2111802,0.59962046,0.043564357
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2643,3529,8,0.0202,0.0312,0.0493,0.0545,0.06028177,4.652932e-06,2.6074404e-05,0.0022,...,0.022699999,-0.012320346,-0.2140078,0.45917,0.5683453,-0.27187866,0.90458715,1.5801282,0.7087719,-0.04522612
2644,3530,8,0.022799999,0.033,0.0535,0.0569,0.06589043,3.398329e-06,2.8317436e-05,0.0025,...,0.0253,0.0027958772,-0.18279573,0.42785448,0.52342707,-0.26585093,0.9402461,1.6212121,0.7354838,-0.031250007
2645,3531,8,0.0253,0.0365,0.0556,0.0587,0.071145184,4.871276e-06,2.8748416e-05,0.0026,...,0.0286,-0.0099909175,-0.18122977,0.39761904,0.4785894,-0.23319326,0.9471891,1.5232877,0.7376093,-0.031073447
2646,3532,8,0.033,0.0396,0.0639,0.0642,0.1073031,5.6069043e-06,4.2647654e-05,0.0026,...,0.034199998,0.011600894,-0.09090909,0.32098764,0.43303573,-0.23699422,0.9953271,1.6136364,0.85271317,-0.0114942575


In [71]:
# joined_training_data = pd.concat([postcard_suva_df, df2, df3], axis=0, ignore_index=True) # once we have more dataframes this code can be used to concatenate them

<font color='blue'>Step 2.2. Train the ML model</font>

In [72]:
# The classes are the first column
classes = np.array(postcard_suva_df)[:, 1]

# The observation data is everything after the second column
observations = np.array(postcard_suva_df)[:, 2:]

# Create a model...
classifier = RandomForestClassifier()

# ...and fit it to the data
model = classifier.fit(observations, classes)

In [76]:
# Convert to a stacked array of observations
# stacked_arrays = stacked_arrays.squeeze()
stacked_arrays = suva_postcard.stack(dims=["y", "x"]).transpose()

# Predict the classes
predicted = model.predict(stacked_arrays)

# Reshape back to the original 2D array
array = predicted.reshape(len(suva_postcard.y), len(suva_postcard.x))

# Convert to an xarray again, because it's easier to work with
predicted_da = xr.DataArray(
    array, coords={"y": suva_postcard.y, "x": suva_postcards.x}, dims=["y", "x"]
)

In [None]:
print(predicted_da.dtype)  # Check the dtype of your DataArray
predicted_da = predicted_da.astype('float32')  # Convert to float32

# Check for NaN values
if np.isnan(predicted_da).any():
    print("NaN values found in the data")
    # Handle NaN values, e.g. by filling them
    predicted_da = predicted_da.fillna(0)  # Replace NaN with 0 or appropriate value

In [None]:
# Suppose 'model' is your trained model object
joblib.dump(model, "models/model-geomad-v2-rf-25022025.model")
