<img src="https://github.com/nicholasmetherall/digital-earth-pacific-macblue-activities/blob/main/attachments/images/DE_Pacific_banner.JPG?raw=true" width="900"/>

Figure 1.1.a. Jupyter environment + Python notebooks

# Digital Earth Pacific Notebook 1 prepare postcard and load data to csv

The objective of this notebook is to prepare a geomad postcard for your AOI (masking, scaling and loading additional band ratios and spectral indices) and sampling all the datasets into a csv based on your training data geodataframe.

## Step 1.1: Configure the environment

In [37]:
from datetime import datetime
from shapely.geometry import Polygon
from shapely import box
from pyproj import CRS 
import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio as rio
import xarray as xr
import rioxarray
from ipyleaflet import basemaps
from numpy.lib.stride_tricks import sliding_window_view
import pystac_client
import planetary_computer
from odc.stac import load
from pystac.client import Client
from skimage.feature import graycomatrix, graycoprops
from utils import scale, do_prediction, calculate_band_indices, apply_masks, threshold_calc_land, threshold_calc_ds

In [38]:
# Predefined variable for title and version

# Enter your initials
initials = "agl"

# Enter your site name
site = "bootless"

# Date
date = datetime.now()

# Make a clean version string
version = f"{initials}-{site}-{date.strftime('%d%m%Y')}"
print(version)

agl-bootless-09072025


## Step 1.2: Configure STAC access and search parameters

In [39]:
catalog = "https://stac.digitalearthpacific.org"
client = Client.open(catalog)

mspc_catalogue = "https://planetarycomputer.microsoft.com/api/stac/v1/"
dem_collection = "cop-dem-glo-30"

In [41]:
# Use bounding box

min_lon = 147.23859958846435
min_lat = -9.544064327324529
max_lon = 147.31182409906984
max_lat = -9.477718193563604

bbox = (min_lon, min_lat, max_lon, max_lat)

In [30]:
dem_stac_client = pystac_client.Client.open(mspc_catalogue)

search = dem_stac_client.search(
    collections=[dem_collection],
    bbox=bbox,
    max_items=1
)

item = next(search.get_items())
print(f"STAC item ID: {item.id}")

# Get signed asset URL
asset_href = planetary_computer.sign(item.assets["data"].href)
print(asset_href)

# # Mask elevation:
bbox_polygon = box(*bbox)
dem = rioxarray.open_rasterio(asset_href).squeeze()
dem = dem.rio.clip([bbox_polygon], crs="EPSG:4326", drop=True)
elevation_threshold: float = 10.0
masked = dem.where(dem >= elevation_threshold)
masked = masked.compute()





STAC item ID: Copernicus_DSM_COG_10_S10_00_E147_00_DEM
https://elevationeuwest.blob.core.windows.net/copernicus-dem/COP30_hh/Copernicus_DSM_COG_10_S10_00_E147_00_DEM.tif?st=2025-07-08T00%3A59%3A09Z&se=2025-07-09T01%3A44%3A09Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2025-07-08T18%3A18%3A29Z&ske=2025-07-15T18%3A18%3A29Z&sks=b&skv=2024-05-04&sig=p0tnsw0O0ywaZXkefcwFqG92SsyLadnLmkdQTFBXeWo%3D


NoDataInBounds: No data found in bounds.

In [None]:
# Cell 16 (after dem = rioxarray.open_rasterio(asset_href).squeeze())
print("\n--- DEBUGGING DEM (after loading) ---")
try:
    print(f"DEM CRS: {dem.rio.crs}")
    print(f"DEM Transform: {dem.rio.transform()}")
    print(f"DEM Shape: {dem.shape}")
    print(f"DEM Bounds: {dem.rio.bounds()}")
    print("--- Full xarray.DataArray representation of dem ---")
    print(dem)
except Exception as e:
    print(f"Could not get DEM geospatial properties: {e}")
print("--- END DEBUGGING DEM ---")

In [None]:
# Convert the bbox tuple into a shapely Box object
bbox_polygon = box(*bbox) # Unpack the tuple into arguments for shapely.geometry.box

# Now, clip using the shapely polygon
# Note: clip expects a list of geometries, so [bbox_polygon] is correct.
dem_clipped = dem.rio.clip([bbox_polygon], drop=True, crs="EPSG:3832") # Add crs for clarity if dem doesn't have it explicitly

In [None]:
# Ensure bbox_polygon is created before this clipping line
# Example (use your actual coordinates):
# bbox_coords_list = [min_lon, min_lat, max_lon, max_lat]
# min_lon, min_lat, max_lon, max_lat = bbox_coords_list
# bbox_polygon = Polygon([
#     (min_lon, min_lat),
#     (max_lon, min_lat),
#     (max_lon, max_lat),
#     (min_lon, max_lat),
#     (min_lon, min_lat)
# ])
# bbox_crs = "EPSG:3832"

# # Mask elevation:
# dem = rioxarray.open_rasterio(asset_href).squeeze()

# # Clip using the polygon, wrapped in a list, and its CRS
# dem_clipped = dem.rio.clip([bbox], crs="EPSG:3832", drop=True) # <-- Corrected line: [bbox_polygon]

elevation_threshold: float = 10.0
masked = dem_clipped.where(dem_clipped <= elevation_threshold)

In [None]:
print(f"Masked CRS: {masked.rio.crs}")
print(f"Masked Transform: {masked.rio.transform()}")
print(f"Masked Shape: {masked.shape}")
print(f"Masked Bounds: {masked.rio.bounds()}")
print(f"Number of non-NaN values in masked: {np.count_nonzero(~np.isnan(masked.values))}")

In [None]:
print(masked)

In [None]:
import numpy as np # Make sure numpy is imported if it's not already

# ... Your existing code up to masked = dem_clipped.where(dem_clipped >= elevation_threshold) ...

# --- START OF FIX ---
# Get the correct bounds from rioxarray (which you confirmed are valid)
bounds = masked.rio.bounds() # (min_x, min_y, max_x, max_y)

# Reconstruct x and y coordinates based on bounds and shape
# x-coordinates should go from min_x to max_x, spaced evenly across the width
x_coords_fixed = np.linspace(bounds[0], bounds[2], masked.shape[1])

# y-coordinates typically go from max_y to min_y for top-left origin (common in rasters),
# spaced evenly across the height.
y_coords_fixed = np.linspace(bounds[3], bounds[1], masked.shape[0])

# Create a new DataArray with the corrected x and y coordinates
# Use .copy() to preserve the original data, dimensions, and other attributes
masked_fixed_coords = masked.copy()

# Assign the fixed coordinates as dimension coordinates
# The (('x',), x_coords_fixed) syntax ensures they are linked to the 'x' dimension
masked_fixed_coords['x'] = (('x',), x_coords_fixed)
masked_fixed_coords['y'] = (('y',), y_coords_fixed)

# Ensure rioxarray still recognizes its spatial context.
# While assign_coords often preserves this, explicit setting is safer.
masked_fixed_coords.rio.write_crs(masked.rio.crs, inplace=True)
masked_fixed_coords.rio.write_transform(masked.rio.transform(), inplace=True)
# --- END OF FIX ---

# Now, try to explore with the fixed DataArray
# Since your array is (y: 157, x: 84), it's a 2D array.
# The `band` is a non-dimension coordinate, so you should use .odc.explore() directly.
elevation_mask = ~np.isnan(masked_fixed_coords)

elevation_mask.odc.explore()
print(elevation_mask.rio.crs)

In [None]:
datetime = "2024"

items = client.search(
    collections=["dep_s2_geomad"],
    datetime=datetime,
    bbox=bbox
).item_collection()

print(f"Found {len(items)} items in for {datetime}")

In [None]:
data = load(
    items,
    measurements=[
        "nir",
        "red",
        "blue",
        "green",
        "emad",
        "smad",
        "bcmad",
        "count",
        "green",
        "nir08",
        "nir09",
        "swir16",
        "swir22",
        "coastal",
        "rededge1",
        "rededge2",
        "rededge3",
    ],
    crs="EPSG:3832",
    bbox=bbox,
    chunks={"x": 2048, "y": 2048},
    groupby="solar_day",
)

scaled = (data.where(data != 0) * 0.0001).clip(0, 1)

# Load into memory
scaled = scaled.compute().squeeze()

In [None]:
# Incorporate band ratios and indices

# Modified Normalised Difference Water Index (MNDWI)
scaled["mndwi"] = (scaled["green"] - scaled["swir16"]) / (scaled["green"] + scaled["swir16"])

# Normalised Difference Turbidity Index (NDTI)
scaled["ndti"] = (scaled["red"] - scaled["green"]) / (scaled["red"] + scaled["green"])

# coastal aerosol index
scaled["cai"] = (scaled["coastal"] - scaled["blue"]) / (
    scaled["coastal"] + scaled["blue"]
)
# vegetation index (NDVI)
scaled["ndvi"] = (scaled["nir"] - scaled["red"]) / (
    scaled["nir"] + scaled["red"]
)
# enhanced vegetation index
scaled["evi"] = (2.5 * scaled["nir"] - scaled["red"]) / (
    scaled["nir"] + (6 * scaled["red"]) - (7.5 * scaled["blue"]) + 1
)
# soil adjusted vegetation index
scaled["savi"] = (scaled["nir"] - scaled["red"]) / (
    scaled["nir"] + scaled["red"]
)
# water index (NDWI)
scaled["ndwi"] = (
    (scaled["green"] - scaled["nir"])
    / (scaled["green"] + scaled["nir"] + 0.428)
    * (1 + 0.428)
)
# blue to green ratio
scaled["b_g"] = (scaled["blue"]) / (scaled["green"])
# blue to red ratio
scaled["b_r"] = (scaled["blue"]) / (scaled["red"])
# max chlorophlyll index (MCI)
scaled["mci"] = (scaled["nir"]) / (scaled["rededge1"])
# normalised difference chlorophyll index (NDCI)
scaled["ndci"] = (scaled["rededge1"] - scaled["red"]) / (
    scaled["rededge1"] + scaled["red"]
)
# Natural log of blue/green
scaled["ln_bg"] = np.log(scaled.blue / scaled.green)

In [None]:
scaled

In [None]:
# Explore the site we are working on
scaled.odc.explore(vmin=0, vmax=0.3, bands=["red", "green", "blue"], crs="EPSG:3832", name=site)

In [88]:
print("--- DEBUGGING SCALED DATASET ---")

# Print the full Dataset representation first to see its structure and available bands
print("--- Full xarray.Dataset representation of scaled ---")
print(scaled)

# Now, we need to pick a single band from the Dataset to check its individual DataArray properties
# Let's check the 'red' band as an example.
if "red" in scaled.data_vars:
    sample_band = scaled["red"]
    print(f"\n--- Debugging 'red' band (as a DataArray) ---")
    print(f"Red Band CRS: {sample_band.rio.crs}")
    print(f"Red Band Transform: {sample_band.rio.transform()}")
    print(f"Red Band Shape: {sample_band.shape}") # This should now work
    print(f"Red Band Bounds: {sample_band.rio.bounds()}")
    print(f"Number of non-NaN values in red band: {np.count_nonzero(~np.isnan(sample_band.values))}")
    print("--- End Debugging 'red' band ---")
else:
    print("\nError: 'red' band not found in scaled Dataset. Please check available bands.")
    print(f"Available data variables: {list(scaled.data_vars.keys())}")


print("--- END DEBUGGING SCALED DATASET ---")


# --- SOLUTION FOR PLOTTING ---

# 1. Select the desired RGB bands from the Dataset
# This will still be an xarray.Dataset, but only containing 'red', 'green', 'blue' data variables
rgb_dataset = scaled[["red", "green", "blue"]]

# 2. Convert this Dataset into a single xarray.DataArray with a 'band' dimension
# The .to_array() method converts data variables into a new dimension (named 'band' here).
# It will automatically order them by the band names you selected.
rgb_dataarray = rgb_dataset.to_array(dim="band")

# Verify the structure of the new DataArray before plotting
print("\n--- Debugging rgb_dataarray for explore ---")
print(f"rgb_dataarray Shape: {rgb_dataarray.shape}")
print(f"rgb_dataarray Dimensions: {rgb_dataarray.dims}")
print(rgb_dataarray) # Full xarray representation
print("--- End Debugging rgb_dataarray ---")

# 3. Now, call .explore() on this new multi-band DataArray
# The `bands` parameter in .explore() will now refer to the values in the 'band' dimension.
# Make sure "red", "green", "blue" actually exist as values in the 'band' dimension of rgb_dataarray.
rgb_dataarray.odc.explore(vmin=0, vmax=0.3, bands=["red", "green", "blue"], crs="EPSG:3832", name=site)

--- DEBUGGING SCALED DATASET ---
--- Full xarray.Dataset representation of scaled ---
<xarray.Dataset> Size: 68MB
Dimensions:      (y: 745, x: 816)
Coordinates:
  * y            (y) float64 6kB -1.053e+06 -1.053e+06 ... -1.06e+06 -1.06e+06
  * x            (x) float64 7kB -3.074e+05 -3.074e+05 ... -2.993e+05 -2.992e+05
    spatial_ref  int32 4B 3832
    time         datetime64[ns] 8B 2024-01-01
Data variables: (12/28)
    nir          (y, x) float32 2MB 0.1571 0.1784 0.2099 ... 0.049 0.0489 0.0489
    red          (y, x) float32 2MB 0.1535 0.168 0.1917 ... 0.0543 0.0547 0.0545
    blue         (y, x) float32 2MB 0.1105 0.1247 0.1386 ... 0.0719 0.0701 0.07
    green        (y, x) float32 2MB 0.1401 0.1565 0.1734 ... 0.0671 0.0668
    emad         (y, x) float32 2MB 0.05177 0.05221 0.05399 ... 0.1333 0.1327
    smad         (y, x) float32 2MB 8.047e-08 8.855e-08 ... 1.529e-06 1.487e-06
    ...           ...
    ndwi         (y, x) float32 2MB -0.03347 -0.04099 ... 0.04777 0.04701
    b_g

  self._dims = self._parse_dimensions(dims)


ValueError: Only 2D single-band (x, y) or 3D multi-band (x, y, band) arrays are supported by `.explore()`. Please reduce the dimensions in your array, for example by using `.isel()` or `.sel()`: `da.isel(time=0).odc.explore()`.

In [89]:
## Moderate land mask
MNDWI_THRESHOLD = -0.2
mndwi_mask = scaled.mndwi > MNDWI_THRESHOLD

mndwi_mask.odc.explore()

In [90]:
ln_bg = scaled['ln_bg'].values.flatten()  # flatten in case it's multi-dimensional
# Remove NaNs if present
ln_bg = ln_bg[~np.isnan(ln_bg)]
# Calculate mean and std
mean = ln_bg.mean()
std = ln_bg.std()


In [91]:
# Deep ocean mask
ln_bg_mask = scaled["ln_bg"] < 0

ln_bg_mask.odc.explore()

In [92]:
import numpy as np # Ensure numpy is imported

print("\n--- DEBUGGING ALL_MASKS DATAARRAY ---")
print(f"all_masks CRS: {all_masks.rio.crs}")
print(f"all_masks Transform: {all_masks.rio.transform()}")
print(f"all_masks Shape: {all_masks.shape}")
print(f"all_masks Bounds: {all_masks.rio.bounds()}")
print(f"Number of True values in all_masks: {np.count_nonzero(all_masks.values)}")
print("--- Full xarray.DataArray representation of all_masks ---")
print(all_masks) # This will print the full xarray DataArray representation
print("--- END DEBUGGING ALL_MASKS DATAARRAY ---")


--- DEBUGGING ALL_MASKS DATAARRAY ---
all_masks CRS: EPSG:3832


NoDataInBounds: Unable to determine bounds from coordinates.

In [93]:
print(all_masks)
print(mndwi_mask.rio.crs)
print(ln_bg_mask.rio.crs)

<xarray.DataArray (y: 0, x: 0)> Size: 0B
array([], shape=(0, 0), dtype=bool)
Coordinates:
  * y            (y) float64 0B 
  * x            (x) float64 0B 
    time         datetime64[ns] 8B 2024-01-01
    band         int64 8B 1
    spatial_ref  int64 8B 0
EPSG:3832
EPSG:3832


In [95]:
import numpy as np

print("\n--- DEBUGGING ALL_MASKS DATAARRAY ---")
print(f"all_masks CRS: {all_masks.rio.crs}")
print(f"all_masks Transform: {all_masks.rio.transform()}")
print(f"all_masks Shape: {all_masks.shape}")
print(f"all_masks Bounds: {all_masks.rio.bounds()}")
print(f"Number of True values in all_masks: {np.count_nonzero(all_masks.values)}")
print("--- Full xarray.DataArray representation of all_masks ---")
print(all_masks)
print("--- END DEBUGGING ALL_MASKS DATAARRAY ---")


--- DEBUGGING ALL_MASKS DATAARRAY ---
all_masks CRS: EPSG:3832


NoDataInBounds: Unable to determine bounds from coordinates.

In [94]:
all_masks = mndwi_mask & ln_bg_mask & elevation_mask
all_masks = all_masks.rio.write_crs("EPSG:3832") # Use .rio.reproject

centroid = scaled.odc.geobox.geographic_extent.centroid.coords[0][::-1]
m = folium.Map(location=centroid, zoom_start=14)

scaled.odc.to_rgba(bands=["red", "green", "blue"], vmin=0, vmax=0.3).odc.add_to(m, name="RGB")
mndwi_mask.where(mndwi_mask == 0).odc.add_to(m, name="MNDWI Mask", vmin=0, vmax=1)
# ndti_mask.where(ndti_mask == 0).odc.add_to(m, name="NDTI Mask", vmin=0, vmax=1)
ln_bg_mask.where(ln_bg_mask == 0).odc.add_to(m, name="ln_bg Mask", vmin=0, vmax=1)
# all_masks.where(all_masks == 0).odc.add_to(m, name="All Masks", vmin=0, vmax=1)
all_masks.odc.add_to(m, name="All Masks", vmin=0, vmax=1) 

folium.LayerControl().add_to(m)

m

AssertionError: 

In [None]:
# Now apply the mask, and view the masked region

masked = scaled.where(all_masks)
masked.odc.explore(
    vmin=0, vmax=0.3, bands=["red", "green", "blue"], name=f"{site}-masked", tiles=basemaps.Esri.WorldImagery
)

### GLCM texture analysis

The objective of this notebook was to train the machine learning model that will allow us to classify an area with land cover classes defined through the training data.

Step 1.2. Input the training data to sample geomad data from the postcard

In [15]:
WINDOW_SIZE = 9
LEVELS = 32

# Input
max = masked.blue.max().values
min = masked.blue.min().values
# Scale to 0-LEVELS for GLCM
img = ((masked.blue - min) / (max - min) * (LEVELS - 1)).clip(0, LEVELS - 1).values.astype(np.uint8)

# Extract overlapping windows
patches = sliding_window_view(img, (WINDOW_SIZE, WINDOW_SIZE))
# Shape: (rows, cols, win_y, win_x)

# Your patch function
def glcm_features(patch):
    glcm = graycomatrix(
        patch,
        distances=[1],
        angles=[0],
        levels=LEVELS,
        symmetric=True,
        normed=True
    )
    out = np.empty(7, dtype=np.float32)
    out[0] = graycoprops(glcm, "contrast")[0, 0]
    out[1] = graycoprops(glcm, "homogeneity")[0, 0]
    out[2] = graycoprops(glcm, "energy")[0, 0]
    out[3] = graycoprops(glcm, "ASM")[0, 0]
    out[4] = graycoprops(glcm, "correlation")[0, 0]
    out[5] = graycoprops(glcm, "mean")[0, 0]

            
            # glcm_p = glcm[:, :, 0, 0]
            # entropy[i, j] = -np.sum(glcm_p * np.log2(glcm_p + 1e-10))
    
    glcm_p = glcm[:, :, 0, 0]
    out[6] = -np.sum(glcm_p * np.log2(glcm_p + 1e-10))
    return out

# Use apply_ufunc to vectorize over (row, col) dimensions
result = xr.apply_ufunc(
    glcm_features,
    xr.DataArray(patches, dims=["y", "x", "win_y", "win_x"]),
    input_core_dims=[["win_y", "win_x"]],
    output_core_dims=[["feature"]],
    vectorize=True,
    dask="parallelized",
    output_dtypes=[np.float32]
)

# Add coordinates & names
pad = WINDOW_SIZE - 1
result = result.assign_coords({
    "y": masked.y[: -pad],
    "x": masked.x[: -pad],
    "feature": ["contrast", "homogeneity", "energy", "ASM", "correlation", "mean", "entropy"]
})

result_bands = result.to_dataset(dim="feature")

# Combine with original
masked_plus = masked.copy()
masked_plus = masked_plus.assign(result_bands)

masked_plus

  img = ((masked.blue - min) / (max - min) * (LEVELS - 1)).clip(0, LEVELS - 1).values.astype(np.uint8)


In [16]:
# Re-apply the mask
masked_plus = masked_plus.where(all_masks)

In [17]:
masked_plus.correlation.odc.explore()

### Postcard csv

The objective of this notebook was to train the machine learning model that will allow us to classify an area with land cover classes defined through the training data.

Step 1.2. Input the training data to sample geomad data from the postcard

In [18]:
# Reproject training data to the GeoMAD CRS and convert to xarray
training_reprojected = training.to_crs(masked_plus.odc.crs)
training_da = training_reprojected.assign(
    x=training_reprojected.geometry.x, y=training_reprojected.geometry.y
).to_xarray()

# Extract training values from the masked dataset
training_values = (
    masked_plus.sel(training_da[["x", "y"]], method="nearest")
    .squeeze()
    .compute()
    .to_pandas()
)
training_values

Unnamed: 0_level_0,nir,red,blue,green,emad,smad,bcmad,count,nir08,nir09,...,homogeneity,energy,ASM,correlation,mean,entropy,y,x,spatial_ref,time
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0602,0.0742,0.0926,0.1005,0.152102,0.000002,0.000034,0.0031,0.0637,0.0988,...,0.888889,0.575007,0.330633,0.552448,3.541667,1.757751,-1057915.0,-302095.0,3832,2024-01-01
1,0.0598,0.0750,0.0989,0.1071,0.161220,0.000003,0.000034,0.0031,0.0648,0.0689,...,0.868056,0.661656,0.437789,0.282831,3.756944,1.547091,-1057915.0,-302075.0,3832,2024-01-01
2,0.0597,0.0750,0.0949,0.1040,0.161838,0.000003,0.000034,0.0031,0.0667,0.0682,...,0.951389,0.839043,0.703993,0.533117,3.881944,0.910556,-1057945.0,-302075.0,3832,2024-01-01
3,0.0597,0.0752,0.0952,0.1040,0.158307,0.000002,0.000034,0.0031,0.0662,0.0687,...,0.895833,0.636696,0.405382,0.515152,3.687500,1.604869,-1057925.0,-302085.0,3832,2024-01-01
4,0.0580,0.0723,0.0995,0.1074,0.159002,0.000003,0.000034,0.0031,0.0638,0.0677,...,0.875000,0.694722,0.482639,0.242105,3.791667,1.438722,-1057915.0,-302065.0,3832,2024-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
815,,,,,,,,,,,...,,,,,,,-1059575.0,-303115.0,3832,2024-01-01
816,,,,,,,,,,,...,,,,,,,-1059535.0,-303115.0,3832,2024-01-01
817,,,,,,,,,,,...,,,,,,,-1059485.0,-303115.0,3832,2024-01-01
818,,,,,,,,,,,...,,,,,,,-1059435.0,-303135.0,3832,2024-01-01


In [19]:
# Join the training data with the extracted values and remove unnecessary columns
training_array = pd.concat([training["cc_id"], training_values], axis=1)

# Drop rows where there was no data available
training_array = training_array.dropna()

# Preview our resulting training array
training_array.head()

Unnamed: 0,cc_id,nir,red,blue,green,emad,smad,bcmad,count,nir08,...,homogeneity,energy,ASM,correlation,mean,entropy,y,x,spatial_ref,time
0,4,0.0602,0.0742,0.0926,0.1005,0.152102,2e-06,3.4e-05,0.0031,0.0637,...,0.888889,0.575007,0.330633,0.552448,3.541667,1.757751,-1057915.0,-302095.0,3832,2024-01-01
1,4,0.0598,0.075,0.0989,0.1071,0.16122,3e-06,3.4e-05,0.0031,0.0648,...,0.868056,0.661656,0.437789,0.282831,3.756944,1.547091,-1057915.0,-302075.0,3832,2024-01-01
2,4,0.0597,0.075,0.0949,0.104,0.161838,3e-06,3.4e-05,0.0031,0.0667,...,0.951389,0.839043,0.703993,0.533117,3.881944,0.910556,-1057945.0,-302075.0,3832,2024-01-01
3,4,0.0597,0.0752,0.0952,0.104,0.158307,2e-06,3.4e-05,0.0031,0.0662,...,0.895833,0.636696,0.405382,0.515152,3.6875,1.604869,-1057925.0,-302085.0,3832,2024-01-01
4,4,0.058,0.0723,0.0995,0.1074,0.159002,3e-06,3.4e-05,0.0031,0.0638,...,0.875,0.694722,0.482639,0.242105,3.791667,1.438722,-1057915.0,-302065.0,3832,2024-01-01


In [20]:
# Write the training data to a CSV file
training_array.to_csv(f"training-data/{version}-training.csv", index=False)