In [14]:
import geopandas as gpd
import joblib
import numpy as np
import odc.geo  # noqa: F401
import pandas as pd
from shapely import geometry
from sklearn.ensemble import RandomForestClassifier
import matplotlib as plt

import xarray as xr

import rioxarray

from utils import load_data

In [15]:
%reload_ext autoreload
%autoreload 2

## Find and load S2 data

Load data and set up your array to use for prediction

In [16]:
# Configure some things up front
chunks = dict(x=256, y=256)
datetime = "2023"

bbox = [177.14, -18.41, 179.80, -16.01]
bbox_geometry = geometry.box(*bbox)

gdf = gpd.GeoDataFrame({'geometry': [bbox_geometry]}, crs='EPSG:4326')
gdf.explore()

In [17]:
merged = load_data(bbox, chunks=dict(x=256, y=256), datetime=datetime, resolution=10)
merged

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 7 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 7 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 14 graph layers,12644 chunks in 14 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 14 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 14 graph layers,12644 chunks in 14 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 8 graph layers,12644 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 8 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 8 graph layers,12644 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 13 graph layers,12644 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 13 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 13 graph layers,12644 chunks in 13 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 7 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 7 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 7 graph layers,12644 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 4 graph layers,12644 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 4 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 4 graph layers,12644 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 2 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 2 graph layers,12644 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 5 graph layers,12644 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.07 GiB 256.00 kiB Shape (27801, 29612) (256, 256) Dask graph 12644 chunks in 5 graph layers Data type float32 numpy.ndarray",29612  27801,

Unnamed: 0,Array,Chunk
Bytes,3.07 GiB,256.00 kiB
Shape,"(27801, 29612)","(256, 256)"
Dask graph,12644 chunks in 5 graph layers,12644 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## Train and predict

When you change your training data, you can re-train and predict here.

In [18]:
training_file = "training_data/fj_lulc_data_points_ce.gpkg"

tdata = gpd.read_file(training_file, bbox=bbox_geometry)
tdata.explore()

In [19]:
# Count the values in a specific column (e.g., "lulc_class" column)
class_counts = tdata['Class'].value_counts()
code_counts = tdata['ClassId'].value_counts()

# Print the counts
print(class_counts, code_counts)

Class
Forest        4832
Grassland     2073
Cropland       851
Settlement     165
Bare_Other      36
Mangroves        9
Name: count, dtype: int64 ClassId
2    4832
3    2073
1     851
4     165
7      36
5       9
Name: count, dtype: int64


In [20]:
type(merged)

xarray.core.dataset.Dataset

In [21]:
tdata

Unnamed: 0,Class,SubClass,ClassId,geometry
0,Cropland,ShiftSubsistence,1,POINT (177.52552 -18.12415)
1,Forest,NatForest,2,POINT (177.52556 -18.13635)
2,Grassland,Herbland,3,POINT (177.52559 -18.14855)
3,Grassland,Herbland,3,POINT (177.53828 -18.12412)
4,Grassland,Herbland,3,POINT (177.53832 -18.13632)
...,...,...,...,...
7961,Forest,NatForest,2,POINT (179.77301 -16.59321)
7962,Forest,NatForest,2,POINT (179.78547 -16.58085)
7963,Forest,NatForest,2,POINT (179.78565 -16.59304)
7964,Forest,NatForest,2,POINT (179.79811 -16.58068)


In [22]:
# # First attempt from depal (Sachin) - to sample both x,y from gdf (tdata) and data variables from xarray (merged)
# def add_image_values(pts: gpd.GeoDataFrame, image: merged.DataArray) -> gpd.GeoDataFrame:
#     # """Add the values of the image at each point location to the input GeoDataFrame"""
#     # Get values for each of the image bands at each of the points.
#     pts = tdata.odc.crs(merged.rio.crs)
#     y = merged.DataArray(pts.geometry.y, dims="points")
#     pt_values_i = image.sel(x=x, y=y, method="nearest")
#     return pd.concat([pts, pt_values_i.squeeze().to_pandas().transpose()], axis=1)

In [23]:
# # Second attempt (Nick) - to sample both x,y from gdf (tdata) and data variables from xarray (merged)

# def add_image_values(pts, image):
#     # Get x, y coordinates of the points
#     x = tdata.geometry.x
#     y = tdata.geometry.y
    
#     # Select the nearest pixel values from the image for each point
#     pixel_values = merged.sel(x=x, y=y, method="nearest")
    
#     # Convert the xarray DataArray to a pandas DataFrame and transpose it
#     pixel_values_df = pixel_values.to_dataframe().transpose()
    
#     # Concatenate the pixel values DataFrame with the GeoDataFrame
#     pts_with_values = pd.concat([pts, pixel_values_df], axis=1)
    
#     return pts_with_values

# # Call the function with your GeoDataFrame (tdata) and xarray Dataset (image)
# result = add_image_values(tdata, merged)

In [24]:
# Get values for each of the image bands at each of the points.
pts_proj = tdata.to_crs(merged.odc.crs)

# a DataArray with x & y coords
pts_da = pts_proj.assign(x=pts_proj.geometry.x, y=pts_proj.geometry.y).to_xarray()

# a dataframe or series (for a single point)
pt_values_i = (
    merged.sel(pts_da[["x", "y"]], method="nearest").squeeze().compute().to_pandas()
)

if isinstance(pt_values_i, pd.Series):
    pt_values_i = pt_values_i.to_frame().transpose()
    pt_values_i.index = tdata.index

In [25]:
# Print all columns
print(pt_values_i.columns)

# Or, if you want to list all columns
columns_list = pt_values_i.columns.tolist()
print(columns_list)

Index(['y', 'x', 'spatial_ref', 'time', 'B02', 'B03', 'B04', 'B05', 'B06',
       'B07', 'B08', 'B8A', 'B11', 'B12', 'emad', 'bcmad', 'smad', 'ndvi',
       'mndwi', 'evi', 'savi', 'bsi', 'ndmi', 'ndbi', 'elevation', 'mean_vv',
       'mean_vh', 'mean_vv_vh'],
      dtype='object')
['y', 'x', 'spatial_ref', 'time', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B11', 'B12', 'emad', 'bcmad', 'smad', 'ndvi', 'mndwi', 'evi', 'savi', 'bsi', 'ndmi', 'ndbi', 'elevation', 'mean_vv', 'mean_vh', 'mean_vv_vh']


In [26]:
# Count the values in a specific column (e.g., "lulc_class" column)
value_counts = pt_values_i[''].value_counts()

# Print the counts
print(value_counts)

KeyError: ''

In [27]:
training_array = pd.concat([tdata, pt_values_i], axis=1).to_crs(4326)
# training_array = training_array.drop(
#     columns=[
#         "fid",
#         "Class",
#         "geometry",
#         "y",
#         "x",
#         "spatial_ref",
#         "time",
#     ]
# )

sorted_values = pt_values_i.sort_index(axis=1)

training_array = pd.concat([tdata["ClassId"], sorted_values], axis=1)

training_array = training_array.drop(columns=['time','x','y','spatial_ref'])


# Drop rows where there are any NaNs
training_array = training_array.dropna()


training_array.head()

Unnamed: 0,ClassId,B02,B03,B04,B05,B06,B07,B08,B11,B12,...,evi,mean_vh,mean_vv,mean_vv_vh,mndwi,ndbi,ndmi,ndvi,savi,smad
0,1,368.0,582.0,405.0,976.0,2490.0,3022.0,3001.0,1876.0,951.0,...,17334792.0,0.135326,0.457941,3.383984,-0.240705,0.436815,0.230675,0.762184,0.763475,0.005044
1,2,317.0,556.0,331.0,965.0,3484.0,4503.0,4622.0,1781.0,777.0,...,45382688.0,0.045692,0.211956,4.638844,-0.165791,0.566195,0.443698,0.866344,0.86292,0.001658
2,3,368.0,692.0,451.0,1210.0,2908.0,3390.0,3337.0,2241.0,1144.0,...,23686844.0,0.034474,0.167475,4.857959,-0.246187,0.412336,0.196486,0.76188,0.765016,0.004156
3,3,318.0,649.0,307.0,1076.0,3187.0,3818.0,3814.0,1814.0,801.0,...,28678492.0,0.057375,0.260329,4.537351,-0.104828,0.495191,0.355366,0.851007,0.850997,0.000696
4,3,350.0,717.0,356.0,1208.0,3254.0,3845.0,4125.0,2176.0,1053.0,...,34260208.0,0.033117,0.115324,3.482363,-0.189831,0.458539,0.309316,0.841107,0.830368,0.000778


In [28]:
classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_leaf=10,
    n_jobs=-1,
    random_state=42,
)

training_data = np.array(training_array)[:, 1:]
classes = np.array(training_array)[:, 0]

model = classifier.fit(training_data, classes)


In [29]:
joblib.dump(model, "NFI_model_ce.dump")

['NFI_model_ce.dump']

In [None]:
classes

In [None]:
# Print feature importances against column headings
fields_importances = sorted(
    zip(training_array.columns[1:], classifier.feature_importances_),
    key=lambda x: x[1],
    reverse=True,
)

for i in fields_importances:
    # Format as a table to 2 decinal places
    print(f"{i[0]:<11}| {i[1]:.2f}")
