In [1]:
%matplotlib inline
import os
import datacube
import warnings
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import rioxarray
from odc.io.cgroups import get_cpu_quota
from odc.algo import xr_geomedian
from deafrica_tools.datahandling import load_ard
from deafrica_tools.bandindices import calculate_indices
from deafrica_tools.classification import collect_training_data
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from rasterio.enums import Resampling
from random_sampling import random_sampling # adapted from function by Chad Burton: https://gist.github.com/cbur24/04760d645aa123a3b1817b07786e7d9f

# CPU quota reported for this session (rounded); reused later for parallel extraction
ncpus = round(get_cpu_quota())
print('ncpus = ' + str(ncpus))

# input file locations and attribute names
# traning_points_path = 'Data/trainning_samples_FNDS_II_SOM_2016.geojson'
traning_points_path = 'Results/stratified_random_training_points_lulc_2016_balanced.geojson'
# rf2017_path='Data/Landcover_map_ODC_Brazil_2015_2016.tif'
rf2017_path = 'Data/moz_lulc2016_28082019_final.tif'
tiles_shp = 'Data/Mozambique_tiles_biggest1.shp'
class_name = 'LC_Class_I'  # column holding the class label in integer format
crs = 'epsg:32736'  # WGS84/UTM Zone 36S

# read the reference land cover survey points, reproject them to the working CRS
# and keep only the class label and the geometry
training_data2017 = gpd.read_file(traning_points_path).to_crs(crs)[[class_name, 'geometry']]
print('land cover survey points 2017:\n', training_data2017)

# read the tile polygons in the working CRS and derive their bounding boxes
tiles = gpd.read_file(tiles_shp).to_crs(crs)
tile_bboxes = tiles.bounds
print('tile boundaries for Mozambique: \n', tile_bboxes)

# load the initial classification map as an unsigned 8-bit, single-band data array
# (reprojection is currently disabled:
#  rf_2017_raster.rio.reproject(resolution=10, dst_crs=crs, resampling=Resampling.nearest))
rf_2017_raster = (
    xr.open_dataset(rf2017_path, engine="rasterio")
    .astype(np.uint8)
    .squeeze("band", drop=True)
    .band_data
)
print('Reference land cover classifcation raster:\n', rf_2017_raster)  # note: 255 is nodata

  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


ncpus = 31
land cover survey points 2017:
       LC_Class_I                         geometry
0             11   POINT (720926.179 7420038.062)
1             11   POINT (664556.179 7439898.062)
2             11   POINT (706736.179 7390518.062)
3             11   POINT (669446.179 7267068.062)
4             11  POINT (1271036.179 8536308.062)
...          ...                              ...
5196          62  POINT (1043126.179 8498568.062)
5197          21   POINT (710666.179 7871448.062)
5198          21   POINT (486476.179 7844088.062)
5199          21   POINT (801416.179 8518578.062)
5200          21   POINT (517916.179 7895478.062)

[5201 rows x 2 columns]
tile boundaries for Mozambique: 
            minx          miny          maxx          maxy
0  2.007347e+05  8.332509e+06  7.007347e+05  8.832509e+06
1  2.007347e+05  7.832519e+06  7.007347e+05  8.332519e+06
2  2.007347e+05  7.332529e+06  7.007347e+05  7.832529e+06
3  2.007347e+05  6.832539e+06  7.007347e+05  7.332539e+06
4  7.007

In [2]:
# distinct class labels found in the reference survey points
lc_classes = training_data2017[class_name].unique()
print('land cover classes:\n', lc_classes)

# number of random samples used to optimise the number of k-means clusters
n_samples = 1000
# passed unchanged as the zonal_stats argument of collect_training_data
zonal_stats = None
# standard scaler used to standardise the input features
scaler = StandardScaler()
# minimum cluster frequency a training point's cluster must reach to be kept
frequency_threshold = 0.05
# fill_nan_value=-999 # value to replace nans in query results

# Sentinel-2 bands requested from the datacube
measurements = [
    'blue', 'green', 'red',
    'red_edge_1', 'red_edge_2', 'red_edge_3',
    'nir_1', 'nir_2',
    'swir_1', 'swir_2',
]

# datacube query shared by every feature extraction
query = {
    'time': ('2021-01', '2021-12'),
    'measurements': measurements,
    'output_crs': crs,
    'resolution': (-10, 10),
}
# define a function to generate the feature layers
def feature_layers(query):
    """Build stacked two-monthly Sentinel-2 feature layers for a datacube query.

    The function loads the ``s2_l2a`` product with ``load_ard``, adds NDVI with
    ``calculate_indices``, reduces every two-month interval (``'2MS'``) with
    ``xr_geomedian`` and finally flattens the time dimension: each
    variable/interval combination becomes its own layer named
    ``<variable>_<interval index>`` (for example ``red_0``, ``red_1``, ...).

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded to ``load_ard`` (time range, measurements,
        output CRS, resolution and the spatial extent).

    Returns
    -------
    xarray.Dataset or xarray.DataArray or None
        A dataset holding one variable per variable/interval combination.
        For backward compatibility a single layer is returned as a bare
        ``DataArray`` and ``None`` is returned when there is nothing to stack.
    """
    # connect to the datacube
    dc = datacube.Datacube(app='feature_layers')
    ds = load_ard(dc=dc,
                  products=['s2_l2a'],
                  group_by='solar_day',
                  verbose=False,
                  # optional: mask_filters=[("opening", 2)] applies a 2-pixel
                  # morphological opening to remove small masked regions
                  **query)
    ds = calculate_indices(ds,
                           index=['NDVI'],
                           drop=False,
                           satellite_mission='s2')
    # calculate geomedians within each two-month interval
    ds = ds.resample(time='2MS').map(xr_geomedian)

    # Dataset.sizes is the stable name -> length mapping; indexing Dataset.dims
    # by name is deprecated in recent xarray releases
    n_time = ds.sizes['time']

    # one renamed 2-D layer per variable and time step, variable-major order
    layers = [
        ds[name].isel(time=k).rename(name + '_' + str(k))
        for name in list(ds.data_vars)
        for k in range(n_time)
    ]
    if not layers:
        return None
    if len(layers) == 1:
        return layers[0]
    # merge once instead of once per layer; compat='override' keeps the
    # conflicting scalar 'time' coordinate of the first layer, exactly as the
    # incremental merge did
    return xr.merge(layers, compat='override')

land cover classes:
 [11 21 31 44 51 61 62 70 71 72 73 77 33 12 79 74 42 75]


In [None]:
# Filter the reference training points class by class.
# For every land cover class:
#   1. draw random points of that class from the reference map, tile by tile, with the
#      per-tile share of n_samples proportional to the tile's number of class pixels;
#   2. extract the features at those points, standardise them and fit k-means, keeping
#      the number of clusters (2 to 9) with the highest Calinski-Harabasz score;
#   3. extract the features at the original training points of the class, assign every
#      point to a cluster and drop the points whose cluster holds less than
#      frequency_threshold of the class's training points.
# Classes for which a step cannot be carried out are reported and skipped instead of
# aborting the whole run.
filtered_per_class=[] # filtered training points of every processed class
td2021_filtered=None # filtered training data of all classes, assembled after the loop
for i in lc_classes:
    print('Processing class ',i)
    # total number of reference map pixels labelled with this class
    n_total=np.sum(rf_2017_raster.to_numpy()==i)
    if n_total==0:
        # avoids a 0/0 share below and a later failure on missing samples
        print('Class ',i,' does not occur in the reference map, skipping this class')
        continue

    # generate randomly sampled data to fit and optimise a kmeans clusterer
    # (tiles are clipped one at a time to limit memory use)
    tile_samples=[]
    for n in range(len(tile_bboxes)):
        print('stratified random sampling from tile ',n)
        da_mask=rf_2017_raster.rio.clip([tiles.iloc[n].geometry],crs=crs,drop=True)
        da_mask=da_mask.rio.reproject(dst_crs=crs,resampling=Resampling.nearest)
        # share of n_samples proportional to the class pixels inside this tile
        n_samples_tile=n_samples*np.sum(da_mask.to_numpy()==i)/n_total
        if n_samples_tile>0:
            tile_samples.append(random_sampling(da_mask,n_samples_tile,sampling='manual',
                                                manual_class_ratios={str(i):n_samples_tile},out_fname=None))
    gpd_samples=pd.concat(tile_samples) if tile_samples else None
    if gpd_samples is None or len(gpd_samples)==0:
        print('No random samples could be drawn for class ',i,', skipping this class')
        continue
    # drop the attributes derived from the random_sampling function
    gpd_samples=gpd_samples.reset_index(drop=True).drop(columns=['spatial_ref','class'])
    gpd_samples[class_name]=i # add attribute field so that we can use collect_training_data function
    if gpd_samples.crs is None:
        gpd_samples=gpd_samples.set_crs(crs)
    print('radomly sampled points for class ',i,'\n',gpd_samples)

    # extract data for the random samples
    column_names, sampled_data = collect_training_data(gdf=gpd_samples,
                                                       dc_query=query,
                                                       ncpus=ncpus,
                                                       field=class_name,
                                                       zonal_stats=zonal_stats,
                                                       feature_func=feature_layers,
                                                       return_coords=False)
    if sampled_data.shape[0]<3:
        # at least 3 samples are needed to score a 2-cluster solution
        print('Too few valid random samples for class ',i,', skipping this class')
        continue
    # standardise features (first column is the class label)
    sampled_data=scaler.fit_transform(sampled_data[:,1:])

    # fit kmeans model using the sample training data
    # first find optimal number of clusters based on Calinski-Harabasz index (higher is better)
    highest_score=-np.inf
    n_cluster_optimal=2
    kmeans_model_optimal=None # initialise optimal model parameters
    labels_optimal=None
    # the score needs fewer clusters than samples, so cap the candidates accordingly
    for n_cluster in range(2,min(10,sampled_data.shape[0])):
        kmeans_model = KMeans(n_clusters=n_cluster, random_state=1).fit(sampled_data)
        labels=kmeans_model.predict(sampled_data)
        score=metrics.calinski_harabasz_score(sampled_data, labels)
        print('Calinski-Harabasz score for ',n_cluster,' clusters is: ',score)
        if kmeans_model_optimal is None or score>highest_score:
            highest_score=score
            n_cluster_optimal=n_cluster
            kmeans_model_optimal=kmeans_model
            labels_optimal=labels
    print('Best number of clusters for class %s: %s'%(i,n_cluster_optimal))

    # subset original training points for this class
    td_single_class=training_data2017[training_data2017[class_name]==i].reset_index(drop=True)
    print('Number of training data collected: ',len(td_single_class))
    column_names, model_input = collect_training_data(gdf=td_single_class,
                                                      dc_query=query,
                                                      ncpus=ncpus,
                                                      field=class_name,
                                                      zonal_stats=zonal_stats,
                                                      feature_func=feature_layers,
                                                      clean=True,
                                                      return_coords=True)
    print('Number of training data after removing Nans and Infs: ',model_input.shape[0])
    if model_input.shape[0]==0:
        print('No valid training data left for class ',i,', skipping this class')
        continue
    # first convert the training data to pandas
    td_single_class_filtered=pd.DataFrame(data=model_input,columns=column_names)
    # then to geopandas dataframe (the last two columns hold the point coordinates)
    td_single_class_filtered=gpd.GeoDataFrame(td_single_class_filtered,
                                    geometry=gpd.points_from_xy(model_input[:,-2], model_input[:,-1],
                                                                crs=crs))
    # normalisation before clustering, using the scaler fitted on the random samples
    # (drop the leading class label and the two trailing coordinate columns)
    model_input=scaler.transform(model_input[:,1:-2])
    # predict clustering labels
    labels_kmeans = kmeans_model_optimal.predict(model_input)
    # append clustering results to pixel coordinates
    td_single_class_filtered['cluster']=labels_kmeans
    # append frequency of each cluster among the training points of this class
    cluster_frequency=td_single_class_filtered['cluster'].map(td_single_class_filtered['cluster'].value_counts(normalize=True))
    td_single_class_filtered['cluster_frequency']=cluster_frequency
    # filter by cluster frequency
    td_single_class_filtered=td_single_class_filtered[td_single_class_filtered['cluster_frequency']>=frequency_threshold]
    print('Number of training data after filtering: ',len(td_single_class_filtered))
    # export filtered training data for this class as geojson file
    td_single_class_filtered.to_file('Results/stratified_random_training_points_lulc_2016_balanced_2021_filtered_class_'+str(i)+'.geojson', driver="GeoJSON")
    # keep the filtered training points of this class for the final merge
    filtered_per_class.append(td_single_class_filtered)

# export the filtered training data as txt file
output_file = "Results/stratified_random_training_points_lulc_2016_balanced_2021_filtered.txt"
if filtered_per_class:
    # concatenate once instead of growing the frame inside the loop
    td2021_filtered=pd.concat(filtered_per_class)
    # save training data for all classes
    print('filtered training data for 2021:\n',td2021_filtered)
    td2021_filtered.to_file('Results/stratified_random_training_points_lulc_2016_balanced_2021_filtered.geojson', driver="GeoJSON")
    td2021_filtered.to_csv(output_file, header=True, index=None, sep=' ')
else:
    print('No filtered training data was produced, nothing to export')

Processing class  11
stratified random sampling from tile  0
Class 11: sampled at 11 coordinates
stratified random sampling from tile  1
Class 11: sampled at 67 coordinates
stratified random sampling from tile  2
Class 11: sampled at 92 coordinates
stratified random sampling from tile  3
Class 11: sampled at 156 coordinates
stratified random sampling from tile  4
Class 11: sampled at 48 coordinates
stratified random sampling from tile  5
Class 11: sampled at 134 coordinates
stratified random sampling from tile  6
Class 11: sampled at 252 coordinates
stratified random sampling from tile  7
Class 11: sampled at 67 coordinates
stratified random sampling from tile  8
Class 11: sampled at 103 coordinates
stratified random sampling from tile  9
Class 11: sampled at 20 coordinates
radomly sampled points for class  11 
                             geometry  LC_Class_I
0     POINT (368539.879 8365205.739)          11
1     POINT (388830.519 8362651.267)          11
2     POINT (372487.700 83456

  0%|          | 0/950 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (950, 67)
Calinski-Harabasz score for  2  clusters is:  391.7765653104752
Calinski-Harabasz score for  3  clusters is:  292.1122840891573
Calinski-Harabasz score for  4  clusters is:  265.434145994096
Calinski-Harabasz score for  5  clusters is:  231.794589895595
Calinski-Harabasz score for  6  clusters is:  206.8545765972111
Calinski-Harabasz score for  7  clusters is:  187.70718047034285
Calinski-Harabasz score for  8  clusters is:  173.85138187398704
Calinski-Harabasz score for  9  clusters is:  161.14536354244004
Best number of clusters for class 11: 2
Number of training data collected:  171
Collecting training data in parallel mode


  0%|          | 0/171 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (171, 69)
Number of training data after removing Nans and Infs:  171
Number of training data after filtering:  171
Processing class  21
stratified random sampling from tile  0
stratified random sampling from tile  1
Class 21: sampled at 297 coordinates
stratified random sampling from tile  2
Class 21: sampled at 42 coordinates
stratified random sampling from tile  3
Class 21: sampled at 1 coordinates
stratified random sampling from tile  4
Class 21: sampled at 452 coordinates
stratified random sampling from tile  5
Class 21: sampled at 159 coordinates
stratified random sampling from tile  6
stratified random sampling from tile  7
stratified random sampling from tile  8
stratified random sampling from tile  9
radomly sampled points for class  21 
                            geometry  LC_Class_I
0    POINT (563282.602 7889069.308)          21
1    POINT (479279.172 7916448.419)          21
2

  0%|          | 0/951 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeom

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (951, 67)
Calinski-Harabasz score for  2  clusters is:  422.4430433621877
Calinski-Harabasz score for  3  clusters is:  324.26643381835135
Calinski-Harabasz score for  4  clusters is:  258.77212769026323
Calinski-Harabasz score for  5  clusters is:  224.58334178364015
Calinski-Harabasz score for  6  clusters is:  196.40903717053035
Calinski-Harabasz score for  7  clusters is:  179.49160842363798
Calinski-Harabasz score for  8  clusters is:  166.79796373707924
Calinski-Harabasz score for  9  clusters is:  158.87797897011012
Best number of clusters for class 21: 2
Number of training data collected:  104
Collecting training data in parallel mode


  0%|          | 0/104 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (104, 69)
Number of training data after removing Nans and Infs:  104
Number of training data after filtering:  104
Processing class  31
stratified random sampling from tile  0
Class 31: sampled at 31 coordinates
stratified random sampling from tile  1
Class 31: sampled at 260 coordinates
stratified random sampling from tile  2
Class 31: sampled at 85 coordinates
stratified random sampling from tile  3
Class 31: sampled at 52 coordinates
stratified random sampling from tile  4
Class 31: sampled at 329 coordinates
stratified random sampling from tile  5
Class 31: sampled at 151 coordinates
stratified random sampling from tile  6
Class 31: sampled at 2 coordinates
stratified random sampling from tile  7
Class 31: sampled at 0 coordinates
stratified random sampling from tile  8
Class 31: sampled at 36 coordinates
stratified random sampling from tile  9
Class 31: sampled at 6 coordinates
radoml

  0%|          | 0/952 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (952, 67)
Calinski-Harabasz score for  2  clusters is:  218.62463152685064
Calinski-Harabasz score for  3  clusters is:  170.89617088152687
Calinski-Harabasz score for  4  clusters is:  143.35016641329577
Calinski-Harabasz score for  5  clusters is:  124.31932421569387
Calinski-Harabasz score for  6  clusters is:  120.92961626431588
Calinski-Harabasz score for  7  clusters is:  116.0610782732956
Calinski-Harabasz score for  8  clusters is:  110.30243088143557
Calinski-Harabasz score for  9  clusters is:  102.5226702746347
Best number of clusters for class 31: 2
Number of training data collected:  135
Collecting training data in parallel mode


  0%|          | 0/135 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (135, 69)
Number of training data after removing Nans and Infs:  135
Number of training data after filtering:  135
Processing class  44
stratified random sampling from tile  0
Class 44: sampled at 403 coordinates
stratified random sampling from tile  1
Class 44: sampled at 209 coordinates
stratified random sampling from tile  2
Class 44: sampled at 79 coordinates
stratified random sampling from tile  3
Class 44: sampled at 58 coordinates
stratified random sampling from tile  4
Class 44: sampled at 74 coordinates
stratified random sampling from tile  5
Class 44: sampled at 76 coordinates
stratified random sampling from tile  6
Class 44: sampled at 15 coordinates
stratified random sampling from tile  7
Class 44: sampled at 12 coordinates
stratified random sampling from tile  8
Class 44: sampled at 24 coordinates
stratified random sampling from tile  9
Class 44: sampled at 9 coordinates
radom

  0%|          | 0/959 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeom

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (959, 67)
Calinski-Harabasz score for  2  clusters is:  966.149099249513
Calinski-Harabasz score for  3  clusters is:  782.2552920925787
Calinski-Harabasz score for  4  clusters is:  831.8352999428586
Calinski-Harabasz score for  5  clusters is:  811.6748718481074
Calinski-Harabasz score for  6  clusters is:  773.397798459548
Calinski-Harabasz score for  7  clusters is:  719.4378191726568
Calinski-Harabasz score for  8  clusters is:  670.5681728818683
Calinski-Harabasz score for  9  clusters is:  637.2037133408585
Best number of clusters for class 44: 2
Number of training data collected:  178
Collecting training data in parallel mode


  0%|          | 0/178 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (178, 69)
Number of training data after removing Nans and Infs:  178
Number of training data after filtering:  178
Processing class  51
stratified random sampling from tile  0
Class 51: sampled at 49 coordinates
stratified random sampling from tile  1
Class 51: sampled at 102 coordinates
stratified random sampling from tile  2
Class 51: sampled at 75 coordinates
stratified random sampling from tile  3
Class 51: sampled at 235 coordinates
stratified random sampling from tile  4
Class 51: sampled at 99 coordinates
stratified random sampling from tile  5
Class 51: sampled at 236 coordinates
stratified random sampling from tile  6
Class 51: sampled at 34 coordinates
stratified random sampling from tile  7
Class 51: sampled at 2 coordinates
stratified random sampling from tile  8
Class 51: sampled at 97 coordinates
stratified random sampling from tile  9
Class 51: sampled at 26 coordinates
rado

  0%|          | 0/955 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/37/L/FE/2021/12/S2A_37LFE_20211208_0_L2A/B04.tif


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (954, 67)
Calinski-Harabasz score for  2  clusters is:  410.6694178478213
Calinski-Harabasz score for  3  clusters is:  306.4373923673509
Calinski-Harabasz score for  4  clusters is:  252.98490705201814
Calinski-Harabasz score for  5  clusters is:  219.2567855524824
Calinski-Harabasz score for  6  clusters is:  199.7226243991408
Calinski-Harabasz score for  7  clusters is:  179.76330806996853
Calinski-Harabasz score for  8  clusters is:  164.7730432011007
Calinski-Harabasz score for  9  clusters is:  151.96040142336685
Best number of clusters for class 51: 2
Number of training data collected:  131
Collecting training data in parallel mode


  0%|          | 0/131 [00:00<?, ?it/s]

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (131, 69)
Number of training data after removing Nans and Infs:  131
Number of training data after filtering:  131
Processing class  61
stratified random sampling from tile  0
Class 61: sampled at 9 coordinates
stratified random sampling from tile  1
Class 61: sampled at 150 coordinates
stratified random sampling from tile  2
Class 61: sampled at 268 coordinates
stratified random sampling from tile  3
Class 61: sampled at 198 coordinates
stratified random sampling from tile  4
Class 61: sampled at 87 coordinates
stratified random sampling from tile  5
Class 61: sampled at 140 coordinates
stratified random sampling from tile  6
Class 61: sampled at 31 coordinates
stratified random sampling from tile  7
Class 61: sampled at 3 coordinates
stratified random sampling from tile  8
Class 61: sampled at 45 coordinates
stratified random sampling from tile  9
Class 61: sampled at 22 coordinates
rado

  0%|          | 0/953 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (953, 67)
Calinski-Harabasz score for  2  clusters is:  457.50738176603045
Calinski-Harabasz score for  3  clusters is:  360.5668441932537
Calinski-Harabasz score for  4  clusters is:  359.5244856196999
Calinski-Harabasz score for  5  clusters is:  321.1752309722033
Calinski-Harabasz score for  6  clusters is:  300.6151929443659
Calinski-Harabasz score for  7  clusters is:  282.77199362764645
Calinski-Harabasz score for  8  clusters is:  267.9910178053293
Calinski-Harabasz score for  9  clusters is:  255.3409939469191
Best number of clusters for class 61: 2
Number of training data collected:  161
Collecting training data in parallel mode


  0%|          | 0/161 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (161, 69)
Number of training data after removing Nans and Infs:  161
Number of training data after filtering:  161
Processing class  62
stratified random sampling from tile  0
Class 62: sampled at 30 coordinates
stratified random sampling from tile  1
Class 62: sampled at 46 coordinates
stratified random sampling from tile  2
Class 62: sampled at 51 coordinates
stratified random sampling from tile  3
Class 62: sampled at 1 coordinates
stratified random sampling from tile  4
Class 62: sampled at 410 coordinates
stratified random sampling from tile  5
Class 62: sampled at 301 coordinates
stratified random sampling from tile  6
stratified random sampling from tile  7
stratified random sampling from tile  8
Class 62: sampled at 106 coordinates
stratified random sampling from tile  9
Class 62: sampled at 9 coordinates
radomly sampled points for class  62 
                             geometry  

  0%|          | 0/954 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (954, 67)
Calinski-Harabasz score for  2  clusters is:  247.09990346048988
Calinski-Harabasz score for  3  clusters is:  241.7773823899388
Calinski-Harabasz score for  4  clusters is:  206.74605432180545
Calinski-Harabasz score for  5  clusters is:  189.11130769641176
Calinski-Harabasz score for  6  clusters is:  178.7492470816923
Calinski-Harabasz score for  7  clusters is:  166.16943832642576
Calinski-Harabasz score for  8  clusters is:  156.91792508686657
Calinski-Harabasz score for  9  clusters is:  147.6520525358138
Best number of clusters for class 62: 2
Number of training data collected:  112
Collecting training data in parallel mode


  0%|          | 0/112 [00:00<?, ?it/s]

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (112, 69)
Number of training data after removing Nans and Infs:  112
Number of training data after filtering:  112
Processing class  70
stratified random sampling from tile  0
stratified random sampling from tile  1
Class 70: sampled at 0 coordinates
stratified random sampling from tile  2
Class 70: sampled at 133 coordinates
stratified random sampling from tile  3
Class 70: sampled at 20 coordinates
stratified random sampling from tile  4
stratified random sampling from tile  5
Class 70: sampled at 459 coordinates
stratified random sampling from tile  6
Class 70: sampled at 109 coordinates
stratified random sampling from tile  7
stratified random sampling from tile  8
Class 70: sampled at 113 coordinates
stratified random sampling from tile  9
Class 70: sampled at 113 coordinates
radomly sampled points for class  70 
                             geometry  LC_Class_I
0     POINT (690238.09

  0%|          | 0/947 [00:00<?, ?it/s]

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (947, 67)
Calinski-Harabasz score for  2  clusters is:  404.7222560368082
Calinski-Harabasz score for  3  clusters is:  330.6488650045943
Calinski-Harabasz score for  4  clusters is:  305.17531356983795
Calinski-Harabasz score for  5  clusters is:  281.9055751831285
Calinski-Harabasz score for  6  clusters is:  274.894011692069
Calinski-Harabasz score for  7  clusters is:  259.64942779065774
Calinski-Harabasz score for  8  clusters is:  254.1359959881054
Calinski-Harabasz score for  9  clusters is:  243.80448415372697
Best number of clusters for class 70: 2
Number of training data collected:  114
Collecting training data in parallel mode


  0%|          | 0/114 [00:00<?, ?it/s]

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (114, 69)
Number of training data after removing Nans and Infs:  114
Number of training data after filtering:  114
Processing class  71
stratified random sampling from tile  0
stratified random sampling from tile  1
stratified random sampling from tile  2
Class 71: sampled at 886 coordinates
stratified random sampling from tile  3
Class 71: sampled at 52 coordinates
stratified random sampling from tile  4
stratified random sampling from tile  5
stratified random sampling from tile  6
Class 71: sampled at 5 coordinates
stratified random sampling from tile  7
Class 71: sampled at 0 coordinates
stratified random sampling from tile  8
Class 71: sampled at 5 coordinates
stratified random sampling from tile  9
radomly sampled points for class  71 
                             geometry  LC_Class_I
0     POINT (568804.405 7526243.390)          71
1     POINT (467917.427 7441037.567)          71
2 

  0%|          | 0/948 [00:00<?, ?it/s]

Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/UA/2021/6/S2B_36KUA_20210612_0_L2A/B02.tif
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/WU/2021/1/S2A_36KWU_20210128_0_L2A/B06.tif
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/XA/2021/3/S2B_36KXA_20210311_0_L2A/B05.tif
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/XA/2021/4/S2A_36KXA_20210405_1_L2A/B11.tif
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/UA/2021/6/S2B_36KUA_20210612_0_L2A/B02.tif
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/WU/2021/1/S2A_36KWU_20210128_0_L2A/B06.tif
  data = nangeomedian_pcm(xx_data, **kw)
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/UA/2021/6/S2B_36KUA_20210612_0_L2A/B02.tif
Error opening source dataset: s3://deafrica-sentinel-2/sentinel-s2-l2a-cogs/36/K/UA/2021/6/

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (937, 67)
Calinski-Harabasz score for  2  clusters is:  450.49750874999415
Calinski-Harabasz score for  3  clusters is:  348.9057167906388
Calinski-Harabasz score for  4  clusters is:  296.73257185565177
Calinski-Harabasz score for  5  clusters is:  264.75179959809657
Calinski-Harabasz score for  6  clusters is:  234.871304828333
Calinski-Harabasz score for  7  clusters is:  217.27695578647396
Calinski-Harabasz score for  8  clusters is:  200.24929416610112
Calinski-Harabasz score for  9  clusters is:  190.51440356779554
Best number of clusters for class 71: 2
Number of training data collected:  121
Collecting training data in parallel mode


  0%|          | 0/121 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (121, 69)
Number of training data after removing Nans and Infs:  121
Number of training data after filtering:  121
Processing class  72
stratified random sampling from tile  0
Class 72: sampled at 144 coordinates
stratified random sampling from tile  1
Class 72: sampled at 271 coordinates
stratified random sampling from tile  2
Class 72: sampled at 21 coordinates
stratified random sampling from tile  3
stratified random sampling from tile  4
Class 72: sampled at 447 coordinates
stratified random sampling from tile  5
Class 72: sampled at 72 coordinates
stratified random sampling from tile  6
stratified random sampling from tile  7
stratified random sampling from tile  8
stratified random sampling from tile  9
radomly sampled points for class  72 
                            geometry  LC_Class_I
0    POINT (527613.854 8416295.191)          72
1    POINT (538209.109 8418414.242)          72


  0%|          | 0/955 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeom

Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (955, 67)
Calinski-Harabasz score for  2  clusters is:  233.41079365017103
Calinski-Harabasz score for  3  clusters is:  205.473758544723
Calinski-Harabasz score for  4  clusters is:  195.78573901677922
Calinski-Harabasz score for  5  clusters is:  187.3536442951122
Calinski-Harabasz score for  6  clusters is:  188.36405451203748
Calinski-Harabasz score for  7  clusters is:  180.63144416632684
Calinski-Harabasz score for  8  clusters is:  175.72047286639727
Calinski-Harabasz score for  9  clusters is:  168.01319641862477
Best number of clusters for class 72: 2
Number of training data collected:  114
Collecting training data in parallel mode


  0%|          | 0/114 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (114, 69)
Number of training data after removing Nans and Infs:  114
Number of training data after filtering:  114
Processing class  73
stratified random sampling from tile  0
Class 73: sampled at 9 coordinates
stratified random sampling from tile  1
Class 73: sampled at 54 coordinates
stratified random sampling from tile  2
Class 73: sampled at 84 coordinates
stratified random sampling from tile  3
Class 73: sampled at 113 coordinates
stratified random sampling from tile  4
Class 73: sampled at 208 coordinates
stratified random sampling from tile  5
Class 73: sampled at 361 coordinates
stratified random sampling from tile  6
Class 73: sampled at 13 coordinates
stratified random sampling from tile  7
Class 73: sampled at 5 coordinates
stratified random sampling from tile  8
Class 73: sampled at 89 coordinates
stratified random sampling from tile  9
Class 73: sampled at 16 coordinates
radom

  0%|          | 0/952 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (952, 67)
Calinski-Harabasz score for  2  clusters is:  290.91252923727416
Calinski-Harabasz score for  3  clusters is:  226.5140880949746
Calinski-Harabasz score for  4  clusters is:  208.37940707817927
Calinski-Harabasz score for  5  clusters is:  188.44671238493936
Calinski-Harabasz score for  6  clusters is:  170.71228287165886
Calinski-Harabasz score for  7  clusters is:  156.78900158277312
Calinski-Harabasz score for  8  clusters is:  148.34451395616028
Calinski-Harabasz score for  9  clusters is:  139.35455842538653
Best number of clusters for class 73: 2
Number of training data collected:  179
Collecting training data in parallel mode


  0%|          | 0/179 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (179, 69)
Number of training data after removing Nans and Infs:  179
Number of training data after filtering:  179
Processing class  77
stratified random sampling from tile  0
Class 77: sampled at 5 coordinates
stratified random sampling from tile  1
Class 77: sampled at 67 coordinates
stratified random sampling from tile  2
Class 77: sampled at 109 coordinates
stratified random sampling from tile  3
Class 77: sampled at 200 coordinates
stratified random sampling from tile  4
Class 77: sampled at 254 coordinates
stratified random sampling from tile  5
Class 77: sampled at 191 coordinates
stratified random sampling from tile  6
Class 77: sampled at 86 coordinates
stratified random sampling from tile  7
Class 77: sampled at 1 coordinates
stratified random sampling from tile  8
Class 77: sampled at 32 coordinates
stratified random sampling from tile  9
Class 77: sampled at 6 coordinates
radom

  0%|          | 0/951 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (951, 67)
Calinski-Harabasz score for  2  clusters is:  263.1298952799804
Calinski-Harabasz score for  3  clusters is:  211.380583054828
Calinski-Harabasz score for  4  clusters is:  182.6982333278258
Calinski-Harabasz score for  5  clusters is:  168.29162238464247
Calinski-Harabasz score for  6  clusters is:  153.63465083468543
Calinski-Harabasz score for  7  clusters is:  143.11024906027197
Calinski-Harabasz score for  8  clusters is:  137.3978190658095
Calinski-Harabasz score for  9  clusters is:  130.73589384779683
Best number of clusters for class 77: 2
Number of training data collected:  142
Collecting training data in parallel mode


  0%|          | 0/142 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (142, 69)
Number of training data after removing Nans and Infs:  142
Number of training data after filtering:  142
Processing class  33
stratified random sampling from tile  0
Class 33: sampled at 34 coordinates
stratified random sampling from tile  1
Class 33: sampled at 172 coordinates
stratified random sampling from tile  2
Class 33: sampled at 192 coordinates
stratified random sampling from tile  3
Class 33: sampled at 62 coordinates
stratified random sampling from tile  4
Class 33: sampled at 258 coordinates
stratified random sampling from tile  5
Class 33: sampled at 135 coordinates
stratified random sampling from tile  6
Class 33: sampled at 11 coordinates
stratified random sampling from tile  7
Class 33: sampled at 1 coordinates
stratified random sampling from tile  8
Class 33: sampled at 62 coordinates
stratified random sampling from tile  9
Class 33: sampled at 28 coordinates
rad

  0%|          | 0/955 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (955, 67)
Calinski-Harabasz score for  2  clusters is:  202.45257733513827
Calinski-Harabasz score for  3  clusters is:  220.73434429877113
Calinski-Harabasz score for  4  clusters is:  208.07140854991403
Calinski-Harabasz score for  5  clusters is:  184.46193388194675
Calinski-Harabasz score for  6  clusters is:  169.49738732238168
Calinski-Harabasz score for  7  clusters is:  154.42600109188774
Calinski-Harabasz score for  8  clusters is:  141.9545055399606
Calinski-Harabasz score for  9  clusters is:  132.9902924595882
Best number of clusters for class 33: 3
Number of training data collected:  1025
Collecting training data in parallel mode


  0%|          | 0/1025 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (1025, 69)
Number of training data after removing Nans and Infs:  1025
Number of training data after filtering:  1025
Processing class  12
stratified random sampling from tile  0
Class 12: sampled at 57 coordinates
stratified random sampling from tile  1
Class 12: sampled at 173 coordinates
stratified random sampling from tile  2
Class 12: sampled at 100 coordinates
stratified random sampling from tile  3
Class 12: sampled at 84 coordinates
stratified random sampling from tile  4
Class 12: sampled at 143 coordinates
stratified random sampling from tile  5
Class 12: sampled at 270 coordinates
stratified random sampling from tile  6
Class 12: sampled at 20 coordinates
stratified random sampling from tile  7
Class 12: sampled at 2 coordinates
stratified random sampling from tile  8
Class 12: sampled at 83 coordinates
stratified random sampling from tile  9
Class 12: sampled at 22 coordinates


  0%|          | 0/954 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (954, 67)
Calinski-Harabasz score for  2  clusters is:  344.38898201881494
Calinski-Harabasz score for  3  clusters is:  267.5790789975694
Calinski-Harabasz score for  4  clusters is:  231.9584701579376
Calinski-Harabasz score for  5  clusters is:  197.25671967113348
Calinski-Harabasz score for  6  clusters is:  176.64561993496068
Calinski-Harabasz score for  7  clusters is:  162.06277689867426
Calinski-Harabasz score for  8  clusters is:  153.68549492620872
Calinski-Harabasz score for  9  clusters is:  144.18856972337215
Best number of clusters for class 12: 2
Number of training data collected:  856
Collecting training data in parallel mode


  0%|          | 0/856 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (856, 69)
Number of training data after removing Nans and Infs:  856
Number of training data after filtering:  856
Processing class  79
stratified random sampling from tile  0
Class 79: sampled at 52 coordinates
stratified random sampling from tile  1
Class 79: sampled at 176 coordinates
stratified random sampling from tile  2
Class 79: sampled at 201 coordinates
stratified random sampling from tile  3
Class 79: sampled at 16 coordinates
stratified random sampling from tile  4
Class 79: sampled at 294 coordinates
stratified random sampling from tile  5
Class 79: sampled at 99 coordinates
stratified random sampling from tile  6
Class 79: sampled at 14 coordinates
stratified random sampling from tile  7
Class 79: sampled at 0 coordinates
stratified random sampling from tile  8
Class 79: sampled at 95 coordinates
stratified random sampling from tile  9
Class 79: sampled at 8 coordinates
radom

  0%|          | 0/955 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (955, 67)
Calinski-Harabasz score for  2  clusters is:  219.08174145002334
Calinski-Harabasz score for  3  clusters is:  177.1694548155094
Calinski-Harabasz score for  4  clusters is:  152.30262797520928
Calinski-Harabasz score for  5  clusters is:  131.23684255658284
Calinski-Harabasz score for  6  clusters is:  118.29335484965644
Calinski-Harabasz score for  7  clusters is:  108.09255440971839
Calinski-Harabasz score for  8  clusters is:  100.21518796224818
Calinski-Harabasz score for  9  clusters is:  94.06067304999544
Best number of clusters for class 79: 2
Number of training data collected:  855
Collecting training data in parallel mode


  0%|          | 0/855 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (855, 69)
Number of training data after removing Nans and Infs:  855
Number of training data after filtering:  855
Processing class  74
stratified random sampling from tile  0
Class 74: sampled at 39 coordinates
stratified random sampling from tile  1
Class 74: sampled at 130 coordinates
stratified random sampling from tile  2
Class 74: sampled at 132 coordinates
stratified random sampling from tile  3
Class 74: sampled at 10 coordinates
stratified random sampling from tile  4
Class 74: sampled at 324 coordinates
stratified random sampling from tile  5
Class 74: sampled at 234 coordinates
stratified random sampling from tile  6
Class 74: sampled at 10 coordinates
stratified random sampling from tile  7
Class 74: sampled at 0 coordinates
stratified random sampling from tile  8
Class 74: sampled at 63 coordinates
stratified random sampling from tile  9
Class 74: sampled at 12 coordinates
rad

  0%|          | 0/954 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (954, 67)
Calinski-Harabasz score for  2  clusters is:  210.81123291645844
Calinski-Harabasz score for  3  clusters is:  160.943073873514
Calinski-Harabasz score for  4  clusters is:  139.33387580525604
Calinski-Harabasz score for  5  clusters is:  122.5377792656518
Calinski-Harabasz score for  6  clusters is:  109.46475108341107
Calinski-Harabasz score for  7  clusters is:  101.78136919673521
Calinski-Harabasz score for  8  clusters is:  94.4284896177594
Calinski-Harabasz score for  9  clusters is:  89.9081344196394
Best number of clusters for class 74: 2
Number of training data collected:  493
Collecting training data in parallel mode


  0%|          | 0/493 [00:00<?, ?it/s]

  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)
  data = nangeomedian_pcm(xx_data, **kw)


Percentage of possible fails after run 1 = 0.0 %
Removed 0 rows wth NaNs &/or Infs
Output shape:  (493, 69)
Number of training data after removing Nans and Infs:  493
Number of training data after filtering:  493
Processing class  42
stratified random sampling from tile  0
Class 42: sampled at 5 coordinates
stratified random sampling from tile  1
Class 42: sampled at 129 coordinates
stratified random sampling from tile  2
Class 42: sampled at 126 coordinates
stratified random sampling from tile  3
Class 42: sampled at 97 coordinates
stratified random sampling from tile  4
Class 42: sampled at 159 coordinates
stratified random sampling from tile  5
Class 42: sampled at 351 coordinates
stratified random sampling from tile  6
Class 42: sampled at 27 coordinates
stratified random sampling from tile  7
Class 42: sampled at 6 coordinates
stratified random sampling from tile  8
Class 42: sampled at 36 coordinates
stratified random sampling from tile  9
Class 42: sampled at 15 coordinates
rado

  0%|          | 0/951 [00:00<?, ?it/s]

In [4]:
# Export the filtered training points from GeoJSON to a space-delimited text table.
filtered_points_path = "Results/stratified_random_training_points_lulc_2016_balanced_2021_filtered_removing_nans.geojson"
td_single_class_filtered = gpd.read_file(filtered_points_path)

# Derive the output name from the input so the two paths cannot drift apart:
# same folder and base name as the GeoJSON, with a .txt extension.
output_file = os.path.splitext(filtered_points_path)[0] + ".txt"

# index=False is the documented boolean form; the previous index=None only worked
# because None is falsy. The row index is still omitted and the header row is kept.
# NOTE(review): the separator is a single space; the geometry column is presumably
# serialised as text that itself contains spaces (and would then be quoted by the
# CSV writer) - confirm that downstream readers of this file handle that.
td_single_class_filtered.to_csv(output_file, header=True, index=False, sep=' ')