This notebook generates training points from a reference land cover map of Rwanda using stratified random sampling. Class merging is applied before sampling. The class-merged reference land cover map is also exported for future use.

### load packages

In [None]:
import numpy as np
import geopandas as gpd
import pandas as pd
import xarray as xr
from rasterio.enums import Resampling
from random_sampling import random_sampling # adapted from function by Chad Burton: https://gist.github.com/cbur24/04760d645aa123a3b1817b07786e7d9f
from datacube.utils.cog import write_cog

### input files and attributes

In [None]:
# --- user-configurable inputs -------------------------------------------
# baseline classification map (GeoTIFF)
basemap_path = 'Data/rwanda_landcover_2015_scheme_ii.tif'
# output crs: WGS84/UTM Zone 35S
output_crs = 'epsg:32735'
# name of the attribute holding the class label in integer format
class_name = 'LC_Class_I'
# number of total training points to be extracted
n_samples = 5000
# class label -> integer code of the reference map; a label's code is its
# position in the list below (0 for 'Nodata' up to 14 for 'Other Land')
dict_map = {
    label: code
    for code, label in enumerate([
        'Nodata',
        'Dense Forest',
        'Moderate Forest',
        'Sparse Forest',
        'Woodland',
        'Closed Grassland',
        'Open Grassland',
        'Closed Shrubland',
        'Open Shrubland',
        'Perennial Cropland',
        'Annual Cropland',
        'Wetland',
        'Water Body',
        'Urban Settlement',
        'Other Land',
    ])
}

### class merging

In [None]:
# open the reference land cover map, cast it to 8-bit integers and drop the
# length-1 "band" dimension
basemap_raster = xr.open_dataset(basemap_path, engine="rasterio")
basemap_raster = basemap_raster.astype(np.uint8).squeeze("band", drop=True)
# reproject to the output crs at a resolution of 10 (crs units); nearest-neighbour
# resampling keeps the class codes categorical
basemap_raster = basemap_raster.rio.reproject(
    dst_crs=output_crs,
    resolution=10,
    resampling=Resampling.nearest,
)
print('baseline classifcation raster:\n', basemap_raster)
da_basemap = basemap_raster.band_data

# merge classes when needed: pixels of the classes listed here are recoded to 0
# NOTE(review): 0 is the 'Nodata' code in dict_map and is also the drop_value
# passed to random_sampling, so these classes appear to be excluded rather than
# folded into a neighbouring class -- confirm this is the intended merge.
recoded_labels = ['Moderate Forest', 'Sparse Forest', 'Woodland',
                  'Open Grassland', 'Open Shrubland']
recoded_codes = [dict_map[label] for label in recoded_labels]
da_basemap = da_basemap.where(~da_basemap.isin(recoded_codes), 0)

# export map with merged classes as a cloud-optimised GeoTIFF
write_cog(da_basemap, 'Results/rwanda_landcover_2015_scheme_ii_classes_merged.tif', overwrite=True)

### generate training points

In [None]:
# draw training points from the class-merged map with the
# 'equal_stratified_random' strategy of random_sampling.
# The number of points comes from n_samples in the configuration cell; the
# call previously hard-coded n=9000, which left that setting unused.
# drop_value=0 is the 'Nodata' code in dict_map (presumably excluded from
# sampling by random_sampling -- verify against its implementation).
gpd_samples = random_sampling(
    da=da_basemap,
    n=n_samples,
    sampling='equal_stratified_random',
    min_sample_n=20,
    out_fname=None,
    class_attr=class_name,
    drop_value=0,
)
if gpd_samples.crs is None:  # assign crs if needed
    gpd_samples = gpd_samples.set_crs(output_crs)
# assign class attribute: copy the 'class' column as integers into the
# column named by class_name
gpd_samples[class_name] = gpd_samples['class'].astype(int)
print('stratified samples:\n')
print(gpd_samples)

### export training points

In [None]:
# write the sampled training points to disk in GeoJSON format
out_fname = 'Results/Training_samples_Rwanda.geojson'
gpd_samples.to_file(filename=out_fname, driver="GeoJSON")