This notebook generates randomly distributed training points from a reference land cover map of Rwanda. Class merging is applied before sampling. The class-merged reference land cover map is also exported for future use.

## load packages

In [None]:
import numpy as np
import geopandas as gpd
import pandas as pd
import xarray as xr
from rasterio.enums import Resampling
from random_sampling import random_sampling # adapted from function by Chad Burton: https://gist.github.com/cbur24/04760d645aa123a3b1817b07786e7d9f
from datacube.utils.cog import write_cog

## input files and attributes

In [None]:
# --- Input files and attributes ---------------------------------------------

# Baseline (reference) land cover classification raster to sample from.
basemap_path = "Data/rwanda_landcover_2015_scheme_ii.tif"

# CRS of the outputs: WGS84 / UTM Zone 35S.
output_crs = "epsg:32735"

# Name of the attribute that stores the class label in integer format.
class_name = "LC_Class_I"

# Total number of training points to be extracted.
n_samples = 5000

## class merging

In [None]:
# Load the reference land cover map as 8-bit class codes, drop the singleton
# "band" dimension and reproject it to the output CRS in one chain.
# Nearest-neighbour resampling is used so no new class codes are invented;
# the resolution of 10 is expressed in units of the output CRS.
basemap_raster = (
    xr.open_dataset(basemap_path, engine="rasterio")
    .astype(np.uint8)
    .squeeze("band", drop=True)
    .rio.reproject(
        dst_crs=output_crs,
        resolution=10,
        resampling=Resampling.nearest,
    )
)
print('baseline classifcation raster:\n', basemap_raster)
da_mask = basemap_raster.band_data

# Classes to merge, written as {class to absorb: class it is merged into}.
merge_into = {
    2: 1,  # moderate forest (2) -> dense forest (1)
    6: 5,  # open grassland (6) -> closed grassland (5)
    8: 7,  # open shrubland (8) -> closed shrubland (7)
}
for absorbed_code, kept_code in merge_into.items():
    da_mask = da_mask.where(da_mask != absorbed_code, kept_code)

# Classes with very few pixels (14, 3) and nodata values (0, 255) are all
# assigned to class 0.
discarded_codes = [14, 3, 0, 255]
da_mask = da_mask.where(~da_mask.isin(discarded_codes), 0)

# Export the class-merged map as a Cloud Optimized GeoTIFF for later use.
write_cog(
    da_mask,
    'Results/rwanda_landcover_2015_scheme_ii_classes_merged.tif',
    overwrite=True,
)

## generate training points

In [None]:
# Class 0 collects the "other" and nodata pixels (255 was already folded into
# 0 during class merging), so replacing it with NaN keeps those pixels from
# being sampled. NaN needs a floating-point array, which is presumably why the
# class keys below are spelled '1.0', '5.0', ...
da_mask = da_mask.where(da_mask != 0, np.nan)

# Number of points to draw for each (merged) class. Fixed per-class counts
# are used to keep enough samples for minor classes.
# NOTE(review): these counts add up to 7100 while n_samples is 5000;
# presumably random_sampling ignores n_samples when sampling='manual' —
# confirm against the random_sampling implementation.
points_per_class = {
    '1.0': 900,
    '5.0': 600,
    '7.0': 900,
    '9.0': 900,
    '10.0': 1300,
    '11.0': 900,
    '12.0': 600,
    '13.0': 1000,
}

# Alternative (stratified sampling):
# gpd_samples = random_sampling(da_mask, n_samples, sampling='stratified_random', manual_class_ratios=None, out_fname=None)
gpd_samples = random_sampling(
    da_mask,
    n_samples,
    sampling='manual',
    manual_class_ratios=points_per_class,
    out_fname=None,
)

# Assign the CRS in case it was lost during sampling.
if gpd_samples.crs is None:
    gpd_samples = gpd_samples.set_crs(output_crs)

# Store the sampled class as an integer attribute under the configured name.
gpd_samples[class_name] = gpd_samples['class'].astype(int)

# The label matches the sampling mode actually used above ('manual'); it
# previously read 'stratified samples', which mislabelled the output.
print('manual samples:\n')
print(gpd_samples)

## export training points

In [None]:
# Write the training points to disk as GeoJSON.
# File name for the stratified-sampling variant:
#   'Results/stratified_random_training_points_scheme_ii_2015.geojson'
training_points_path = 'Results/manual_number_random_training_points_scheme_ii_2015.geojson'
gpd_samples.to_file(training_points_path, driver="GeoJSON")