This notebook generates randomly distributed training points, stratified equally by class, from a reference land cover map of Rwanda. Before sampling, similar classes are merged and the Sparse Forest and Woodland classes are reassigned to nodata (0). The class-merged reference land cover map is also exported for future use.

### load packages

In [11]:
import numpy as np
import geopandas as gpd
import pandas as pd
import xarray as xr
from rasterio.enums import Resampling
from random_sampling import random_sampling # adapted from function by Chad Burton: https://gist.github.com/cbur24/04760d645aa123a3b1817b07786e7d9f
from datacube.utils.cog import write_cog

### input files and attributes

In [12]:
# file paths and attributes
basemap_path='Data/rwanda_landcover_2015_scheme_ii.tif' # baseline classification map
output_crs='epsg:32735' # output crs: WGS84/UTM Zone 35S
class_name='LC_Class_I' # name of the attribute holding the integer class label
# total number of training points to be extracted; 9000 is the total reflected in the
# saved sampling output of this notebook (9 classes x 1000 points each)
n_samples=9000
# class name -> integer code used in the reference land cover map
dict_map={
    'Nodata':0,
    'Dense Forest':1,
    'Moderate Forest':2,
    'Sparse Forest':3,
    'Woodland':4,
    'Closed Grassland':5,
    'Open Grassland':6,
    'Closed Shrubland':7,
    'Open Shrubland':8,
    'Perennial Cropland':9,
    'Annual Cropland':10,
    'Wetland':11,
    'Water Body':12,
    'Urban Settlement':13,
    'Other Land':14,
}

### class merging

In [20]:
# Open the reference land cover map, cast it to uint8 and drop the singleton "band" dimension.
basemap_raster = xr.open_dataset(basemap_path, engine="rasterio")
basemap_raster = basemap_raster.astype(np.uint8).squeeze("band", drop=True)

# Reproject to the output CRS at a resolution of 10 (output CRS units);
# nearest-neighbour resampling keeps the class codes as they are.
basemap_raster = basemap_raster.rio.reproject(
    dst_crs=output_crs,
    resolution=10,
    resampling=Resampling.nearest,
)
print('baseline classifcation raster:\n', basemap_raster)

da_basemap = basemap_raster.band_data

# Merge classes: every pixel of the first class in a pair takes the code of the second.
merged_pairs = (
    ('Moderate Forest', 'Dense Forest'),
    ('Open Grassland', 'Closed Grassland'),
    ('Open Shrubland', 'Closed Shrubland'),
)
for source_class, target_class in merged_pairs:
    is_source = da_basemap == dict_map[source_class]
    da_basemap = da_basemap.where(~is_source, dict_map[target_class])

# Drop classes: pixels equal to 255 (the value printed at the raster edges above) and
# the Sparse Forest / Woodland classes are set to 0, the 'Nodata' code in dict_map.
to_nodata = da_basemap == 255
for dropped_class in ('Sparse Forest', 'Woodland'):
    to_nodata = to_nodata | (da_basemap == dict_map[dropped_class])
da_basemap = da_basemap.where(~to_nodata, 0)

# Export the map with merged classes (kept as the last expression so the output path is displayed).
write_cog(da_basemap, 'Results/rwanda_landcover_2015_scheme_ii_classes_merged.tif', overwrite=True)

baseline classifcation raster:
 <xarray.Dataset>
Dimensions:      (x: 23234, y: 20992)
Coordinates:
  * x            (x) float64 7.043e+05 7.044e+05 ... 9.367e+05 9.367e+05
  * y            (y) float64 9.887e+06 9.887e+06 ... 9.678e+06 9.678e+06
    spatial_ref  int64 0
Data variables:
    band_data    (y, x) uint8 255 255 255 255 255 255 ... 255 255 255 255 255


PosixPath('Results/rwanda_landcover_2015_scheme_ii_classes_merged.tif')

### generate training points

In [21]:
# Draw random training points from the class-merged map, with the total number of points
# taken from n_samples in the configuration cell instead of a hard-coded literal.
# NOTE: the saved output of this cell came from a run with 9000 points in total
# (1000 per class); set n_samples to 9000 to reproduce it.
# NOTE(review): no random seed is set anywhere in this notebook, so the sampled
# locations presumably differ between runs - confirm whether random_sampling seeds internally.
gpd_samples=random_sampling(da=da_basemap,n=n_samples,sampling='equal_stratified_random',
                            min_sample_n=20,out_fname=None,class_attr=class_name,drop_value=0)
if gpd_samples.crs is None: # assign crs if needed
    gpd_samples=gpd_samples.set_crs(output_crs)
# store the class label as an integer
gpd_samples[class_name]=gpd_samples[class_name].astype(int)
print('stratified samples:\n')
print(gpd_samples)

Class 1: sampling at 1000 locations
Class 5: sampling at 1000 locations
Class 7: sampling at 1000 locations
Class 9: sampling at 1000 locations
Class 10: sampling at 1000 locations
Class 11: sampling at 1000 locations
Class 12: sampling at 1000 locations
Class 13: sampling at 1000 locations
Class 14: sampling at 1000 locations
stratified samples:

      spatial_ref  LC_Class_I                        geometry
0               0           1  POINT (761725.704 9725561.901)
1               0           1  POINT (779015.704 9826591.901)
2               0           1  POINT (743245.704 9727491.901)
3               0           1  POINT (890175.704 9863001.901)
4               0           1  POINT (751905.704 9714111.901)
...           ...         ...                             ...
8995            0          14  POINT (764505.704 9720661.901)
8996            0          14  POINT (775275.704 9742871.901)
8997            0          14  POINT (788625.704 9737561.901)
8998            0          14 

### export training points

In [22]:
# Write the sampled training points to disk in GeoJSON format.
out_fname = 'Results/Training_samples_Rwanda.geojson'
gpd_samples.to_file(filename=out_fname, driver="GeoJSON")