In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd
import xarray as xr
from rasterio.enums import Resampling
from random_sampling import random_sampling # adapted from function by Chad Burton: https://gist.github.com/cbur24/04760d645aa123a3b1817b07786e7d9f
from datacube.utils.cog import write_cog
# file paths and attributes
# rf2021_path='Data/landcover2021baseline.tif' # baseline classification map
# rf2021_path='Data/Rwanda_Sentinel2_LULC2016.tif' # baseline classification map
rf2021_path='Data/rwanda_landcover_2015_scheme_ii.tif' # baseline classification map
output_crs='epsg:32735' # WGS84/UTM Zone 35S
class_name='LC_Class_I' # class label in integer format
# n_samples=100
n_samples=5000
# buffer_distance=1000
# load baseline classification map
rf_2021_raster = xr.open_dataset(rf2021_path,engine="rasterio").astype(np.uint8).squeeze("band", drop=True)
# reproject the raster
rf_2021_raster= rf_2021_raster.rio.reproject(resolution=10, dst_crs=output_crs,resampling=Resampling.nearest)
print('baseline classifcation raster:\n',rf_2021_raster)
da_mask=rf_2021_raster.band_data

# merge classes when needed
da_mask=da_mask.where(da_mask!=2,1) # merge moderate forest (2) and dense forest (1)
da_mask=da_mask.where(da_mask!=6,5) # merge open grassland (6) and closed grassland (5)
da_mask=da_mask.where(da_mask!=8,7) # merge open shrubland (8) and closed shrubland (7)
da_mask=da_mask.where((da_mask!=14)&(da_mask!=3)&(da_mask!=0)&(da_mask!=255),0)
# export map with merged classes
# write_cog(da_mask,'Results/rwanda_landcover_2015_scheme_ii_classes_merged.tif', overwrite=True)

da_mask=da_mask.where((da_mask!=0),np.nan) # replace other class values as nan so they won't be sampled
# da_mask=da_mask.where((da_mask!=255)&(da_mask!=0),np.nan) # replace other and nodata class values as nan so they won't be sampled

# gpd_samples=random_sampling(da_mask,n_samples,sampling='stratified_random', manual_class_ratios=None,out_fname=None)
gpd_samples=random_sampling(da_mask,n_samples,sampling='manual',
                            manual_class_ratios={'1.0':900,'5.0':600,'7.0':900,'9.0':900,'10.0':1300,'11.0':900,'12.0':600,'13.0':1000},
                            out_fname=None)
# gpd_samples=random_sampling(da_mask,n_samples,sampling='equal_stratified_random',manual_class_ratios=None,out_fname=None)
# gpd_samples=random_sampling(da_mask,n_samples,sampling='stratified_random',manual_class_ratios=None,out_fname=None)
if gpd_samples.crs is None:
    gpd_samples=gpd_samples.set_crs(output_crs)
# gpd_samples['geometry']=gpd_samples['geometry'].buffer(buffer_distance,cap_style = 3)
gpd_samples[class_name]=gpd_samples['class'].astype(int)
# print('stratified samples after buffering:\n')
print('stratified samples:\n')
print(gpd_samples)
gpd_samples.to_file('Results/manual_number_random_training_points_scheme_ii_2015.geojson', driver="GeoJSON")
# gpd_samples.to_file('Results/stratified_random_training_points_scheme_ii_2015.geojson', driver="GeoJSON")
# gpd_samples.to_file('Results/manual_number_random_training_points_2016.geojson', driver="GeoJSON")
# gpd_samples.to_file('Results/Rwanda_random_sampling_AOIs.geojson', driver="GeoJSON")
# gpd_samples.to_file('Results/stratified_random_training_points_2016.geojson', driver="GeoJSON")



baseline classifcation raster:
 <xarray.Dataset>
Dimensions:      (x: 23234, y: 20992)
Coordinates:
  * x            (x) float64 7.043e+05 7.044e+05 ... 9.367e+05 9.367e+05
  * y            (y) float64 9.887e+06 9.887e+06 ... 9.678e+06 9.678e+06
    spatial_ref  int64 0
Data variables:
    band_data    (y, x) uint8 255 255 255 255 255 255 ... 255 255 255 255 255
Class 1.0: sampled at 900 coordinates
Class 5.0: sampled at 600 coordinates
Class 7.0: sampled at 900 coordinates
Class 9.0: sampled at 900 coordinates
Class 10.0: sampled at 1300 coordinates
Class 11.0: sampled at 900 coordinates
Class 12.0: sampled at 600 coordinates
Class 13.0: sampled at 1000 coordinates
stratified samples:

      spatial_ref  class                        geometry  LC_Class_I
0               0    1.0  POINT (761265.704 9740241.901)           1
1               0    1.0  POINT (757655.704 9755451.901)           1
2               0    1.0  POINT (833365.704 9755131.901)           1
3               0    1.0  PO

In [21]:
da_mask

In [4]:
classes.dtype

dtype('float64')

In [16]:
manual_class_ratios={'1.0':1000,'2.0':1000,'3.0':1000,'4.0':1000,'5.0':1000,'7.0':500,'8.0':1000,'10.0':200}
list(manual_class_ratios.keys())

['1.0', '2.0', '3.0', '4.0', '5.0', '7.0', '8.0', '10.0']

In [17]:
dict_classes = list(manual_class_ratios.keys())
set(dict_classes).issubset([str(i) for i in classes])

True

In [12]:
set(dict_classes)

{1, 2, 3, 4, 5, 7, 8, 10}

In [13]:
[str(i) for i in classes]

['1.0',
 '2.0',
 '3.0',
 '4.0',
 '5.0',
 '6.0',
 '7.0',
 '8.0',
 '10.0',
 '200.0',
 'nan']