In [1]:
%matplotlib inline
import os
import datacube
import warnings
import time
import numpy as np
from scipy import stats
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.io.cgroups import get_cpu_quota
from odc.algo import xr_geomedian
import xarray as xr
from joblib import load
from deafrica_tools.classification import predict_xr
from deafrica_tools.dask import create_local_dask_cluster
from deafrica_tools.datahandling import load_ard
from deafrica_tools.bandindices import calculate_indices
from deafrica_tools.plotting import display_map
from datacube.utils.cog import write_cog

ncpus=round(get_cpu_quota())
print('ncpus = '+str(ncpus))

# file paths and attributes
# lesotho_tiles_shp='Results/Rwanda_random_sampling_AOIs.geojson'
lesotho_tiles_shp='Data/Rwanda_tiles_epsg32736_smaller.shp'
# rf_model_path='Results/RF_model_feature_selection_Rwanda_2017.joblib'
# rf_model_path='Results/RF_model_Rwanda_2017_balanced_7_classes.joblib'
# rf_model_path='Results/RF_model_Rwanda_2021_using_stratified_samples.joblib'
# rf_model_path='Results/RF_model_Rwanda_2021_using_manual_number_samples.joblib'
# rf_model_path='Results/RF_model_Rwanda_2021_using_2015_scheme2_stratified_samples.joblib'
rf_model_path='Results/RF_model_Rwanda_2021_using_2015_scheme2_stratified_samples_balanced.joblib'
# rf_model_path='Results/RF_model_Rwanda_2021_using_2015_scheme2_manual_random_samples.joblib'

class_name = 'LC_Class_I' # class label in integer format
crs='epsg:32735' # WGS84/UTM Zone 35S
fill_nan_value=-999 # value to replace nans in query results
## get Lesotho bounding box and reproject
#lesotho_polygon=gpd.read_file(lesotho_shp).to_crs(crs)
#[x_min,y_min,x_max,y_max]=lesotho_polygon.total_bounds
# get bounding boxes of tiles covering Lesotho
lesotho_tiles=gpd.read_file(lesotho_tiles_shp).to_crs('epsg:4326')
tile_bboxes=lesotho_tiles.bounds
print('tile boundaries for Rwanda: \n',tile_bboxes)

# load trained classifier
rf_models = load(rf_model_path).set_params(n_jobs=1)
print('loaded random forest models:\n',rf_models)

# variables=['red_4', 'NDVI_2', 'red_2', 'NDVI_0', 'nir_1_3', 'swir_2_2', 'swir_1_2',
#        'red_edge_1_2', 'nir_1_2', 'NDVI_4', 'green_2', 'red_edge_2_2',
#        'red_edge_2_3', 'nir_2_3', 'red_edge_3_3', 'red_3', 'NDVI_3',
#        'red_edge_1_3', 'swir_1_3', 'swir_2_3']
measurements = ['blue','green','red','red_edge_1','red_edge_2', 'red_edge_3','nir_1','nir_2','swir_1','swir_2']
# measurements = ['green','red','red_edge_1','red_edge_2', 'red_edge_3','nir_1','nir_2','swir_1','swir_2']
# define a function to feature layers
def feature_layers(query): 
    #connect to the datacube
    dc = datacube.Datacube(app='feature_layers')
    ds = load_ard(dc=dc,
                  products=['s2_l2a'],
                  group_by='solar_day',
                  verbose=False,
                  #mask_filters=[("opening", 2)], # morphological opening by 2 pixels to remove small masked regions
                  **query)
#     ds_index = calculate_indices(ds,index=['NDVI'],drop=False,collection='s2')
    ds_index = calculate_indices(ds,index=['NDVI'],drop=False,satellite_mission='s2')
    # interpolate nodata using mean of previous and next observation
#     ds=ds.interpolate_na(dim='time',method='linear',use_coordinate=False)
    # calculate geomedians within each two-month interval
    ds_geomedian=ds_index.resample(time='2MS').map(xr_geomedian)
#     ds_geomedian=ds_index.resample(time='3MS').map(xr_geomedian)
#     replace nan with a value
#     ds=ds.fillna(fill_nan_value)
    # stack multi-temporal measurements and rename them
    n_time=ds_geomedian.dims['time']
    list_measurements=list(ds_geomedian.keys())
    ds_stacked=None
    for j in range(len(list_measurements)):
        for k in range(n_time):
            variable_name=list_measurements[j]+'_'+str(k)
#             if variable_name in variables:
            measure_single=ds_geomedian[list_measurements[j]].isel(time=k).rename(variable_name)
            if ds_stacked is None:
                ds_stacked=measure_single
            else:
                ds_stacked=xr.merge([ds_stacked,measure_single],compat='override')
#     return ds_stacked[variables]
    return ds_stacked

  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


ncpus = 62
tile boundaries for Rwanda: 
          minx      miny       maxx      maxy
0   28.858947 -2.399463  29.128840 -2.127967
1   28.858086 -2.670073  29.128121 -2.398489
2   28.857133 -2.940681  29.127316 -2.669009
3   29.129841 -1.588116  29.399398 -1.316832
4   29.129293 -1.858813  29.398969 -1.587446
5   29.128660 -2.129509  29.398460 -1.858059
6   29.127941 -2.400203  29.397871 -2.128671
7   29.127136 -2.670897  29.397201 -2.399282
8   29.126244 -2.941589  29.396451 -2.669892
9   29.398789 -1.588571  29.668395 -1.317238
10  29.398280 -1.859345  29.667998 -1.587935
11  29.397691 -2.130118  29.667526 -1.858632
12  29.397021 -2.400891  29.666981 -2.129328
13  29.396271 -2.671662  29.666361 -2.400022
14  29.395441 -2.942431  29.665667 -2.670716
15  29.668215 -1.318144  29.937763 -1.046839
16  29.667818 -1.588990  29.937466 -1.317615
17  29.667347 -1.859836  29.937101 -1.588390
18  29.666801 -2.130681  29.936667 -1.859164
19  29.666181 -2.401525  29.936166 -2.129937
20  29.665487 

In [None]:
# Set up a dask cluster
create_local_dask_cluster(n_workers=1)
# predictions=None
# iterate through each tile
for i in range(1,len(tile_bboxes)):
# for i in range(36,37):
# for i in [7]:
#     for j in len(rf_models):
#         rf_model=rf_models[j]
    minx,miny,maxx,maxy=tile_bboxes.iloc[i]
    print('bounding box for tile ',i,': minx: ',minx,'miny: ',miny,'maxx: ',maxx,'maxy: ',maxy)
    # load Sentinel-2 data
    query = {
        'x': (minx,maxx),
        'y': (miny,maxy),
#         'time': ('2017-01', '2017-12'),
        'time': ('2021-01', '2021-12'),
        'measurements': measurements,
        'resolution': (-10, 10),
        'crs':'epsg:4326',
        'output_crs':crs,
        'dask_chunks' : {'x':-1, 'y':-1}
#         'dask_chunks' : {'x':1600, 'y':1600}
    }
    #calculate features
#     all_data = feature_layers(query)[var_names] # making sure feature order is the same to training data
    all_data = feature_layers(query) # making sure feature order is the same to training data
    print('stacked Sentinel-2 dataset:\n',all_data)
    start_time = time.time()
    # predict classes of all data using the RF model
   # predicted = predict_xr(rf_model,all_data,proba=True,persist=False,clean=True).compute()
#     predicted = predict_xr(rf_models,all_data,clean=True).compute()
    predicted = predict_xr(rf_models,all_data,persist=False,clean=True).compute()
    print("%s seconds spent on predicting" % (time.time() - start_time))
    print('writing cog file...')
#     if predictions is None:
#         predictions=predicted.Predictions
#     else:
#         predictions=xr.merge([predictions,predicted.Predictions])

#     predictions=predictions.to_array()
#     stats.mode(predictions).mode.squeeze()
#     outname_prediction='Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_manual_random_samples_AOI_'+str(i)+'.tif'
#     outname_prediction='Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_AOI_'+str(i)+'.tif'
    outname_prediction='Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_smaller_tile_'+str(i)+'.tif'
#     outname_prediction='Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_AOI_'+str(i)+'.tif'
#     outname_prediction='Results/Land_cover_prediction_Rwanda_2021_manual_number_sampling_AOI_'+str(i)+'.tif'
#     outname_prediction='Results/Land_cover_prediction_Rwanda_2021_random_sampling_AOI_'+str(i)+'.tif'
#     outname_prediction='Results/Land_cover_prediction_Rwanda_balanced_7_classes_2017_tile'+str(i)+'.tif'
#     outname_prediction='Results/Land_cover_prediction_Rwanda_feature_selection_2017_tile'+str(i)+'.tif'
    #outname_probability='Data/Land_cover_probability_2021_tile'+str(i)+'.tif'
    write_cog(predicted.Predictions, outname_prediction, overwrite=True)
    #write_cog(predicted.Probabilities, outname_probability, overwrite=True)

0,1
Client  Scheduler: tcp://127.0.0.1:41115  Dashboard: /user/whusggliuqx@gmail.com/proxy/8787/status,Cluster  Workers: 1  Cores: 62  Memory: 512.40 GB


bounding box for tile  1 : minx:  28.858086214740123 miny:  -2.6700728157274973 maxx:  29.12812104909637 maxy:  -2.398488769184952
stacked Sentinel-2 dataset:
 <xarray.Dataset>
Dimensions:       (y: 3009, x: 3009)
Coordinates:
    time          datetime64[ns] 2021-01-01
  * y             (y) float64 9.735e+06 9.735e+06 ... 9.705e+06 9.705e+06
  * x             (x) float64 7.066e+05 7.066e+05 ... 7.366e+05 7.367e+05
Data variables: (12/66)
    blue_0        (y, x) float32 dask.array<chunksize=(3009, 3009), meta=np.ndarray>
    blue_1        (y, x) float32 dask.array<chunksize=(3009, 3009), meta=np.ndarray>
    blue_2        (y, x) float32 dask.array<chunksize=(3009, 3009), meta=np.ndarray>
    blue_3        (y, x) float32 dask.array<chunksize=(3009, 3009), meta=np.ndarray>
    blue_4        (y, x) float32 dask.array<chunksize=(3009, 3009), meta=np.ndarray>
    blue_5        (y, x) float32 dask.array<chunksize=(3009, 3009), meta=np.ndarray>
    ...            ...
    NDVI_0        (y, x)

  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw),
  lambda x: nangeomedian_pcm(x, **kw)

7337.065276384354 seconds spent on predicting
writing cog file...
bounding box for tile  11 : minx:  29.3976906155535 miny:  -2.130118396768222 maxx:  29.667526496207802 maxy:  -1.8586319633404351
stacked Sentinel-2 dataset:
 <xarray.Dataset>
Dimensions:       (y: 3009, x: 3010)
Coordinates:
    time          datetime64[ns] 2021-01-01
  * y             (y) float64 9.794e+06 9.794e+06 ... 9.764e+06 9.764e+06
  * x             (x) float64 7.667e+05 7.667e+05 ... 7.968e+05 7.968e+05
Data variables: (12/66)
    blue_0        (y, x) float32 dask.array<chunksize=(3009, 3010), meta=np.ndarray>
    blue_1        (y, x) float32 dask.array<chunksize=(3009, 3010), meta=np.ndarray>
    blue_2        (y, x) float32 dask.array<chunksize=(3009, 3010), meta=np.ndarray>
    blue_3        (y, x) float32 dask.array<chunksize=(3009, 3010), meta=np.ndarray>
    blue_4        (y, x) float32 dask.array<chunksize=(3009, 3010), meta=np.ndarray>
    blue_5        (y, x) float32 dask.array<chunksize=(3009, 3010)

In [3]:
# ! gdal_merge.py -o Results/Land_cover_prediction_Rwanda_balanced_7_classes_2017_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_prediction_Rwanda_balanced_7_classes_2017_tile*.tif
# ! gdal_merge.py -o Results/Land_cover_prediction_Rwanda_2021_random_sampling_AOIs_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_prediction_Rwanda_2021_random_sampling_AOI_*.tif
# ! gdal_merge.py -o Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_AOIs_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_AOI_*.tif
# ! gdal_merge.py -o Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_AOIs_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_AOI_*.tif
! gdal_merge.py -o Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_smaller_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_prediction_Rwanda_2021_using_2015_sheme2_stratified_samples_balanced_smaller_tile_*.tif

0...10...20...30...40...50...60...70...80...90...100 - done.
