In [1]:
%matplotlib inline
import os
import datacube
import warnings
import time
import numpy as np
from scipy import stats
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.io.cgroups import get_cpu_quota
from odc.algo import xr_geomedian
import xarray as xr
from joblib import load
from deafrica_tools.classification import predict_xr
from deafrica_tools.dask import create_local_dask_cluster
from deafrica_tools.datahandling import load_ard
from deafrica_tools.bandindices import calculate_indices
from deafrica_tools.plotting import display_map
from datacube.utils.cog import write_cog

# Number of CPUs available to this notebook.
# get_cpu_quota() returns None when no cgroup CPU quota applies (or it cannot
# be read), so fall back to the machine's CPU count instead of failing in round().
_cpu_quota = get_cpu_quota()
if _cpu_quota is not None:
    ncpus = max(1, round(_cpu_quota))  # never report fewer than one CPU
else:
    ncpus = os.cpu_count() or 1  # os.cpu_count() may itself return None
print('ncpus = '+str(ncpus))

# ---- File paths and attributes ----
# NOTE(review): the `lesotho_*` variable names look like leftovers from an
# earlier workflow; the files they point to here are the Mozambique tiles.
lesotho_tiles_shp = 'Data/Mozambique_tiles.shp'
# alternative tile set: 'Data/Mozambique_tiles_smaller.shp'
rf_model_path = 'Results/RF_model_feature_selection_Mozambique_2021.joblib'
# alternative model: 'Results/RF_model_Mozambique_2017.joblib'
class_name = 'LC_Class_I'  # class label in integer format
crs = 'epsg:32736'  # WGS84/UTM Zone 36S
fill_nan_value = -999  # value to replace nans in query results

# ---- Tile bounding boxes ----
# Read the tile polygons, reproject them to lon/lat (EPSG:4326) and keep the
# per-tile bounds table (columns minx, miny, maxx, maxy).
lesotho_tiles = gpd.read_file(lesotho_tiles_shp)
lesotho_tiles = lesotho_tiles.to_crs('epsg:4326')
tile_bboxes = lesotho_tiles.bounds
print('tile boundaries for Mozambique: \n',tile_bboxes)

# ---- Trained classifier ----
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model files from a trusted source.
rf_models = load(rf_model_path)
# Force single-threaded prediction inside the estimator
# (presumably because parallelism is left to dask; confirm).
rf_models = rf_models.set_params(n_jobs=1)
print('loaded random forest model:\n',rf_models)

# ---- Features and input bands ----
# Feature names are '<band or index>_<k>', where k is the position of the
# two-month geomedian along time (see feature_layers). The order here is the
# order in which features are passed to the classifier.
variables = [
    'nir_2_4',
    'nir_1_3',
    'swir_1_3',
    'swir_2_2',
    'NDVI_3',
    'swir_2_5',
    'swir_1_2',
    'red_edge_1_1',
    'swir_2_3',
    'red_edge_1_2',
]
# Sentinel-2 bands to load. Full band list for reference:
# ['blue','green','red','red_edge_1','red_edge_2','red_edge_3','nir_1','nir_2','swir_1','swir_2']
measurements = [
    'red',
    'red_edge_1',
    'nir_1',
    'nir_2',
    'swir_1',
    'swir_2',
]

# define a function that builds the classifier's feature layers for one tile query
def feature_layers(query):
    """Build the stacked feature dataset used as classifier input.

    Loads Sentinel-2 ('s2_l2a') data for ``query`` with ``load_ard``, adds
    NDVI, computes a geomedian for every two-month ('2MS') resampling bin and
    flattens the time axis into separate variables named ``<band>_<k>``,
    where ``k`` is the position of the bin along time. Only names listed in
    the module-level ``variables`` are kept.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded to ``load_ard`` (extent, time range,
        measurements, resolution, CRS, dask chunking, ...).

    Returns
    -------
    xarray.Dataset
        One variable per entry of ``variables``, in that order, so the
        feature order matches the one used to train the classifier. Every
        name in ``variables`` must have been produced, otherwise the final
        selection fails.
    """
    # connect to the datacube and load the analysis-ready data
    dc = datacube.Datacube(app='feature_layers')
    ds = load_ard(
        dc=dc,
        products=['s2_l2a'],
        group_by='solar_day',
        verbose=False,
        **query
    )

    # append NDVI while keeping the original bands
    ds_index = calculate_indices(ds, index=['NDVI'], drop=False, satellite_mission='s2')

    # one geomedian per two-month interval
    ds_geomedian = ds_index.resample(time='2MS').map(xr_geomedian)

    # turn every requested (band, time step) pair into its own named layer
    n_steps = ds_geomedian.dims['time']
    selected_layers = [
        ds_geomedian[band].isel(time=step).rename(band + '_' + str(step))
        for band in list(ds_geomedian.keys())
        for step in range(n_steps)
        if band + '_' + str(step) in variables
    ]

    # the layers carry different scalar 'time' coordinates; compat='override'
    # merges them without comparing those coordinates
    stacked = xr.merge(selected_layers, compat='override')

    # keep variables in the same order as the input used to train the classifier
    return stacked[variables]

ncpus = 62
tile boundaries for Mozambique: 
           minx       miny       maxx       maxy
0    30.167754 -14.981332  30.637192 -14.524515
1    30.161718 -15.432815  30.632311 -14.975851
2    30.155481 -15.884279  30.627263 -15.427167
3    30.149040 -16.335722  30.622046 -15.878463
4    30.632123 -14.985685  31.100772 -14.529646
..         ...        ...        ...        ...
393  40.357003 -14.874461  40.832744 -14.411274
394  40.372051 -15.322573  40.849255 -14.858936
395  40.387613 -15.770651  40.866317 -15.306564
396  40.707903 -11.276938  41.173371 -10.817135
397  40.816588 -14.859122  41.292701 -14.395539

[398 rows x 4 columns]
loaded random forest model:
 RandomForestClassifier(max_features=None, max_samples=0.5, min_samples_leaf=2,
                       min_samples_split=8, n_estimators=161, n_jobs=1,
                       random_state=1)


In [None]:
# Start a local dask cluster with three workers
create_local_dask_cluster(n_workers=3)

# Classify the tiles one by one, starting at positional index 393 of
# tile_bboxes, and write each prediction to its own COG file.
for i in range(393, len(tile_bboxes)):
    minx, miny, maxx, maxy = tile_bboxes.iloc[i]
    print('bounding box for tile ',i,': minx: ',minx,'miny: ',miny,'maxx: ',maxx,'maxy: ',maxy)

    # Sentinel-2 query for this tile: lon/lat extent in, UTM grid at 10 m out,
    # loaded lazily in 1700 x 1700 pixel dask chunks
    query = dict(
        x=(minx, maxx),
        y=(miny, maxy),
        time=('2021-01', '2021-12'),
        measurements=measurements,
        resolution=(-10, 10),
        crs='epsg:4326',
        output_crs=crs,
        dask_chunks={'x': 1700, 'y': 1700},
    )

    # feature_layers returns the features in the order used for training
    all_data = feature_layers(query)
    print('stacked Sentinel-2 dataset:\n',all_data)

    # predict classes of all data using the RF model and time the computation
    start_time = time.time()
    predicted = predict_xr(rf_models, all_data, persist=False, clean=True).compute()
    print("%s seconds spent on predicting" % (time.time() - start_time))

    # export the class map of this tile as a cloud-optimised GeoTIFF
    print('writing cog file...')
    outname_prediction = 'Results/Land_cover_prediction_feature_selection_Mozambique_2021_tile%s.tif' % str(i)
    write_cog(predicted.Predictions, outname_prediction, overwrite=True)

0,1
Client  Scheduler: tcp://127.0.0.1:33851  Dashboard: /user/whusggliuqx@gmail.com/proxy/8787/status,Cluster  Workers: 3  Cores: 63  Memory: 512.40 GB


bounding box for tile  393 : minx:  40.35700268682015 miny:  -14.874461321437146 maxx:  40.83274380128676 maxy:  -14.411273926242202
stacked Sentinel-2 dataset:
 <xarray.Dataset>
Dimensions:       (y: 5338, x: 5338)
Coordinates:
    time          datetime64[ns] 2021-03-01
  * y             (y) float64 8.394e+06 8.394e+06 ... 8.341e+06 8.341e+06
  * x             (x) float64 1.293e+06 1.293e+06 ... 1.347e+06 1.347e+06
Data variables:
    nir_2_4       (y, x) float32 dask.array<chunksize=(1700, 1700), meta=np.ndarray>
    nir_1_3       (y, x) float32 dask.array<chunksize=(1700, 1700), meta=np.ndarray>
    swir_1_3      (y, x) float32 dask.array<chunksize=(1700, 1700), meta=np.ndarray>
    swir_2_2      (y, x) float32 dask.array<chunksize=(1700, 1700), meta=np.ndarray>
    NDVI_3        (y, x) float32 dask.array<chunksize=(1700, 1700), meta=np.ndarray>
    swir_2_5      (y, x) float32 dask.array<chunksize=(1700, 1700), meta=np.ndarray>
    swir_1_2      (y, x) float32 dask.array<chunksize

In [1]:
! gdal_merge.py -o Results/Land_cover_prediction_feature_selection_Mozambique_2021_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_prediction_feature_selection_Mozambique_2021_tile*.tif

0...10...20...30...40...50...60...70...80...90...100 - done.
