This notebook loads satellite data and predicts the national land cover of Rwanda using the pre-trained random forest model saved in the previous notebook.

### load packages

In [None]:
%matplotlib inline
import os
import datacube
import warnings
import time
import numpy as np
from scipy import stats
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.algo import xr_geomedian
import xarray as xr
from joblib import load
from deafrica_tools.classification import predict_xr
from deafrica_tools.dask import create_local_dask_cluster
from deafrica_tools.datahandling import load_ard
from deafrica_tools.bandindices import calculate_indices
from deafrica_tools.plotting import display_map
from datacube.utils.cog import write_cog

### set parameters

In [None]:
# --- input files -----------------------------------------------------------
# shapefile of tiles covering the entire country
rwanda_tiles_shp = 'Data/Rwanda_tiles_epsg32736_smaller.shp'
# trained random forest model
rf_model_path = 'Results/RF_model_Rwanda.joblib'

# --- attributes and coordinate reference systems ---------------------------
class_name = 'LC_Class_I'   # class label in integer format
crs = 'epsg:4326'           # input crs: WGS84
output_crs = 'epsg:32735'   # output crs: WGS84/UTM Zone 35S

# --- band measurements requested in the data query -------------------------
measurements = [
    'blue', 'green', 'red',
    'red_edge_1', 'red_edge_2', 'red_edge_3',
    'nir_1', 'nir_2',
    'swir_1', 'swir_2',
]

### load data

In [None]:
# Read the tile shapefile, reproject it to the query CRS and take the
# per-tile bounding boxes from the reprojected geometries.
rwanda_tiles = gpd.read_file(rwanda_tiles_shp)
rwanda_tiles = rwanda_tiles.to_crs(crs)
tile_bboxes = rwanda_tiles.bounds
print('tile boundaries for Rwanda: \n', tile_bboxes)

# Load the trained model and restrict it to a single job.
# NOTE(review): joblib.load unpickles arbitrary Python objects - only load
# model files that come from a trusted source.
# NOTE(review): n_jobs=1 presumably leaves parallelism to dask - confirm.
rf_model = load(rf_model_path)
rf_model = rf_model.set_params(n_jobs=1)
print('loaded random forest models:\n', rf_model)

### define feature layer function - same as features for training data

In [None]:
# define a function to define features
def feature_layers(query):
    """Build the stacked multi-temporal feature layers for one query area.

    The function loads Sentinel-2 (``s2_l2a``) data with ``load_ard``, adds
    an NDVI band, reduces the time series to one geomedian per two-month
    window (``resample(time='2MS')``), and finally flattens the time
    dimension so that every band/time-step pair becomes its own variable
    named ``<band>_<k>`` (``k`` being the index of the two-month window).

    Variables are emitted band by band, and within each band in time order.
    NOTE(review): this order presumably has to match the feature order used
    when the model was trained - confirm against the training notebook.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``load_ard`` (spatial and
        temporal extent, measurements, resolution, CRS, dask chunks, ...).

    Returns
    -------
    xarray.Dataset
        One variable per band and two-month window, merged into a single
        dataset.
    """
    # connect to the datacube
    dc = datacube.Datacube(app='feature_layers')
    # query bands
    ds = load_ard(dc=dc,
                  products=['s2_l2a'],
                  group_by='solar_day',
                  verbose=False,
                  **query)
    # calculate NDVI (drop=False keeps the original bands alongside it)
    ds = calculate_indices(ds, index=['NDVI'], drop=False, satellite_mission='s2')
    # calculate geomedians within each two-month interval
    ds = ds.resample(time='2MS').map(xr_geomedian)

    # Dataset.sizes is the supported name -> length mapping; using
    # Dataset.dims as a mapping is deprecated in recent xarray releases.
    n_time = ds.sizes['time']

    # stack multi-temporal measurements and rename them: <band>_<time index>
    list_stack_measures = [
        ds[band].isel(time=k).rename(f'{band}_{k}')
        for band in ds.data_vars
        for k in range(n_time)
    ]
    # compat='override' skips the equality check on conflicting variables and
    # keeps the first one; the per-slice `time` values differ between windows.
    ds_stacked = xr.merge(list_stack_measures, compat='override')
    return ds_stacked

### set up dask cluster for parallel processing

In [None]:
# Set up a local dask cluster with a single worker; the prediction cell
# below requests dask-chunked data via the 'dask_chunks' query entry.
create_local_dask_cluster(n_workers=1)

### run prediction for all tiles and export geotiffs

In [None]:
# Predict land cover tile by tile across the country and write, for every
# tile, one GeoTIFF with the predictions and one with the probabilities.
for i, (minx, miny, maxx, maxy) in enumerate(tile_bboxes.itertuples(index=False, name=None)):
    # report the bounding box of the current tile
    print('bounding box for tile ',i,': minx: ',minx,'miny: ',miny,'maxx: ',maxx,'maxy: ',maxy)

    # Sentinel-2 query for this tile
    query = dict(
        x=(minx, maxx),
        y=(miny, maxy),
        time=('2021-01', '2021-12'),
        measurements=measurements,
        resolution=(-10, 10),
        crs=crs,
        output_crs=output_crs,
        # update here as needed depending on tile size and sandbox instance
        dask_chunks={'x': 1000, 'y': 1000},
    )

    # build the stacked feature layers for the tile
    all_data = feature_layers(query)
    print('stacked Sentinel-2 dataset:\n', all_data)

    # predict classes and probabilities with the RF model, timing the step
    start_time = time.time()
    predicted = predict_xr(
        rf_model,
        all_data,
        proba=True,
        persist=False,
        clean=True,
    ).compute()
    elapsed_seconds = time.time() - start_time
    print("%s seconds spent on predicting" % elapsed_seconds)

    # write final prediction and probability rasters as cog files
    print('writing cog file...')
    outname_prediction = f'Results/Land_cover_prediction_Rwanda_2021_tile_{i}.tif'
    outname_probability = f'Results/Land_cover_probability_Rwanda_2021_tile_{i}.tif'
    write_cog(predicted.Predictions, outname_prediction, overwrite=True)
    # probabilities are cast to integers before being written
    write_cog(predicted.Probabilities.astype(int), outname_probability, overwrite=True)

### do mosaic

In [None]:
# Merge all per-tile prediction GeoTIFFs into one Deflate-compressed mosaic.
# NOTE(review): -ot Byte assumes every class label fits in 0-255 - confirm.
! gdal_merge.py -o Results/Land_cover_prediction_Rwanda_2021_tiles_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_prediction_Rwanda_2021_tile_*.tif

In [None]:
# Merge all per-tile probability GeoTIFFs into one Deflate-compressed mosaic.
# NOTE(review): -ot Byte assumes probability values fit in 0-255 - confirm.
! gdal_merge.py -o Results/Land_cover_probability_Rwanda_2021_tiles_mosaic.tif -co COMPRESS=Deflate -ot Byte Results/Land_cover_probability_Rwanda_2021_tile_*.tif