## Import libraries

In [1]:
import json
import joblib
from odc.io.cgroups import get_cpu_quota

from dea_ml.core.feature_layer import merge_tile_ds, get_xy_from_task

from dea_ml.helpers.json_to_taskstr import extract_taskstr_from_geojson
from dea_ml.helpers.io import download_file
from dea_ml.core.africa_geobox import AfricaGeobox
from dea_ml.core.predict_from_feature import PredictFromFeature

from dea_ml.config.product_feature_config import FeaturePathConfig

import feature_layer_chad_
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation and runtime warnings from the
# libraries used below; consider narrowing the filter.
warnings.filterwarnings("ignore")

## Analysis Params


In [2]:
# Define the feature layer function used to build the model's input features.
from gm_mads_two_seasons import gm_mads_two_seasons
feature_layer_function = gm_mads_two_seasons 

# Define the post-processing function applied to the predictions.
from post_processing import post_processing
post_process = post_processing

# Define the chunks to use for dask (this dict is passed to create_features
# as dask_chunks).
dask_chunks = {}

## Initiate configuration class

In [3]:
# Import the configuration.
# NOTE(review): this binds the FeaturePathConfig class itself, not an instance
# (the printed output is "<class ...>"); later cells read attributes such as
# config.tiles_geojson and config.model_path directly from it.
config = FeaturePathConfig
print(config)

<class 'dea_ml.config.product_feature_config.FeaturePathConfig'>


## Open tiles and model

In [4]:
# Download the tiles GeoJSON referenced by the config and parse it.
tile_geojson_url = config.tiles_geojson  # "s2_tiles_eastern_aez.geojson"
local_file = download_file(tile_geojson_url, '/tmp/tiles_geojson.json')
with open(local_file) as fh:
    tiles_geojson_dict = json.load(fh)

# Download and load the trained model referenced by the config.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code,
# so config.model_path must point to a trusted source.
ml_model_url = config.model_path  # 'gm_mads.joblib'
local_model_path = download_file(ml_model_url, '/tmp/ml-model-sample')
model = joblib.load(local_model_path)

# Match the model's parallelism to the CPUs available on this machine.
# get_cpu_quota() may return None when no cgroup CPU quota is set, and a
# fractional quota can round down to 0, which is not a valid n_jobs value.
# Fall back to -1 (use all CPUs) when there is no quota, and never go below 1.
cpu_quota = get_cpu_quota()
model.n_jobs = max(1, round(cpu_quota)) if cpu_quota else -1
print(model)


RandomForestClassifier(criterion='entropy', max_features='log2',
                       n_estimators=400, n_jobs=2, random_state=1)


## Generate 'tasks' based on tiles


In [5]:
# Derive the task strings from the tiles GeoJSON for the time range
# '2019-01--P6M' (presumably six months from January 2019 - TODO confirm),
# then display how many tasks were produced.
tasks = extract_taskstr_from_geojson(
    geojson=tiles_geojson_dict,
    time_range='2019-01--P6M',
)
len(tasks)

390

## Generate features for model


First, generate a dictionary of geoboxes for each tile

In [6]:
# Tile indices of the first task; these are the x, y passed to
# create_features in the next step.
first_task = tasks[0]
x, y = get_xy_from_task(first_task)
print(x, y)

# Geobox dictionary handed to the feature-generation and prediction steps.
africa_geobox = AfricaGeobox()
geobox_dict = africa_geobox.geobox_dict


29 0


Pass the feature layer function into the `create_features` function


In [7]:
# Build the feature dataset for tile (x, y). Use the feature layer function
# selected in the "Analysis Params" cell instead of hard-coding it here, so
# changing that one setting is enough to switch feature layers.
subfld, geobox, data = feature_layer_chad_.create_features(
    x,
    y,
    config,
    geobox_dict,
    feature_func=feature_layer_function,
    dask_chunks=dask_chunks,
)

print(data)

<xarray.Dataset>
Dimensions:        (x: 4800, y: 4800)
Coordinates:
    time           datetime64[ns] 2019-07-02T11:59:59.999999
  * y              (y) float64 9.599e+04 9.597e+04 9.595e+04 ... 50.0 30.0 10.0
  * x              (x) float64 2.784e+06 2.784e+06 ... 2.88e+06 2.88e+06
    spatial_ref    int32 6933
    band           int64 1
Data variables:
    blue_S1        (y, x) float32 dask.array<chunksize=(4800, 4800), meta=np.ndarray>
    green_S1       (y, x) float32 dask.array<chunksize=(4800, 4800), meta=np.ndarray>
    red_S1         (y, x) float32 dask.array<chunksize=(4800, 4800), meta=np.ndarray>
    nir_S1         (y, x) float32 dask.array<chunksize=(4800, 4800), meta=np.ndarray>
    swir_1_S1      (y, x) float32 dask.array<chunksize=(4800, 4800), meta=np.ndarray>
    swir_2_S1      (y, x) float32 dask.array<chunksize=(4800, 4800), meta=np.ndarray>
    red_edge_1_S1  (y, x) float32 dask.array<chunksize=(4800, 4800), meta=np.ndarray>
    red_edge_2_S1  (y, x) float32 dask.arra

## Run prediction

In [8]:
# Helper object used below to run the prediction and to save the results.
pff = PredictFromFeature(config, geobox_dict)

In [9]:
# Apply the loaded model to the feature dataset; the result exposes a
# `Probabilities` variable that is read in the post-processing step.
predicted = pff.predict_with_model(config, model, data)

   predicting...
   probabilities...


In [10]:
# Display the prediction result (the output shows chunked, dask-backed arrays).
predicted

Unnamed: 0,Array,Chunk
Bytes,184.32 MB,7.68 MB
Shape,"(4800, 4800)","(200, 4800)"
Count,24 Tasks,24 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 184.32 MB 7.68 MB Shape (4800, 4800) (200, 4800) Count 24 Tasks 24 Chunks Type int64 numpy.ndarray",4800  4800,

Unnamed: 0,Array,Chunk
Bytes,184.32 MB,7.68 MB
Shape,"(4800, 4800)","(200, 4800)"
Count,24 Tasks,24 Chunks
Type,int64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,184.32 MB,7.68 MB
Shape,"(4800, 4800)","(200, 4800)"
Count,24 Tasks,24 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 184.32 MB 7.68 MB Shape (4800, 4800) (200, 4800) Count 24 Tasks 24 Chunks Type float64 numpy.ndarray",4800  4800,

Unnamed: 0,Array,Chunk
Bytes,184.32 MB,7.68 MB
Shape,"(4800, 4800)","(200, 4800)"
Count,24 Tasks,24 Chunks
Type,float64,numpy.ndarray


In [None]:
# persist() returns a new object backed by the computed chunks rather than
# modifying `predicted` in place, so rebind the name; otherwise the computed
# result is discarded and later cells recompute from the lazy graph.
predicted = predicted.persist()

## Post processing

In [None]:
# NOTE: the dask_ml functions may raise a warning here because of a minor issue.
predict = post_process(data, predicted, config, geobox)
# Keep the `Probabilities` variable of the raw prediction; it is saved
# together with `predict` in the save step.
prob = predicted.Probabilities

## Save data
- Results will be written to:
```/home/jovyan/wa/u23/data/crop_mask_eastern/v0.1.7/x+029/y+000/2019```

In [None]:
# Save the post-processed prediction and the probabilities for this tile.
pff.save_data(subfld, predict, prob, geobox)

In [None]:
# List the output directory to check that the results were written.
# NOTE(review): hard-coded absolute path; it must match the save location above.
!ls /home/jovyan/wa/u23/data/crop_mask_eastern/v0.1.7/x+029/y+000/2019

## Either in this notebook or a separate one, do the next steps:

1. Database building
2. AWS syncing to S3

For command line operations in a notebook we can run commands using magic:
        
        e.g. !aws s3 mb s3://deafrica-data-dev-af --endpoint-url=http://192.168.0.19:4566