## Import libraries

In [1]:
#!pip uninstall dea_ml -y

In [2]:
#!pip install -e dea_ml

In [11]:
import json
import joblib
from odc.io.cgroups import get_cpu_quota

from dea_ml.core.feature_layer import create_features, get_xy_from_task
from dea_ml.helpers.json_to_taskstr import extract_taskstr_from_geojson
from dea_ml.helpers.io import download_file
from dea_ml.core.africa_geobox import AfricaGeobox
from dea_ml.core.predict_from_feature import PredictContext, predict_with_model
from dea_ml.config.product_feature_config import FeaturePathConfig

import warnings
warnings.filterwarnings("ignore")

## Analysis Params


In [12]:
# Select the feature layer function
from gm_mads_two_seasons import gm_mads_two_seasons
feature_layer_function = gm_mads_two_seasons

# Select the post-processing function (called later on the model predictions)
from post_processing import post_processing
post_process = post_processing

# Chunk sizes handed to dask for the x and y dimensions
dask_chunks = dict(x=1500, y=1500)


## Initiate configuration class

In [13]:
# import the configuration
# NOTE(review): this binds the FeaturePathConfig class itself rather than an
# instance (the cell output is the class path), so the later
# `config.<attr> = ...` assignments set attributes on the class. Confirm this
# is intended.
config = FeaturePathConfig
config

dea_ml.config.product_feature_config.FeaturePathConfig

## Open tiles and model

In [20]:
# Location of the tiles GeoJSON that is opened in the following cell.
# NOTE(review): hard-coded absolute path under /home/jovyan; adjust for your environment.
config.tiles_geojson = '/home/jovyan/wa/u23/crop-mask/testing/eastern_cropmask/data/s2_tiles_eastern_aez.geojson'

In [21]:
# Read the tiles GeoJSON into a dictionary
import fsspec

tile_geojson_url = config.tiles_geojson
# local_file = download_file(tile_geojson_url, '/tmp/tiles_geojson.json')

with fsspec.open(tile_geojson_url) as fh:
    # Parse the raw file contents as JSON
    tiles_geojson_dict = json.loads(fh.read())

In [22]:
# Location of the serialized model that is loaded in the following cell.
# NOTE(review): hard-coded absolute path under /home/jovyan; adjust for your environment.
config.model_path = '/home/jovyan/wa/u23/crop-mask/testing/eastern_cropmask/results/gm_mads_two_seasons_ml_model_20210301.joblib'

In [23]:
# Open model
ml_model_url = config.model_path
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
with fsspec.open(ml_model_url) as fh:
    model = joblib.load(fh)

# Update the model with the CPUs available on this machine.
# get_cpu_quota() returns None when no cgroup CPU quota is set, and a
# fractional quota below 0.5 rounds to 0; neither is a valid n_jobs value.
cpu_quota = get_cpu_quota()
model.n_jobs = max(1, round(cpu_quota)) if cpu_quota else -1  # -1: use all processors
print(model)

RandomForestClassifier(criterion='entropy', max_features='log2',
                       n_estimators=400, n_jobs=31, random_state=1)


## Generate 'tasks' based on tiles


In [24]:
# Derive the task strings from the tiles GeoJSON for the chosen time range
tasks = extract_taskstr_from_geojson(
    geojson=tiles_geojson_dict,
    time_range='2019-01--P6M',
)
# Number of tasks generated
len(tasks)

390

## Generate features for model


First, generate a dictionary of geoboxes for each tile.

In [25]:
# Extract the x/y pair from the first task and show it
x, y = get_xy_from_task(tasks[0])
print(x,y)

# Geobox lookup; passed to create_features and PredictContext in later cells
geobox_dict = AfricaGeobox().geobox_dict


29 0


Pass the feature layer function into the `create_features` function

**Note:** This will take a couple of minutes to run.


In [26]:
# Build the feature dataset for tile (x, y). Pass the function selected in the
# "Analysis Params" cell so that changing `feature_layer_function` there takes
# effect here, instead of always using gm_mads_two_seasons.
subfld, geobox, data = create_features(x,
                                       y,
                                       config,
                                       geobox_dict,
                                       feature_func=feature_layer_function,
                                       dask_chunks=dask_chunks)

# Show the text summary of the feature dataset
print(data)

<xarray.Dataset>
Dimensions:        (x: 4800, y: 4800)
Coordinates:
    time           datetime64[ns] 2019-07-02T11:59:59.999999
  * y              (y) float64 9.599e+04 9.597e+04 9.595e+04 ... 50.0 30.0 10.0
  * x              (x) float64 2.784e+06 2.784e+06 ... 2.88e+06 2.88e+06
    spatial_ref    int32 6933
    band           int64 1
Data variables:
    blue_S1        (y, x) float32 dask.array<chunksize=(1500, 1500), meta=np.ndarray>
    green_S1       (y, x) float32 dask.array<chunksize=(1500, 1500), meta=np.ndarray>
    red_S1         (y, x) float32 dask.array<chunksize=(1500, 1500), meta=np.ndarray>
    nir_S1         (y, x) float32 dask.array<chunksize=(1500, 1500), meta=np.ndarray>
    swir_1_S1      (y, x) float32 dask.array<chunksize=(1500, 1500), meta=np.ndarray>
    swir_2_S1      (y, x) float32 dask.array<chunksize=(1500, 1500), meta=np.ndarray>
    red_edge_1_S1  (y, x) float32 dask.array<chunksize=(1500, 1500), meta=np.ndarray>
    red_edge_2_S1  (y, x) float32 dask.arra

## Run prediction

**Note:** This will take a couple of minutes to run as the calculations are computed and brought into memory

In [27]:
# Build the prediction context from the configuration and the geobox lookup.
# In the visible cells `pff` is only referenced by the commented-out save step.
pff = PredictContext(config, geobox_dict)

In [28]:
# Apply the model to the feature dataset and call .persist() on the result.
predicted = predict_with_model(config, model, data).persist()

   predicting...
   probabilities...


In [29]:
# Show the text summary of the prediction dataset
print(predicted)

<xarray.Dataset>
Dimensions:        (x: 4800, y: 4800)
Coordinates:
  * x              (x) float64 2.784e+06 2.784e+06 ... 2.88e+06 2.88e+06
  * y              (y) float64 9.599e+04 9.597e+04 9.595e+04 ... 50.0 30.0 10.0
    spatial_ref    int32 0
Data variables:
    Predictions    (y, x) int64 dask.array<chunksize=(156, 4800), meta=np.ndarray>
    Probabilities  (y, x) float64 dask.array<chunksize=(156, 4800), meta=np.ndarray>
Attributes:
    grid_mapping:  spatial_ref


## Post processing

In [30]:
# NOTE: there is a minor issue with the dask_ml functions that may raise a warning here.
predict = post_process(data, predicted, config, geobox)
# NOTE(review): probabilities are taken from `predicted` (the model output),
# not from the post-processed `predict`; confirm this is intended.
prob = predicted.Probabilities



In [31]:
# Show the text summary of the probabilities array
print(prob)

<xarray.DataArray 'Probabilities' (y: 4800, x: 4800)>
dask.array<reshape, shape=(4800, 4800), dtype=float64, chunksize=(157, 4800), chunktype=numpy.ndarray>
Coordinates:
  * x            (x) float64 2.784e+06 2.784e+06 2.784e+06 ... 2.88e+06 2.88e+06
  * y            (y) float64 9.599e+04 9.597e+04 9.595e+04 ... 50.0 30.0 10.0
    spatial_ref  int32 0
Attributes:
    grid_mapping:  spatial_ref


## Save data
- The result will be written to:
```/home/jovyan/wa/u23/data/crop_mask_eastern/v0.1.7/x+029/y+000/2019```

In [13]:
# pff.save_data(subfld, predict, prob, geobox)

In [14]:
# !ls /home/jovyan/wa/u23/data/crop_mask_eastern/v0.1.7/x+029/y+000/2019

## Either in this notebook or a separate one, do the next steps:

1. Database building
2. AWS syncing to S3

For command line operations in a notebook we can run commands using magic:
        
        e.g. !aws s3 mb s3://deafrica-data-dev-af --endpoint-url=http://192.168.0.19:4566