## Data Cube Classification from Open Data Cube

In [1]:
import datacube
import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt

In [2]:
from datacube_classification.sits import datacube_get_sits

### Data Cube Loading

**data cube metadata**

In [3]:
# Open a handle to the Data Cube; it is used below to list the indexed
# products and to load the imagery.
dc = datacube.Datacube(app="datacube")

**Showing available products**

In [4]:
# Display the products known to this Data Cube instance as a table
# (the last expression of the cell is rendered as its output).
dc.list_products()

Unnamed: 0_level_0,name,description,label,time,lon,creation_time,instrument,lat,format,platform,product_type,crs,resolution,tile_size,spatial_dimensions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2,CB4_64_16D_STK_1,This datacube was generated with all available...,,,,,AWFI,,GeoTiff,,cyclic_16_day,+proj=aea +lat_0=-12 +lon_0=-54 +lat_1=-2 +lat...,"(-64, 64)",,"(y, x)"


**Selecting CBERS-4/AWFI Product**

In [5]:
# Product to load; this must match an entry in the `name` column of the
# product listing shown above.
PRODUCT_NAME = "CB4_64_16D_STK_1"

**Load!**

In [6]:
# Load four spectral bands plus the EVI and NDVI indices for the study
# area, given as a longitude/latitude bounding box. No `time` filter is
# passed, so the query is not restricted to a date range.
# NOTE(review): the product listing reports resolution (-64, 64) for the
# (y, x) dimensions, while (64, -64) is requested here -- confirm that the
# sign/order is intentional.
cb4_64_16d_ftile = dc.load(
    PRODUCT_NAME,
    measurements=["BAND13", "BAND14", "BAND15", "BAND16", "EVI", "NDVI"],
    resolution=(64, -64),
    longitude=(-46.611, -43.4596207498523),
    latitude=(-14.181, -12.2317166349184),
)

### Training a Random Forest Model

**load training samples**

In [7]:
# Read the labelled training samples (a shapefile) straight from the
# Brazil Data Cube server. The "/vsicurl/" prefix is GDAL's virtual file
# system for remote HTTP(S) files, so no local download step is needed.
# The adjacent string literals are concatenated into a single path.
gdf = gpd.read_file(
    "/vsicurl/"
    "https://brazildatacube.dpi.inpe.br/geo-knowledge-hub/bdc-article"
    "/training-samples/shp/bdc_paper_samples.shp"
)

**Organizing labels as int values**

In [8]:
# Replace the class names in the "label" column with integer codes, one
# class at a time. Rows whose label is none of these names are left as-is,
# so re-running the cell is a no-op.
for class_name, class_code in (("Crop", 0), ("Pasture", 1), ("Natural", 2)):
    gdf.loc[gdf["label"] == class_name, "label"] = class_code

**Extract Time Series**

In [9]:
# Extract the time series of the loaded cube for the training samples in
# `gdf`. As the output below shows, the result is a table with one column
# per measurement and time index (BAND130 ... NDVI4) plus the `label` column.
# NOTE(review): datacube_get_sits is a project helper -- presumably it samples
# the cube at each geometry's location; confirm against its implementation.
samples = datacube_get_sits(cb4_64_16d_ftile, gdf)
# Bare last expression: render the samples table as the cell output.
samples

index,BAND130,BAND131,BAND132,BAND133,BAND134,BAND140,BAND141,BAND142,BAND143,BAND144,...,EVI1,EVI2,EVI3,EVI4,NDVI0,NDVI1,NDVI2,NDVI3,NDVI4,label
0,0.1031,0.0772,0.0962,0.0801,0.0722,0.1741,0.1326,0.1675,0.0973,0.1086,...,0.1565,0.1712,0.2401,0.3956,0.2181,0.2353,0.2319,0.3459,0.5118,0
1,0.0937,0.0832,0.1005,0.0763,0.1183,0.1510,0.1263,0.1534,0.1049,0.1585,...,0.1815,0.2165,0.4567,0.2879,0.2620,0.2691,0.2842,0.5630,0.3355,0
2,0.1037,0.0763,0.0931,0.0694,0.0899,0.1808,0.1410,0.1714,0.1151,0.1496,...,0.1352,0.1419,0.3645,0.1807,0.2144,0.2039,0.1986,0.4857,0.2516,0
3,0.0874,0.0807,0.1104,0.0827,0.1090,0.1481,0.1449,0.1897,0.2862,0.1710,...,0.2307,0.1480,0.1292,0.3064,0.1925,0.3151,0.1937,0.2000,0.3555,0
4,0.1301,0.1013,0.1202,0.4291,0.1498,0.1991,0.1603,0.1796,0.8050,0.1863,...,0.1905,0.1893,-0.0242,0.3213,0.2089,0.2523,0.2336,0.0709,0.3180,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917,0.0899,0.0803,0.1028,0.0212,0.0594,0.1499,0.1356,0.1590,0.0743,0.1064,...,0.2334,0.3189,0.5565,0.7240,0.2602,0.3240,0.3761,0.7931,0.7432,1
918,0.0923,0.0698,0.0907,0.0240,0.0702,0.1609,0.1242,0.1449,0.0642,0.1099,...,0.2179,0.3100,0.4948,0.5114,0.2739,0.3189,0.3848,0.7521,0.5966,1
919,0.0671,0.0609,0.0703,0.0173,0.0719,0.1220,0.1042,0.1263,0.0511,0.1208,...,0.2330,0.2947,0.4994,0.5358,0.2949,0.3479,0.4000,0.8045,0.5993,1
920,0.0735,0.0642,0.0847,0.0546,0.0772,0.1306,0.1117,0.1480,0.0631,0.1265,...,0.2485,0.2811,0.3239,0.3661,0.3117,0.3667,0.3630,0.5284,0.4600,1


**Training the model**

In [10]:
from sklearn.ensemble import RandomForestClassifier
from datacube_classification.models import train_sklearn_model

**Creating a model**

In [11]:
# Random forest with 1000 trees. The original line assigned the same name
# twice (`rfor_1000 = rfor_1000 = ...`); a single assignment is enough.
# A fixed `random_state` makes the bootstrap sampling and per-split feature
# selection repeatable, so "Restart & Run All" trains the same model.
rfor_1000 = RandomForestClassifier(n_estimators=1000, random_state=42)

**Training!**

In [12]:
# Train the classifier on the extracted samples and rebind `rfor_1000` to
# whatever the helper returns.
# NOTE(review): train_sklearn_model is a project helper -- presumably it uses
# the `label` column as the target and the remaining columns as features;
# confirm against its implementation.
rfor_1000 = train_sklearn_model(rfor_1000, samples)

### Export trained model

In [13]:
from joblib import dump

In [14]:
# Persist the trained model to disk with joblib (path is relative to the
# working directory). The call returns the list of written file names,
# which is shown as the cell output.
dump(rfor_1000, "rfor_1000_cb4_6bands.joblib")

['rfor_1000_cb4_6bands.joblib']