# Open farm and simple diagnostic

## To-do
1. Importar o que for preciso - dar os updates
2. Abrir CAR-Estado
3. Pegar a fazenda (pode ser pelo CAR)
4. Pegar a bbox
5. Carregar imagens Sentinel-2 ou Landsat 8-9
6. Aplicar a máscara da fazenda no xarray (ver deafrica-sandbox-notebooks/Use_cases/Lake_baringo_grazing/Vegetation_phenology_1D.ipynb)

## Load packages

In [None]:
import time
start = time.time()

In [None]:
%%time
# from specific examples
import matplotlib.pyplot as plt

import rioxarray

import numpy as np
import xarray as xr
import geopandas as gpd

import rasterio.features
import stackstac
import pystac_client
import planetary_computer

import xrspatial.multispectral as ms



In [None]:
import importlib.metadata
import subprocess
import sys

# Distributions this notebook needs that may be absent from the base image.
required = {'rasterstats', 'odc-ui'}


def _is_installed(dist_name):
    """Return True when importlib.metadata can find distribution *dist_name*."""
    try:
        importlib.metadata.version(dist_name)
    except importlib.metadata.PackageNotFoundError:
        return False
    return True


# importlib.metadata replaces the deprecated pkg_resources API.
# NOTE: `installed` now holds only the required distributions that were found,
# not every distribution in the environment.
installed = {name for name in required if _is_installed(name)}
missing = required - installed

if missing:
    # Install with the interpreter running this kernel so the packages land
    # in the same environment; sorted() keeps the command deterministic.
    python = sys.executable
    subprocess.check_call(
        [python, '-m', 'pip', 'install', *sorted(missing)],
        stdout=subprocess.DEVNULL,
    )

In [None]:
%%time
# From DEA
import sys
sys.path.append('/home/jovyan/PlanetaryComputerExamples/grasspace/deafrica-sandbox-notebooks/Tools/deafrica_tools/')

from plotting import display_map, rgb
from spatial import xr_rasterize
from bandindices import calculate_indices


## Área de análise
É aqui que definimos o que será analisado. Ainda está em fase bem inicial.

In [None]:
# Base directory; the loading cells append 'vetorial/FAZENDAS/...' to it.
path = '/home/jovyan/PlanetaryComputerExamples/'

# Area-of-interest switches, each read by one of the loading cells below.
# pq: when True, work by paddocks ("piquetes"); the filter is set in that cell.
pq = True
# tl: when True, load the "talhoes" layer and use it as the area of interest.
tl = False
# lm: when True, load the "limites" layer and use it as the area of interest.
lm = False

# Date range handed to the STAC search as its `datetime` argument.
datetime = "2021-08-01/2023-08-24"
# Step between the time indices that get plotted (every `pace`-th time step).
pace = 4

In [None]:
if pq:
    # Paddocks ("piquetes") layer of the farm GeoPackage.
    piquetes = gpd.read_file(
        path + 'vetorial/FAZENDAS/fazenda_uniguiri.gpkg',
        layer='piquetes',
    )

    # Keep only the rows whose 'R' attribute equals the chosen value.
    R = 'R8_'
    piquetes = piquetes[piquetes['R'] == R]

    piquetes.plot(column='R', legend=True, figsize=(10, 8))
    piquetes.head()

    # Later cells read the area of interest from LIM.
    LIM = piquetes

In [None]:
if tl:
    # Fields ("talhoes") layer of the GeoPackage.
    talhoes = gpd.read_file(
        path + 'vetorial/FAZENDAS/iacanga_22_23.gpkg',
        layer='talhoes',
    )

    # Keep only the rows whose 'LAYER' attribute equals the chosen value.
    R = '032 APARECIDA'
    talhoes = talhoes[talhoes['LAYER'] == R]

    talhoes.plot(column='LAYER', legend=True, figsize=(10, 8))
    talhoes.head()

    # Later cells read the area of interest from LIM.
    LIM = talhoes

In [None]:
# Farm boundaries ("limites"): when `lm` is set they override whatever the
# previous cells stored in LIM.
if lm:
    limites = gpd.read_file(path + 'vetorial/FAZENDAS/fazenda_uniguiri.gpkg', layer='limites')
    limites.plot('parte')
    limites.head()
    # Keep only the parts numbered 1, 2 or 3.
    limites = limites[limites['parte'].isin([1, 2, 3])]

    LIM = limites
    print('using limites bounding box')

elif pq or tl:
    # LIM was already assigned by the piquetes/talhoes cell.
    print('using piquetes or talhoes bouding box')

else:
    # With every switch off no cell assigns LIM; fail here with a clear
    # message instead of a NameError (or a stale LIM) further down.
    raise ValueError(
        "No area of interest selected: set at least one of pq, tl or lm to True"
    )

In [None]:
def get_lims(gdf):
    """Return the overall bounding box of *gdf* and its lat/lon ranges.

    Parameters
    ----------
    gdf : object exposing a ``bounds`` table with ``minx``, ``miny``,
        ``maxx`` and ``maxy`` columns (e.g. a GeoDataFrame).

    Returns
    -------
    bbox : tuple
        ``(min of minx, min of miny, max of maxx, max of maxy)``.
    lat_range : tuple
        ``(bbox[1], bbox[3])``.
    lon_range : tuple
        ``(bbox[0], bbox[2])``.
    """
    per_row = gdf.bounds
    west, south = per_row.minx.min(), per_row.miny.min()
    east, north = per_row.maxx.max(), per_row.maxy.max()

    bbox = (west, south, east, north)
    lat_range = (south, north)
    lon_range = (west, east)

    print('got bbox, lat_range, lon_range')
    return bbox, lat_range, lon_range

# using limites OR piquetes

bbox, lat_range, lon_range = get_lims(LIM)
bbox, lat_range, lon_range

In [None]:
# Display from lat and lon range
display_map(x=lon_range, y=lat_range, zoom_bias = 0.3)

## Get images

### About indices 
`index` : str or list of strs
    A string giving the name of the index to calculate or a list of
    strings giving the names of the indices to calculate:
    
- ``'ASI'``  (Artificial Surface Index, Yongquan Zhao & Zhe Zhu 2022)
- ``'AWEI_ns'`` (Automated Water Extraction Index, no shadows, Feyisa 2014)
- ``'AWEI_sh'`` (Automated Water Extraction Index, shadows, Feyisa 2014)
- ``'BAEI'`` (Built-Up Area Extraction Index, Bouzekri et al. 2015)
- ``'BAI'`` (Burn Area Index, Martin 1998)
- ``'BSI'`` (Bare Soil Index, Rikimaru et al. 2002)
- ``'BUI'`` (Built-Up Index, He et al. 2010)
- ``'CMR'`` (Clay Minerals Ratio, Drury 1987)
- ``'ENDISI'`` (Enhanced Normalised Difference for Impervious Surfaces Index, Chen et al. 2019)
- ``'EVI'`` (Enhanced Vegetation Index, Huete 2002)
- ``'FMR'`` (Ferrous Minerals Ratio, Segal 1982)
- ``'IOR'`` (Iron Oxide Ratio, Segal 1982)
- ``'LAI'`` (Leaf Area Index, Boegh 2002)
- ``'MBI'`` (Modified Bare Soil Index, Nguyen et al. 2021)
- ``'MNDWI'`` (Modified Normalised Difference Water Index, Xu 1996)
- ``'MSAVI'`` (Modified Soil Adjusted Vegetation Index, Qi et al. 1994)
- ``'NBI'`` (New Built-Up Index, Jieli et al. 2010)
- ``'NBR'`` (Normalised Burn Ratio, Lopez Garcia 1991)
- ``'NDBI'`` (Normalised Difference Built-Up Index, Zha 2003)
- ``'NDCI'`` (Normalised Difference Chlorophyll Index, Mishra & Mishra, 2012)
- ``'NDMI'`` (Normalised Difference Moisture Index, Gao 1996)
- ``'NDSI'`` (Normalised Difference Snow Index, Hall 1995)
- ``'NDTI'`` (Normalised Difference Turbidity Index, Lacaux et al. 2007)
- ``'NDVI'`` (Normalised Difference Vegetation Index, Rouse 1973)
- ``'NDWI'`` (Normalised Difference Water Index, McFeeters 1996)
- ``'SAVI'`` (Soil Adjusted Vegetation Index, Huete 1988)
- ``'TCB'`` (Tasseled Cap Brightness, Crist 1985)
- ``'TCG'`` (Tasseled Cap Greenness, Crist 1985)
- ``'TCW'`` (Tasseled Cap Wetness, Crist 1985)
- ``'WI'`` (Water Index, Fisher 2016)

In [None]:
%%time
# create stac client
stac = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)

### get Sentinel 2 L2A Data
colocar as especificidades aqui

In [None]:
%%time
# SENTINEL 2
# Search the catalogue for scenes that intersect the bounding box, fall in
# the configured date range and report less than 40 % cloud cover.
search = stac.search(
    bbox=bbox,
    datetime=datetime,  # from 2016 onwards, to deal with the bad data
    collections=["sentinel-2-l2a"],
    query={"eo:cloud_cover": {"lt": 40}},
)

items = search.item_collection()
print(len(items))

# An empty result would otherwise surface as a bare IndexError on items[0].
if len(items) == 0:
    raise ValueError(
        f"STAC search returned no sentinel-2-l2a items for bbox={bbox} "
        f"and datetime={datetime!r}; widen the date range or cloud-cover filter"
    )

# Asset names available on the first item (useful for choosing bands).
print(items[0].assets.keys())

In [None]:
#items[0]

In [None]:
# For Sentinel 2
# Stack the selected assets of every item into one DataArray clipped to `bbox`.
data = (
    stackstac.stack(
        items,
        assets = ["B08", "B04", "B03","B02"],    # alternative: ["B12","B11","B08","B08","B05","B04","B03","B02"]
        chunksize=1024,
        resolution=0.0001, # presumably in the units of the requested EPSG (degrees for 4326) - confirm
        epsg=4326,
        bounds=bbox,
        properties = True,
        fill_value = np.nan,
    )
    .where(lambda x: x > 0, other=np.nan)  # sentinel-2 uses 0 as nodata
    .assign_coords(band=lambda x: x.common_name.rename("band"))  # use common names
)

# fix xy names: the masking cell and plots below work with longitude/latitude
data = data.rename({'x': 'longitude','y': 'latitude'})

# Display the DataArray summary as the cell output.
data

### Mask dataset com fazenda

In [None]:
# Create the mask (DataArray version) from the area-of-interest geometries.
# NOTE(review): export_tiff presumably also writes the mask to 'masked2.tiff' - confirm against xr_rasterize.
mask = xr_rasterize(LIM,data,
                    # x_dim='x',
                    # y_dim='y',
                   export_tiff='masked2.tiff') # alternative: x_dim='longitude', y_dim='latitude'

# Mask the data: values are kept where `mask` is truthy and become NaN elsewhere.
data = data.where(mask)

# Convert to float32 to conserve memory.
data= data.astype(np.float32)

In [1]:
# Compute the indices.
# Split the 'band' dimension into one data variable per band.
ds_ = data.to_dataset(dim='band')

# The indices to calculate (names as listed in the "About indices" section).
indices = ['LAI', 'NDVI', "MSAVI", "SAVI","EVI"]
# NOTE(review): drop=True presumably removes the original bands from `ds`,
# leaving only the indices; `ds_` keeps the bands for the RGB plots - confirm.
ds = calculate_indices(ds_, 
                       index= indices, 
                       satellite_mission='s2', 
                       drop=True);

NameError: name 'data' is not defined

In [None]:
%%time
# Dataset.compute() returns a new, loaded object and leaves `ds` untouched,
# so the result must be bound back to `ds`; otherwise the work is discarded
# and every later `.values` access recomputes it from scratch.
ds = ds.compute()

#### Visualizing RGB images sequence

In [None]:
%%time
rgb(ds_, col='time', size=6, col_wrap=3)

#### Plot Indices for specific dates

In [None]:
%%time
# Colour limits per index: the 0.01 and 0.99 quantiles (NaNs ignored) taken
# over the entire time series, so maps of different dates share one scale
# and can be compared fairly.
keys = indices
mms = {name: np.nanquantile(ds[name].values, [0.01, 0.99]) for name in keys}
print(mms)

In [None]:
%%time

# Walk the time axis in steps of `pace`, drawing a map and a histogram per step.
for t in range(0,len(ds.time),pace):
    
    # Date part of the timestamp (text before the 'T'), used in the titles.
    date = str(ds.time[t].values).split('T')[0]
    # indices[0:1] restricts the loop to the first entry of `indices`.
    for i in indices[0:1]:
        # Map of the index for this time step, using the shared colour limits from `mms`.
        ds[i].isel(time=[t]).plot(cmap = 'YlGn', vmin = mms[i][0], vmax = mms[i][1], figsize=(8,5));
        plt.title(f'{i} for {date}');
        plt.grid();
        plt.show();
        plt.close();

        # Histogram of the same time step; values above 3.5 or below 0 are
        # replaced by NaN before plotting and before the summary statistics.
        flat = ds[i].isel(time=[t]).values.flatten()
        flat = np.where((flat> 3.5) | (flat<0), np.nan, flat)
        plt.figure(figsize=(6,2))
        plt.hist(flat,20);
        # The title reports the NaN-ignoring sum and the 0.05/0.5/0.95 quantiles.
        plt.title(f'{i} for {date}, sum = {np.nansum(flat):.0f} quantiles .05, .5, .95 = {np.round(np.nanquantile(flat,[.05,.5,.95]),1)}');
        plt.show();
        plt.close()
        print('\n');

In [None]:
end = time.time()
print(f'{(end - start):.0f} seconds')

### Monthly z-scores


In [None]:
i = 'LAI'

# Re-chunk so that time is a single chunk, fill gaps by interpolating along
# time, and group by calendar month; both statistics share this grouping.
by_month = (
    ds[i]
    .chunk(dict(time=-1))
    .interpolate_na(dim='time')
    .groupby('time.month')
)
dsmean = by_month.mean()
dsstd = by_month.std()

In [None]:
%%time

mm_mean = np.nanquantile(dsmean.values,[0.01,0.99])

mm_std = np.nanquantile(dsstd.values,[0.01,0.99])
mm_std


In [None]:
# Monthly means of i
dsmean.plot(col='month', cmap = 'RdBu', 
            vmin = mm_mean[0], vmax = mm_mean[1], col_wrap=3)

In [None]:
# Monthly stds of i
# the redder, the more deviant
dsstd.plot(col='month', cmap = 'RdBu_r', 
            vmin = mm_std[0], vmax = mm_std[1], col_wrap=3)

### TO-DO -> calcular o z-score mensal
Ainda não entendi a parte dos meses, mas vou chegar lá.

In [None]:
# Monthly mean series of LAI: time re-chunked into a single chunk, gaps
# interpolated along time, then resampled with the 'M' frequency and averaged.
# NOTE(review): newer pandas versions may prefer the 'ME' alias over 'M' - confirm for this environment.
dsmonth = ds['LAI'].chunk(dict(time=-1)).interpolate_na(dim='time').resample(time='M').mean(dim='time')

In [None]:
end = time.time()
print(f'{(end - start):.0f} seconds')