<div align="center" style="color:#336699"><b><h2> Track Deforestation Dataset </h2></b></div>
<hr style="border:2px solid #0077b9;">
<br/>
<div style="text-align: center;font-size: 90%;">
    Helvécio B. Leal Neto, <sup><a href="https://orcid.org/0000-0002-7526-2094"><i class="fab fa-lg fa-orcid" style="color: #a6ce39"></i></a></sup>
    Alan J. P. Calheiros<sup><a href="https://orcid.org/0000-0002-7526-2094"><i class="fab fa-lg fa-orcid" style="color: #a6ce39"></i></a></sup>
    <br/><br/>
    National Institute for Space Research (INPE)
    <br/>
    Avenida dos Astronautas, 1758, Jardim da Granja, São José dos Campos, SP 12227-010, Brazil
    <br/><br/>
    Contact: <a href="mailto:helvecio.neto@inpe.br">helvecio.neto@inpe.br</a>, <a href="mailto:alan.calheiros@inpe.br">alan.calheiros@inpe.br</a>
    <br/><br/>
    Last Update: Apr 17, 2024
</div>

<br/>

<div style="text-align: justify;  margin-left: 25%; margin-right: 25%;">
<b>Abstract.</b> This Jupyter Notebook shows how to use fortracc to track a deforestation dataset.
</div>    
<br/>
<div style="text-align: justify;  margin-left: 15%; margin-right: 15%;font-size: 75%; border-style: solid; border-color: #0077b9; border-width: 1px; padding: 5px;">
    <b>In this example, we will use fortracc to compute the tracks of deforestation clusters.
</b>
    <div style="margin-left: 10px; margin-right: 10px; margin-top:10px">
      <p> Leal Neto, H.B.; Calheiros, A.J.P.;  Fortracc Algorithm. São José dos Campos, INPE, 2024. <a href="https://github.com/fortracc-project/" target="_blank"> Online </a>. </p>
    </div>
</div>

In [14]:
# !pip install --upgrade git+https://github.com/fortracc-project/pyfortracc.git@main#egg=pyfortracc

In [None]:
# Make modules located two directories above this notebook importable by
# appending that directory to sys.path (presumably the repository root, so the
# library can be run from source -- confirm against the repository layout).
# NOTE(review): these lines are already active; comment them out if you only
# want to use the pip-installed package.
import sys
library_path = '../../'
sys.path.append(library_path)
# IPython autoreload extension, mode 2: re-import edited modules automatically.
%load_ext autoreload
%autoreload 2

In [None]:
import pyfortracc

In [None]:
# Run the following command to install the GDAL library
# !sudo apt install gdal-bin -y

In [None]:
import os
import pathlib
import threading
# Download one MapBiomas land-cover raster per year (1985-2022), cropped to the
# study area and resampled to 1024 x 1024 pixels, into the local 'input/' folder.
pathlib.Path('input').mkdir(parents=True, exist_ok=True)
url = 'https://storage.googleapis.com/mapbiomas-public/initiatives/brasil/collection_8/lclu/coverage/brasil_coverage_{}.tif'
box = '-55 -3.54 -54 -4.46' # lonmin latmax lonmax latmin


def download(command):
    """Run a shell command and return its exit status.

    Parameters
    ----------
    command : str
        Full command line handed to ``os.system``.

    Returns
    -------
    int
        The value returned by ``os.system`` (0 means the command succeeded).
    """
    return os.system(command)


# Exit status of every gdal_translate call, keyed by year.
exit_status = {}


def _download_year(year, command):
    """Run one download command and record its exit status under ``year``."""
    exit_status[year] = download(command)


download_threads = []
for year in range(1985, 2023):
    url2 = url.format(year)
    gdal_command = 'gdal_translate /vsicurl/'+url2+' -b 1 -projwin ' + box + ' -of GTiff -outsize 1024 1024 input/'+str(year)+'.tif'
    thread = threading.Thread(target=_download_year, args=(year, gdal_command))
    thread.start()
    download_threads.append(thread)

# Wait for every download before the cell returns; otherwise the following
# cells could start tracking while some GeoTIFFs are still being written.
for thread in download_threads:
    thread.join()

# Report failures instead of silently continuing with missing input files.
failed_years = sorted(y for y, status in exit_status.items() if status != 0)
if failed_years:
    print('gdal_translate failed for the following years:', failed_years)

In [None]:
import rasterio
import numpy as np
def read_function(path):
    """Read a land-cover raster and return a binary anthropogenic-use mask.

    Parameters
    ----------
    path : str
        Path of the raster file; it is opened with ``rasterio.open``.

    Returns
    -------
    numpy.ndarray
        2-D integer array with 1 where the class code of band 1 belongs to the
        anthropogenic class list and 0 everywhere else (natural classes and
        any code not listed).
    """
    # Class codes follow the MapBiomas legend:
    # https://brasil.mapbiomas.org/wp-content/uploads/sites/4/2023/08/EN__Codigos_da_legenda_Colecao_7.pdf
    # Natural classes, kept for reference only (not used by the mask):
    # 1, 3, 4, 5, 49, 10, 11, 12, 32, 29, 50, 13
    anthropogenic_classes = [14, 15, 18, 19, 39, 20, 40, 62, 41, 36, 46, 47, 48, 9, 21]

    with rasterio.open(path) as src:
        # Band 1 with the row order reversed (first axis flipped).
        # NOTE(review): presumably flipped so the rows match the orientation
        # expected by the tracking library -- confirm.
        data = src.read(1)[::-1]

    # Anthropogenic pixels become 1; everything else becomes 0.
    return np.where(np.isin(data, anthropogenic_classes), 1, 0)

In [None]:
# Tracking configuration dictionary handed to the pyfortracc calls below.
name_list = {
    'input_path': 'input/',
    'output_path': 'output/',
    # read_function marks anthropogenic pixels with 1, so select values == 1
    'thresholds': [1],
    'min_cluster_size': [5],
    'operator': '==',
    # File names are the year followed by '.tif' (e.g. 1985.tif)
    'timestamp_pattern': '%Y.tif',
    'delta_time': 525960,        # Minutes in a year
    'delta_tolerance': 60 * 24,  # Minutes in a day
    'min_overlap': 10,
    'cluster_method': 'ndimage',
    'opt_correction': True,
    'validation': True,
}

In [None]:
# Run the tracking over the input rasters with the settings in name_list,
# loading each file through read_function (parallel mode enabled).
pyfortracc.track(
    name_list,
    read_function,
    parallel=True,
)

In [None]:
# Grid size and geographic extent of the rasters; the values match the
# 1024 x 1024 output size and the bounding box used in the download cell.
name_list.update({
    'x_dim': 1024,
    'y_dim': 1024,
    'lon_min': -55.0,
    'lon_max': -54.0,
    'lat_min': -4.46,
    'lat_max': -3.54,
})

In [None]:
# Animate the whole domain from 1985 to 2022 with trajectory=False.
pyfortracc.plot_animation(
    name_list,
    read_function,
    start_stamp='1985',
    end_stamp='2022',
    trajectory=False,
)

In [None]:
# Animate a zoomed sub-region, annotating each cluster with its uid and status
# and drawing white vectors (trajectory=False).
# NOTE(review): the zoom_region latitudes are given as (-3.8, -3.9) -- confirm
# the coordinate order expected by plot_animation.
pyfortracc.plot_animation(
    name_list,
    read_function,
    start_stamp='1985',
    end_stamp='2022',
    zoom_region=[-54.6, -54.5, -3.8, -3.9],
    x_scale=0.001,
    y_scale=0.001,
    info=True,
    info_cols=['uid', 'status'],
    trajectory=False,
    vector=True,
    vector_color='white',
    vector_scale=1,
)

In [None]:
import pandas as pd
import glob
# Gather every parquet tracking table found under <output_path>/track/trackingtable/
tracking_files = sorted(glob.glob(name_list['output_path'] + '/track/trackingtable/*.parquet'))
if not tracking_files:
    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        'No tracking tables found in ' + name_list['output_path']
        + '/track/trackingtable/ - run the tracking cell first.'
    )
tracking_table = pd.concat(pd.read_parquet(f) for f in tracking_files)
display(tracking_table.head())

# Convert cluster size to area: each pixel is 100 m x 100 m, i.e. 10,000 m² per
# pixel (assumes 'size' is the number of pixels in the cluster -- confirm).
PIXEL_AREA_M2 = 100 * 100
tracking_table['area'] = tracking_table['size'] * PIXEL_AREA_M2

In [None]:
# Maximum recorded lifetime of every cluster uid, ordered from longest to shortest.
lifetime = (
    tracking_table
    .groupby('uid')['lifetime']
    .max()
    .to_frame()
    .sort_values(by='lifetime', ascending=False)
)
lifetime.head(5)

In [None]:
# Select the 10 clusters with the longest lifetime (`lifetime` is sorted in
# descending order) and plot the area of each one, one line per uid.
uid_list = lifetime.index[0:10].tolist()
selected_tracks = tracking_table.loc[tracking_table['uid'].isin(uid_list)]
selected_tracks.groupby('uid')['area'].plot(
    legend=True,
    # The uids are chosen by lifetime, not by size, so the title says so.
    title='Area of the 10 longest-lived clusters',
    xlabel='Timestamp',
    ylabel='Area (m²)',
);