<div align="center" style="color:#336699"><h2><b> Track Global Precipitation JAXA GSMAP </b></h2></div>
<hr style="border:2px solid #0077b9;">
<br/>
<div style="text-align: center;font-size: 90%;">
    Helvécio B. Leal Neto, <sup><a href="https://orcid.org/0000-0002-7526-2094"><i class="fab fa-lg fa-orcid" style="color: #a6ce39"></i></a></sup>
    Alan J. P. Calheiros<sup><a href="https://orcid.org/0000-0002-7526-2094"><i class="fab fa-lg fa-orcid" style="color: #a6ce39"></i></a></sup>
    <br/><br/>
    National Institute for Space Research (INPE)
    <br/>
    Avenida dos Astronautas, 1758, Jardim da Granja, São José dos Campos, SP 12227-010, Brazil
    <br/><br/>
    Contact: <a href="mailto:helvecio.neto@inpe.br">helvecio.neto@inpe.br</a>, <a href="mailto:alan.calheiros@inpe.br">alan.calheiros@inpe.br</a>
    <br/><br/>
    Last Update: Jun 16, 2024
</div>

<br/>

<div style="text-align: justify;  margin-left: 25%; margin-right: 25%;">
<b>Abstract.</b> This Jupyter Notebook shows how to use pyfortracc to track global precipitation systems using JAXA GSMaP data.
</div>    
<br/>
<div style="text-align: justify;  margin-left: 15%; margin-right: 15%;font-size: 75%; border-style: solid; border-color: #0077b9; border-width: 1px; padding: 5px;">
    <b>In this example, we will use pyfortracc to compute the tracks of precipitating systems over the globe and explore the output data produced by the algorithm workflow.
</b>
    <div style="margin-left: 10px; margin-right: 10px; margin-top:10px">
      <p> Leal Neto, H.B.; Calheiros, A.J.P.;  pyForTraCC Algorithm. São José dos Campos, INPE, 2024. <a href="https://github.com/fortracc-project/pyfortracc" target="_blank"> Online </a>. </p>
    </div>
</div>

### Schedule
 [1. Goals and the Data](#goals)<br>
 [2. Setup Env](#setup)<br>
 [3. Parameters: Name_list](#namelist)<br>
 [4. Track Visualization](#visualization)<br>
 [5. The Tracking Table](#tracktable)<br>
 [6. Post Processing](#post)<br>

In [None]:
!pip3 install --upgrade earthengine-api xee> /dev/null 2>&1
!pip3 install --upgrade git+https://github.com/fortracc/pyfortracc.git@main#egg=pyfortracc &> /dev/null

In [1]:
import ee
import xarray as xr

import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm

import logging
logging.getLogger('googleapiclient.http').setLevel(logging.ERROR)

import pyfortracc

In [2]:
# Authenticate to the Earth Engine servers. A Google Cloud project is needed;
# one can be created at https://console.cloud.google.com/projectcreate
ee.Authenticate()
# NOTE(review): "project_name" looks like a placeholder — replace it with your own project.
ee.Initialize(project="project_name")

In [3]:
# Get the GSMaP operational (v8) collection, restricted to the
# 2000-06-01 / 2000-06-03 date filter and to the 'hourlyPrecipRate' band.
ic = (
    ee.ImageCollection('JAXA/GPM_L3/GSMaP/v8/operational')
    .filterDate('2000-06-01', '2000-06-03')
    .select('hourlyPrecipRate')
)

In [None]:
# Transform the image collection into an xarray Dataset through the 'ee' engine,
# requesting the EPSG:4326 CRS with scale=0.1.
ds = xr.open_dataset(ic, engine='ee', crs='EPSG:4326', scale=0.1)

In [None]:
# Download files
def save_single_file(ds, t, output_dir='input'):
    """Write the 'hourlyPrecipRate' variable of one time step to a NetCDF file.

    Parameters
    ----------
    ds : dataset-like
        Object supporting ``.sel(time=...)`` whose selection exposes the
        ``hourlyPrecipRate`` variable.
    t : time coordinate
        Time step to export; ``t.values`` is parsed with ``pd.to_datetime``.
    output_dir : str
        Directory that receives the file, named ``<%Y%m%dT%H%M%S>.nc``.
    """
    stamp = pd.to_datetime(t.values).strftime('%Y%m%dT%H%M%S')
    target = f"{output_dir}/{stamp}.nc"
    single_step = ds.sel(time=t)[['hourlyPrecipRate']]
    single_step.to_netcdf(target)

def save_times_as_files(ds, output_dir='input'):
    """Save every time step of ``ds`` as its own file using a process pool.

    Parameters
    ----------
    ds : dataset-like
        Dataset whose ``time`` coordinate is iterated; each step is handed to
        ``save_single_file``.
    output_dir : str
        Destination directory, created if it does not exist.

    Raises
    ------
    RuntimeError
        If at least one time step could not be saved. All tasks are allowed
        to finish first; the first underlying error is chained as the cause.
    """
    os.makedirs(output_dir, exist_ok=True)
    failures = []
    with ProcessPoolExecutor() as executor:
        # Map each future to its time step so failures can be reported by time.
        futures = {executor.submit(save_single_file, ds, t, output_dir): t for t in ds.time}
        for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading files"):
            # Inspect the outcome: an unchecked future would hide worker errors
            # and leave gaps in the input directory without any message.
            error = future.exception()
            if error is not None:
                failures.append((futures[future], error))
    if failures:
        first_time, first_error = failures[0]
        raise RuntimeError(
            f"{len(failures)} of {len(futures)} time steps could not be saved; "
            f"first failure at {first_time.values}: {first_error!r}"
        ) from first_error
# Save every time step of `ds` as an individual file in the default 'input' directory
save_times_as_files(ds)

In [6]:
# Create a function to read the data
def read_function(path):
    """Read one file and return its values as a rotated NumPy array.

    Parameters
    ----------
    path : str
        Path of a file readable by ``xr.open_dataarray``.

    Returns
    -------
    numpy.ndarray
        The file's values after ``np.rot90`` (one 90-degree counter-clockwise
        rotation of the first two axes).
    """
    # The context manager closes the file once the values are in memory, so
    # reading many files does not accumulate open handles.
    with xr.open_dataarray(path) as data_array:
        data = data_array.values
    return np.rot90(data)

In [7]:
# Set name_list: the configuration dictionary handed to pyfortracc
name_list = {
    # Where the input files are read from and where results are written
    'input_path': 'input/',
    'output_path': 'output/',
    # Cluster selection settings
    'thresholds': [1],
    'min_cluster_size': [10],
    'operator': '>=',
    # Matches the '<%Y%m%dT%H%M%S>.nc' file names written by save_single_file
    'timestamp_pattern': ['%Y%m%dT%H%M%S.nc'],
    'delta_time': 60,
    'cluster_method': 'ndimage',
    # Domain limits: first and last values of the dataset coordinates
    'lon_min': ds.lon.data[0],
    'lon_max': ds.lon.data[-1],
    'lat_min': ds.lat.data[0],
    'lat_max': ds.lat.data[-1],
    # Grid size: second and third dimensions of the precipitation variable
    'x_dim': ds['hourlyPrecipRate'].shape[1],
    'y_dim': ds['hourlyPrecipRate'].shape[2],
}

In [None]:
# Run pyfortracc: track using the settings in name_list, reading every input
# file through read_function, with the parallel option switched on
pyfortracc.track(name_list, read_function, parallel=True)

In [69]:
# Get the timestamps: each file under 'input/' is named '<%Y%m%dT%H%M%S>.<ext>'.
# os.path strips the directory and the extension, so the parsing does not
# depend on '/' being the path separator returned by glob.
files = glob.glob('input/*')
timestamps = sorted(
    pd.to_datetime(os.path.splitext(os.path.basename(file))[0], format='%Y%m%dT%H%M%S')
    for file in files
)
# Chronologically ordered 'YYYY-MM-DD HH:MM:SS' strings
timestamps = [str(ts) for ts in timestamps]

In [None]:
# Animate the tracking results between the first and the last available timestamp.
# Note: the longer the interval between start and end, the slower the plot.
pyfortracc.plot_animation(
    # Data access and tracking configuration
    read_function=read_function,
    name_list=name_list,
    # Time window
    start_timestamp=timestamps[0],
    end_timestamp=timestamps[-1],
    # Figure size, colormap and colorbar
    figsize=(15, 8),
    cmap='turbo',
    cbar_title='mm/h',
    min_val=1,
    max_val=25,
    # NaN value and the operation applied with it
    nan_value=1,
    nan_operation=np.less_equal,
    # Trajectory drawing options
    trajectory=True,
    smooth_trajectory=True,
    info_col_name=False,
)

In [None]:
# Plot the tracking data for a specific region (Brazil)
sp_lat_min, sp_lat_max = -30, 5
sp_lon_min, sp_lon_max = -75, -35
# Region order: [lon_min, lon_max, lat_min, lat_max]
zoom_region = [sp_lon_min, sp_lon_max, sp_lat_min, sp_lat_max]


# Animate the tracking results between the first and the last available timestamp.
# Note: the longer the interval between start and end, the slower the plot.
pyfortracc.plot_animation(
    # Data access and tracking configuration
    read_function=read_function,
    name_list=name_list,
    # Time window
    start_timestamp=timestamps[0],
    end_timestamp=timestamps[-1],
    # Restrict the view to the region defined above
    zoom_region=zoom_region,
    # Figure size, colormap and colorbar
    figsize=(10, 8),
    cmap='turbo',
    cbar_title='mm/h',
    min_val=1,
    max_val=25,
    # NaN value and the operation applied with it
    nan_value=1,
    nan_operation=np.less_equal,
    # Trajectory drawing options
    trajectory=True,
    smooth_trajectory=True,
    info_col_name=False,
)

In [72]:
# Read the tracking table: every parquet file found under
# <output_path>/track/trackingtable/ is loaded and concatenated.
tracking_files = sorted(
    glob.glob(os.path.join(name_list['output_path'], 'track', 'trackingtable', '*.parquet'))
)
# Fail with an explicit message instead of pandas' "No objects to concatenate".
if not tracking_files:
    raise FileNotFoundError(
        "No tracking table files found under "
        + os.path.join(name_list['output_path'], 'track', 'trackingtable')
        + "; run pyfortracc.track first."
    )
tracking_table = pd.concat(pd.read_parquet(f) for f in tracking_files)

In [None]:
# Get the `maxlifetime` clusters with the most rows in the tracking table
# (the row count per uid is used here as the measure of lifetime)
maxlifetime = 1
# uids of the `maxlifetime` largest groups
max_lifetimes = tracking_table.groupby('uid').size().nlargest(maxlifetime).index.values
# All tracking rows that belong to those uids
max_clusters = tracking_table[tracking_table['uid'].isin(max_lifetimes)]
print('The clusters with the highest lifetime are the uids: {}'.format(max_lifetimes))

In [103]:
# Cluster and threshold to inspect: the first uid found above and the first
# configured threshold
CLUSTER_UID = max_lifetimes[0]
THRESHOLD = name_list['thresholds'][0]
# Rows of that cluster at that threshold, re-indexed by timestamp
filterd_cluster = (
    tracking_table
    .loc[(tracking_table['uid'] == CLUSTER_UID) & (tracking_table['threshold'] == THRESHOLD)]
    .reset_index(drop=True)
    .set_index('timestamp')
)

In [None]:
# Animate only the selected cluster, from its first to its last tracked timestamp.
# Note: the longer the interval between start and end, the slower the plot.
pyfortracc.plot_animation(
    # Data access and tracking configuration
    read_function=read_function,
    name_list=name_list,
    # Time window covered by the selected cluster
    start_timestamp=filterd_cluster.index.min().strftime('%Y-%m-%d %H:%M:%S'),
    end_timestamp=filterd_cluster.index.max().strftime('%Y-%m-%d %H:%M:%S'),
    # Only this uid is passed to the animation
    uid_list=[CLUSTER_UID],
    # Figure size, colormap and colorbar
    figsize=(15, 8),
    cmap='turbo',
    cbar_title='mm/h',
    min_val=1,
    max_val=25,
    # NaN value and the operation applied with it
    nan_value=1,
    nan_operation=np.less_equal,
    # Trajectory drawing options
    trajectory=True,
    smooth_trajectory=True,
    info_col_name=False,
)

In [None]:
# Time series of the selected cluster: size (left axis) and maximum precipitation
# (right axis) against time, with status and lifetime on a secondary top axis.
fig, ax1 = plt.subplots(1, 1, figsize=(15, 6))
ax2 = ax1.twinx()
ax3 = ax2.twiny()

# Plot the data
filterd_cluster['size'].plot(ax=ax1, marker='o', linestyle='dashed', color='C1', zorder=5, linewidth=3, markersize=10)
filterd_cluster['max'].plot(ax=ax2, marker='o', linestyle='dotted', color='C0', zorder=1, linewidth=2, markersize=10)
# Legend: handles come from both y axes, labels are set explicitly
lines = ax1.get_legend_handles_labels()[0]
lines2 = ax2.get_legend_handles_labels()[0]
labels = ['Size (pixels)']
labels2 = ['Max Precipitation(mm/h)']
ax2.legend(lines + lines2, labels + labels2, loc=0)
ax1.set_ylabel('Size (pixels)')
ax2.set_ylabel('Max Precipitation(mm/h)')
ax3.set_xticks(range(0, int(filterd_cluster.lifetime.max()) + 1, 60))
ax3.tick_params(axis='x', which='major', labelsize=8, rotation=45)
# Put the time labels on the main x axis
ax1.set_xticks(filterd_cluster.index)
ax1.set_xticklabels([ts.strftime('%H:%M') for ts in filterd_cluster.index], rotation=45, ha='right', fontsize=8)
# Status above the plot. The columns are iterated directly: integer keys on a
# timestamp-indexed Series (e.g. series[i]) rely on deprecated positional fallback.
# NOTE(review): 33.5 is a fixed y position in ax3 data coordinates — confirm it
# suits the plotted value range.
for lifetime, status in zip(filterd_cluster['lifetime'], filterd_cluster['status']):
    ax3.text(lifetime - 20, 33.5, status, fontsize=8, fontweight='bold')
# Axis titles
ax1.set_xlabel('Tracking Start at ' + filterd_cluster.index.min().strftime('%Y-%m-%d %H:%M'), labelpad=12, fontsize=12, fontweight='bold')
ax3.set_xlabel('Status and Lifetime (hours)', labelpad=16, fontsize=16, fontweight='bold')
ax3.set_xlim(filterd_cluster.lifetime.min(), filterd_cluster.lifetime.max())
ax3.grid(True, which='both', axis='x', linestyle='--', alpha=0.5)
ax1.grid(True, which='both', axis='y', linestyle='--', alpha=0.2)
plt.tight_layout()
plt.show()
