<div align="center" style="color:#336699"><h2><b> Track Global Precipitation JAXA GSMAP </b></h2></div>
<hr style="border:2px solid #0077b9;">
<br/>
<div style="text-align: center;font-size: 90%;">
    Helvécio B. Leal Neto, <sup><a href="https://orcid.org/0000-0002-7526-2094"><i class="fab fa-lg fa-orcid" style="color: #a6ce39"></i></a></sup>
    Alan J. P. Calheiros<sup><a href="https://orcid.org/0000-0002-7526-2094"><i class="fab fa-lg fa-orcid" style="color: #a6ce39"></i></a></sup>
    <br/><br/>
    National Institute for Space Research (INPE)
    <br/>
    Avenida dos Astronautas, 1758, Jardim da Granja, São José dos Campos, SP 12227-010, Brazil
    <br/><br/>
    Contact: <a href="mailto:helvecio.neto@inpe.br">helvecio.neto@inpe.br</a>, <a href="mailto:alan.calheiros@inpe.br">alan.calheiros@inpe.br</a>
    <br/><br/>
    Last Update: Jun 16, 2024
</div>

<br/>

<div style="text-align: justify;  margin-left: 25%; margin-right: 25%;">
<b>Abstract.</b> This Jupyter Notebook shows how to use pyfortracc to track global precipitation systems using JAXA GSMaP data.
</div>    
<br/>
<div style="text-align: justify;  margin-left: 15%; margin-right: 15%;font-size: 75%; border-style: solid; border-color: #0077b9; border-width: 1px; padding: 5px;">
    <b>In this example, we will use pyfortracc to compute the tracks of precipitating systems over the globe and explore the output data produced by the algorithm workflow.
</b>
    <div style="margin-left: 10px; margin-right: 10px; margin-top:10px">
      <p> Leal Neto, H.B.; Calheiros, A.J.P.;  pyForTraCC Algorithm. São José dos Campos, INPE, 2024. <a href="https://github.com/fortracc-project/pyfortracc" target="_blank"> Online </a>. </p>
    </div>
</div>

### Schedule
 [1. Goals and the Data](#goals)<br>
 [2. Setup Env](#setup)<br>
 [3. Parameters: Name_list](#namelist)<br>
 [4. Track Visualization](#visualization)<br>
 [5. The Tracking Table](#tracktable)<br>
 [6. Post Processing](#post)<br>

In [None]:
import getpass
import os
import subprocess
import sys

# Credentials are never stored in the notebook: they are taken from the
# GSMAP_USER / GSMAP_PASSWORD environment variables when set, otherwise the
# user is prompted interactively (the password prompt does not echo).
user = os.environ.get('GSMAP_USER') or input('GSMaP user: ')
password = os.environ.get('GSMAP_PASSWORD') or getpass.getpass('GSMaP password: ')
source = '/standard/v8/netcdf/'
start_date = '2002-08-15'
end_date = '2002-08-16'

# Run the downloader with an argument list instead of a shell command line.
# With `!python ... $password` the expanded text is re-parsed by the shell, so
# characters such as `$`, spaces or quotes inside the password get mangled
# (e.g. `$$` becomes the shell PID). An argv list passes every value verbatim.
download_cmd = [sys.executable, 'download_gsmap.py',
                start_date, end_date, source, user, password]
completed = subprocess.run(download_cmd, stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT, text=True)
print(completed.stdout)
# Fail loudly if the download script exited with an error.
completed.check_returncode()

In [2]:
# Enable IPython's autoreload extension so edited modules are re-imported
# automatically before code is executed.
%reload_ext autoreload
%autoreload 2
import sys
# Add the directory two levels above the notebook to the import search path
# (presumably the local pyfortracc checkout — confirm against the repo layout).
sys.path.append('../../')

In [3]:
import netCDF4 as nc
def read_function(path):
    """Read the 'hourlyPrecipRate' field from one netCDF file.

    Parameters
    ----------
    path : str
        Path of the netCDF file to open.

    Returns
    -------
    The raw (unmasked) values of ``hourlyPrecipRate`` at index 0 of the
    leading axis (presumably the time axis — confirm against the files).
    """
    variable = 'hourlyPrecipRate'
    # The context manager closes the file handle once the values are in
    # memory; the tracker calls this for every input file, so leaving
    # datasets open would leak one handle per file.
    with nc.Dataset(path) as dataset:
        return dataset[variable][:].data[0]

In [7]:
import pyfortracc

In [8]:
# Configuration dictionary handed to the pyfortracc calls in this notebook.
name_list = {
    # Input / output locations
    'input_path': 'netcdf/',
    'output_path': 'output/',
    # Segmentation settings (the two lists have one entry per threshold)
    'thresholds': [0.1, 1, 5],
    'min_cluster_size': [10, 5, 3],
    'operator': '>=',
    # File name patterns used to extract the timestamp from each input file
    'timestamp_pattern': ['gsmap_mvk.%Y%m%d.%H%M.v8.0000.0.nc',
                          'gsmap_mvk.%Y%m%d.%H%M.v8.0000.1.nc'],
    'delta_time': 60,
    'cluster_method': 'ndimage',
    'edges': True,
    # Correction switches
    'spl_correction': True,
    'mrg_correction': True,
    'inc_correction': True,
    'opt_correction': True,
    # Validation switches
    'validation': True,
    'validation_scores': True,
    # Geographic bounds and grid dimensions
    # (3600 x 1800 cells over 360 x 180 degrees, i.e. 0.1 degree per cell)
    'lon_min': -180,
    'lon_max': 180,
    'lat_min': -90,
    'lat_max': 90,
    'x_dim': 3600,
    'y_dim': 1800,
    # Parallelism
    'n_jobs': 10,
}

In [None]:
# Run the tracking step: pyfortracc receives the configuration in `name_list`
# and uses `read_function` to load each input file; parallel mode is enabled.
pyfortracc.track(name_list, read_function, parallel=True)

In [None]:
# Post processing
# Compute the duration of the tracked systems from the tracking output
# configured in `name_list` (parallel mode enabled).
pyfortracc.post_processing.compute_duration(name_list, parallel=True)

In [8]:
import netCDF4 as nc
def plot_function(path, vmin=0, vmax=25):
    """Read 'hourlyPrecipRate' from a netCDF file and clamp it for plotting.

    Parameters
    ----------
    path : str
        Path of the netCDF file to open.
    vmin, vmax : number, optional
        Values below ``vmin`` are replaced by ``vmin`` and values above
        ``vmax`` by ``vmax``. The defaults (0 and 25) reproduce the limits
        previously hard-coded in this function.

    Returns
    -------
    The raw (unmasked) values of ``hourlyPrecipRate`` at index 0 of the
    leading axis (presumably the time axis — confirm), clamped to
    ``[vmin, vmax]``.
    """
    variable = 'hourlyPrecipRate'
    # Close the dataset as soon as the values are loaded so repeated plot
    # calls do not accumulate open file handles.
    with nc.Dataset(path) as dataset:
        data = dataset[variable][:].data[0]
    data[data < vmin] = vmin
    data[data > vmax] = vmax
    return data

In [None]:
# Global view of the precipitation field at 2002-08-15 12:00, without
# tracking overlays (boundary, vector and trajectory are all disabled).
pyfortracc.plot(
    name_list,
    plot_function,
    '2002-08-15 12:00:00',
    title='GSMap Hourly Precipitation',
    figsize=(12, 8),
    scalebar=False,
    boundary=False,
    vector=False,
    trajectory=False,
    cmap='turbo',
    cbar_title='mm/h',
    cbar_extend='max',
    bounds_info=True,
    bound_linewidth=0.1,
    # NOTE(review): the last value is 91 while name_list['lat_max'] is 90 —
    # presumably [lon_min, lon_max, lat_min, lat_max]; confirm 91 is intended.
    zoom_region=[-180, 180, -90, 91],
)

In [None]:
# Regional view at 2002-08-15 07:00 restricted to uid 8962 at the 0.1
# threshold, with boundary, centroid, trajectory and info box enabled.
pyfortracc.plot(
    name_list,
    plot_function,
    '2002-08-15 07:00:00',
    title='Tracked PS UID',
    figsize=(12, 8),
    boundary=True,
    bound_linewidth=2,
    centroid=True,
    centr_size=30,
    centr_color='red',
    vector=False,
    trajectory=True,
    traj_color='g',
    traj_linewidth=4,
    info=True,
    info_col_name=True,
    background='default',
    info_cols=['uid', 'status', 'lifetime', 'size', 'max', 'duration'],
    grid_deg=2,
    threshold_list=[0.1],
    cbar=False,
    ticks_fontsize=12,
    title_fontsize=18,
    uid_list=[8962],
    x_scale=-0.9,
    y_scale=0.8,
    box_fontsize=16,
    cmap='turbo',
    cbar_title='mm/h',
    bound_color='red',
    cbar_extend='max',
    zoom_region=[-60, -46, -32, -23],
)

In [None]:
# Same regional view of uid 8962 at 2002-08-15 18:00; the info box offsets
# (x_scale / y_scale) differ from the other snapshots.
pyfortracc.plot(
    name_list,
    plot_function,
    '2002-08-15 18:00:00',
    title='Tracked PS 8962',
    figsize=(12, 8),
    boundary=True,
    bound_linewidth=2,
    centroid=True,
    centr_size=30,
    centr_color='red',
    vector=False,
    trajectory=True,
    traj_color='g',
    traj_linewidth=4,
    info=True,
    info_col_name=True,
    background='default',
    info_cols=['uid', 'status', 'lifetime', 'size', 'max', 'duration'],
    grid_deg=2,
    threshold_list=[0.1],
    cbar=False,
    ticks_fontsize=12,
    title_fontsize=18,
    uid_list=[8962],
    x_scale=-0.4,
    y_scale=1.3,
    box_fontsize=16,
    cmap='turbo',
    cbar_title='mm/h',
    bound_color='red',
    cbar_extend='max',
    zoom_region=[-60, -46, -32, -23],
)

In [None]:
# Regional view of uid 8962 at 2002-08-16 06:00 with the same overlay
# settings as the earlier snapshots.
pyfortracc.plot(
    name_list,
    plot_function,
    '2002-08-16 06:00:00',
    title='Tracked PS 8962',
    figsize=(12, 8),
    boundary=True,
    bound_linewidth=2,
    centroid=True,
    centr_size=30,
    centr_color='red',
    vector=False,
    trajectory=True,
    traj_color='g',
    traj_linewidth=4,
    info=True,
    info_col_name=True,
    background='default',
    info_cols=['uid', 'status', 'lifetime', 'size', 'max', 'duration'],
    grid_deg=2,
    threshold_list=[0.1],
    cbar=False,
    ticks_fontsize=12,
    title_fontsize=18,
    uid_list=[8962],
    x_scale=-0.9,
    y_scale=0.8,
    box_fontsize=16,
    cmap='turbo',
    cbar_title='mm/h',
    bound_color='red',
    cbar_extend='max',
    zoom_region=[-60, -46, -32, -23],
)

In [None]:
# Regional view of uid 8962 at 2002-08-16 15:00. Unlike the previous
# snapshots this call does not set ticks_fontsize / title_fontsize and uses
# a smaller info box font (12).
pyfortracc.plot(
    name_list,
    plot_function,
    '2002-08-16 15:00:00',
    title='Tracked PS 8962',
    figsize=(12, 8),
    boundary=True,
    bound_linewidth=2,
    centroid=True,
    centr_size=30,
    centr_color='red',
    vector=False,
    trajectory=True,
    traj_color='g',
    traj_linewidth=4,
    info=True,
    info_col_name=True,
    background='default',
    info_cols=['uid', 'status', 'lifetime', 'size', 'max', 'duration'],
    grid_deg=2,
    threshold_list=[0.1],
    cbar=False,
    uid_list=[8962],
    x_scale=-0.9,
    y_scale=0.8,
    box_fontsize=12,
    cmap='turbo',
    cbar_title='mm/h',
    bound_color='red',
    cbar_extend='max',
    zoom_region=[-60, -46, -32, -23],
)

In [27]:
import pandas as pd
import glob
import matplotlib.pyplot as plt

In [25]:
# Collect every tracking-table parquet file written under the output path,
# in sorted file-name order.
tracking_files = sorted(glob.glob(name_list['output_path'] + '/track/trackingtable/*.parquet'))
# Fail with an actionable message when the tracking step has not produced
# any table yet, instead of pandas' generic "No objects to concatenate".
if not tracking_files:
    raise FileNotFoundError(
        "No tracking table found under "
        + name_list['output_path'] + '/track/trackingtable/'
        + " - run pyfortracc.track(...) before loading the table."
    )
# Stack the per-file tables into a single DataFrame (original row indices
# of each file are kept as-is).
tracking_table = pd.concat([pd.read_parquet(f) for f in tracking_files])

In [None]:
CLUSTER_UID = 8962
THRESHOLD = 0.1

# Rows of the selected system at the selected threshold.
is_selected = (tracking_table['uid'] == CLUSTER_UID) & (tracking_table['threshold'] == THRESHOLD)

# Build the filtered frame as a new object with a non-mutating chain.
# Modifying the `.loc` selection in place and then assigning a column to it
# triggers pandas' SettingWithCopyWarning; this form is also safe to re-run.
filterd_cluster = (
    tracking_table.loc[is_selected]
    .set_index('timestamp')
    # Convert size from pixel count to km^2, assuming each pixel covers about
    # 10 km x 10 km (approximation for the 0.1 degree grid — confirm).
    .assign(size=lambda table: table['size'] * 10 * 10)
)
filterd_cluster.head(3)

In [None]:
# Time series of the selected cluster: size on the left axis, maximum
# precipitation on the right axis and lifetime/status on the top axis.
fig, ax1 = plt.subplots(1, 1, figsize=(15, 6))
ax2 = ax1.twinx()
ax3 = ax2.twiny()

# Plot the data
filterd_cluster['size'].plot(ax=ax1, marker='o', linestyle='dashed', color='C1', zorder=5, linewidth=3, markersize=10)
filterd_cluster['max'].plot(ax=ax2, marker='o', linestyle='dotted', color='C0', zorder=1, linewidth=2, markersize=10)
# Legend: merge the handles of both y-axes and give them explicit labels
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
labels = ['Size (km²)']
labels2 = ['Max Precipitation(mm/h)']
ax2.legend(lines + lines2, labels + labels2, loc=0)
# Axes
ax1.set_ylabel('Size (km²)')
ax2.set_ylabel('Max Precipitation(mm/h)')
ax3.set_xticks(range(0, int(filterd_cluster.lifetime.max()) + 1, 60))
ax3.tick_params(axis='x', which='major', labelsize=10)
# Put the time-of-day labels on the main x axis
ax1.set_xticks(filterd_cluster.index)
ax1.set_xticklabels([ts.strftime('%H:%M') for ts in filterd_cluster.index], rotation=45, ha='right', fontsize=14)
# Status text above the plot.
# Iterate over the values directly: the frame is indexed by timestamp, so
# `series[i]` with an integer relied on pandas' deprecated positional
# fallback for Series.__getitem__.
for lifetime, status in zip(filterd_cluster['lifetime'].to_numpy(),
                            filterd_cluster['status'].to_numpy()):
    # 33.5 is a hard-coded y position in ax3 data coordinates.
    ax3.text(lifetime - 20, 33.5, status, fontsize=11, fontweight='bold')
# Status axis
ax1.set_xlabel('Tracking Start at ' + filterd_cluster.index.min().strftime('%Y-%m-%d %H:%M'), labelpad=12, fontsize=16, fontweight='bold')
# NOTE(review): the label says hours, but ticks are placed every 60 lifetime
# units and the spans below go up to 1950 — lifetime looks like minutes; confirm.
ax3.set_xlabel('Status and Lifetime (hours)', labelpad=16, fontsize=16, fontweight='bold')
ax3.set_xlim(filterd_cluster.lifetime.min(), filterd_cluster.lifetime.max())
ax3.grid(True, which='both', axis='x', linestyle='--', alpha=0.5)
ax1.grid(True, which='both', axis='y', linestyle='--', alpha=0.2)
# Hard-coded highlighted lifetime intervals.
# NOTE(review): the 630-690 interval is shaded twice (C1 and red) — confirm intended.
ax3.axvspan(0, 30, color='green', alpha=0.2)
ax3.axvspan(630, 690, color='C1', alpha=0.2)
ax3.axvspan(630, 690, color='red', alpha=0.2)
ax3.axvspan(1350, 1400, color='C0', alpha=0.2)
ax3.axvspan(1900, 1950, color='grey', alpha=0.2)
plt.tight_layout()
plt.show()


In [None]:
# Display every row of the tracking table for uid 8962 at threshold 0.1
# (same selection as CLUSTER_UID / THRESHOLD, shown without the size conversion).
tracking_table.loc[(tracking_table['uid'] == 8962) & (tracking_table['threshold'] == 0.1)]

In [1]:
# Configuration dictionary for the post-processing steps below. It mirrors
# the tracking configuration, adds 'elp_correction' and uses 12 jobs.
name_list = {
    # Input / output locations
    'input_path': 'netcdf/',
    'output_path': 'output/',
    # Segmentation settings (the two lists have one entry per threshold)
    'thresholds': [0.1, 1, 5],
    'min_cluster_size': [10, 5, 3],
    'operator': '>=',
    # File name patterns used to extract the timestamp from each input file
    'timestamp_pattern': ['gsmap_mvk.%Y%m%d.%H%M.v8.0000.0.nc',
                          'gsmap_mvk.%Y%m%d.%H%M.v8.0000.1.nc'],
    'delta_time': 60,
    'cluster_method': 'ndimage',
    'edges': True,
    # Correction switches
    'spl_correction': True,
    'mrg_correction': True,
    'inc_correction': True,
    'opt_correction': True,
    'elp_correction': True,
    # Validation switches
    'validation': True,
    'validation_scores': True,
    # Geographic bounds and grid dimensions
    # (3600 x 1800 cells over 360 x 180 degrees, i.e. 0.1 degree per cell)
    'lon_min': -180,
    'lon_max': 180,
    'lat_min': -90,
    'lat_max': 90,
    'x_dim': 3600,
    'y_dim': 1800,
    # Parallelism
    'n_jobs': 12,
}

In [2]:
# Enable IPython's autoreload extension so edited modules are re-imported
# automatically before code is executed.
%reload_ext autoreload
%autoreload 2
import sys
# Add the directory two levels above the notebook to the import search path
# before importing pyfortracc (presumably to pick up the local checkout — confirm).
sys.path.append('../../')
import pyfortracc

In [None]:
# Build the spatial summary for the first threshold level (index 0) and the
# listed tracking-table columns; the result is kept in `ds`.
ds = pyfortracc.post_processing.spatial_info(
    name_list,
    threshold_level=0,
    min_duration=180,
    min_distance=50,
    parallel=True,
    frequency='1M',
    var_cols=[
        'status', 'lifetime', 'size',
        'u_', 'v_', 'u_opt', 'u_noc', 'far', 'far_',
    ],
)

In [None]:
# Display the object returned by pyfortracc.post_processing.spatial_info.
ds

In [None]:
# The columns of `tste` are datetime objects; the intent is to resample the
# data along them (3-month bins) and collapse the result into a single row.
# NOTE(review): `tste` is not defined in any visible cell, so this fails on a
# fresh kernel; `resample('3M')` is also used without an aggregation — confirm
# the intended source of `tste` and the aggregation (e.g. sum or mean).
tste.T.apply(lambda x: x.resample('3M')).T

In [35]:
import pandas as pd

In [None]:
# Export the tracking results as GeoJSON: only the cluster output is enabled
# (boundary and trajectory are switched off), with velocities in km/h.
pyfortracc.spatial_conversions(
    name_list,
    boundary=False,
    trajectory=False,
    cluster=True,
    vel_unit='km/h',
    driver='GeoJSON',
)