In [2]:
# Standard library packages
import os
import sys
import glob

# Third party packages
import iris
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import iris.quickplot as qplt
import iris.plot as iplt
import datetime
import shutil
from six.moves import urllib
from pathlib import Path
import trackpy
from iris.time import PartialDateTime
import functions
import dask
import dask.array as da
import dask.distributed as dd
from dask import delayed
import cartopy.crs as ccrs
import xarray as xr
import netCDF4 as nc
import scipy
from scipy import ndimage
from scipy.ndimage import label, generate_binary_structure
import tobac #tobac package cloned from https://github.com/tobac-project/tobac.git


In [3]:
# Import and set up warnings
import warnings
warnings.filterwarnings('ignore', category=UserWarning, append=True)
warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
warnings.filterwarnings('ignore', category=FutureWarning, append=True)
warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)
warnings.filterwarnings('ignore')

In [4]:
# Import the functions and dictionaries
import functions as fnc
import dictionaries as dic

In [5]:
# Load the mask, precip and tracks datasets from the three files
# configured in the dictionaries module
mask, precip, tracks = fnc.open_datasets(
    dic.mask_file,
    dic.precip_file,
    dic.tracks_file,
)

In [6]:
# Create a copy of the tracks data and rebind `tracks` to it, so the
# following cells operate on the object returned by fnc.copy_tracks_file.
# NOTE(review): presumably this keeps the originally loaded data unmodified -
# confirm against the helper's implementation.
tracks = fnc.copy_tracks_file(tracks)

In [7]:
# Add the precip columns to tracks
# NOTE(review): presumably these are the total_precip / rain_flag /
# convective_precip / heavy_precip / extreme_precip / heavy_rain_flag /
# extreme_rain_flag columns visible when `tracks` is displayed - confirm.
# NOTE(review): `tracks` is rebound to the helper's result, so re-running this
# cell passes the already-extended frame back in - confirm the helper
# tolerates that.
tracks = fnc.add_precip_columns(tracks)

In [8]:
# Remove non track cells from tracks
# NOTE(review): how a non-track row is identified is not visible in this
# notebook - confirm the rule in the functions module.
tracks = fnc.remove_non_track_cells(tracks)

In [10]:
# Show the lazy Dask DataFrame summary (column names, dtypes, npartitions).
# Row values are not computed by this cell.
# NOTE(review): this cell's execution count (10) is higher than the next
# cell's (9), so it was run out of order - re-check with Restart & Run All.
tracks

Unnamed: 0_level_0,frame,idx,hdim_1,hdim_2,num,threshold_value,feature,time,timestr,latitude,longitude,forecast_reference_time,forecast_period,cell,time_cell,total_precip,rain_flag,convective_precip,heavy_precip,extreme_precip,heavy_rain_flag,extreme_rain_flag
npartitions=100,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,int64,int64,float64,float64,int64,int64,int64,object,object,float64,float64,float64,float64,int64,timedelta64[ns],int64,int64,int64,int64,int64,int64,int64
91,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8991,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9080,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [9]:
# Find all of the unique track cells values within the tracks df
# The helper prints the result: a lazy Dask array with shape=(nan,), i.e. its
# length is unknown until the array is computed.
unique_cells = fnc.find_unique_cells(tracks)

The shape of the unique cells array is:  (nan,)
The unique cells array is:  dask.array<getitem, shape=(nan,), dtype=int64, chunksize=(nan,), chunktype=numpy.ndarray>


In [None]:
# Materialise the unique cell identifiers before looping over them.
# `unique_cells` is a lazy Dask array with unknown chunk sizes (shape=(nan,)),
# and iterating over such an array fails because its length is undefined.
# dask.compute passes non-Dask inputs through unchanged, so this line also
# works if the helper returns a plain in-memory array.
(cell_ids,) = dask.compute(unique_cells)

# Create a list of delayed computations, one per cell. Nothing is executed
# until dask.compute is called below. Dask collections passed as arguments
# (e.g. `tracks`) are computed by Dask before the function receives them.
delayed_results = [
    delayed(fnc.precip_filtering_loop_cell)(
        cell,
        tracks,
        precip,
        mask,
        cell_ids,
        dic.precip_threshold,
        dic.heavy_precip_threshold,
        dic.extreme_precip_threshold,
        dic.s,
        dic.precip_area,
        dic.removed_tracks,
        dic.tracks_filtered_output,
        dic.tracks_cell_output_dir,
    )
    for cell in cell_ids
]

# Compute the results in parallel; the result is a tuple with one entry per cell
tracks_list = dask.compute(*delayed_results)

In [None]:
# Concatenate the per-cell results back into a single dataframe.
# pd.concat produces an in-memory pandas object here, not a Dask DataFrame.
# NOTE(review): assumes every entry of tracks_list is a pandas DataFrame
# returned by fnc.precip_filtering_loop_cell - confirm.
tracks = pd.concat(tracks_list)