In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import os 
import timeit
import math
import time
import sparse
import dask
import dask.array as da
from dask.diagnostics import ProgressBar
from glob import glob
from os import path
import pickle
from tqdm import tqdm

from ll_Balltree import *
%run -i 'll_Balltree.py'

In [2]:
outputDir = 'data/posterior_computation_data/'
gom_masks = xr.open_dataset(outputDir + 'gom_masks.nc')

# GLOBAL CONSTANTS
MIN_LON = np.min(gom_masks['lon'].values)
MAX_LON = np.max(gom_masks['lon'].values)
MIN_LAT = np.min(gom_masks['lat'].values)
MAX_LAT = np.max(gom_masks['lat'].values)

#domain width and height (cell counts)
LAT_SIZE = gom_masks.dims['lat']
LON_SIZE = gom_masks.dims['lon']

#cell size
D_LON = gom_masks["lon"][1].values - gom_masks["lon"][0].values
D_LAT = gom_masks["lat"][1].values - gom_masks["lat"][0].values

BIN_CELL_LATS = gom_masks.bin_cell_lats.values
BIN_CELL_LONS = gom_masks.bin_cell_lons.values

MIN_LON, MAX_LON, MIN_LAT, MAX_LAT,LAT_SIZE,LON_SIZE, D_LON,D_LAT

(-97.98001098632812,
 -76.45999145507812,
 18.140000343322754,
 31.899998664855957,
 345,
 539,
 0.03997802734375,
 0.03999900817871094)

In [3]:
inputDir_Sim = ('data/output_v2/rechunked/')
inputFiles_Sim = sorted(glob(inputDir_Sim + '*.zarr' ))

# Save domain_cell_tree
fileObj = open(outputDir + 'output_dict.obj', 'rb')
output = pickle.load(fileObj)
fileObj.close()

In [4]:
n_cell_beaching = output['n_cell_beaching']
n_cell_source = output['n_cell_source']
n_window_beaching = output['n_window_beaching'] 
n_window_source = output['n_window_source']

particle_count = output['particle_count']

beaching_cells = output['beaching_cells'] 
beaching_cell_tree = output['beaching_cell_tree']

source_cell_mask = output['source_cell_mask']
source_cells = output['source_cells']
source_cell_tree = output['source_cell_tree']

beaching_windows = output['beaching_windows']
source_windows = output['source_windows']
d = output['d']
beaching_ym_mat = output['beaching_ym_mat']
source_ym_mat = output['source_ym_mat']

Compute Trajectories for Posterior Computation

In [5]:
file_particle_counts = []
file_obs_counts = []
for i in range(5):
        filename = inputFiles_Sim[i]
        df = xr.open_zarr(filename).dropna(dim="trajectory", how="all")
        file_particle_counts.append(df.dims['trajectory'])
        file_obs_counts.append(df.dims['obs'])

In [6]:
n_total_particles, n_obs = sum(file_particle_counts), file_obs_counts[0]
(n_total_particles, n_obs)

(349177, 182)

In [7]:
beaching_locs = np.zeros((n_total_particles))*np.nan
source_traj_locs = np.zeros((n_total_particles, (n_obs-1)))*np.nan
beaching_times = np.zeros((n_total_particles))*np.nan
source_traj_times = np.zeros((n_total_particles, (n_obs-1)))*np.nan

# Trajectories are stored in 5 seperate files 
particle_counter = 0

for file in inputFiles_Sim:

    print("Processing: " + file)

    df = xr.open_zarr(file).dropna(dim="trajectory", how="any")

    print("df shape: " + str(df.dims))

    # Get raw values
    time_raw = pd.DatetimeIndex(df['time'].values)
    x_raw = df['lon'].values
    y_raw = df['lat'].values
    oob_raw = df['beached'].values

    # Get Nurdle Beaching Count
    query = get_cells(None, y_raw[:,0], x_raw[:,0])
    _, cell_beaching_inds = query_ll_BallTree(tree = beaching_cell_tree, cells = query)

    # COMPUTE BIN COUNTS
    for i in tqdm(range(len(x_raw)), desc = "Compute Trajectory Counts"):
        #Get beaching location
        beaching_cell_ind = cell_beaching_inds[i]

        # Get source locations
        source_ll_query = get_cells(None,  y_raw[i, 1:], x_raw[i, 1:])
        _, source_cells_inds = query_ll_BallTree(tree = source_cell_tree, cells = source_ll_query)
        source_cells_inds = np.array(source_cells_inds, dtype=float)

        # Nans must be added since BallTree cannot query NaNs
        oob_inds = np.array(np.where(oob_raw[i,  1:] == 5.0)[0])
        source_cells_inds[oob_inds] = np.nan

        # Get beaching window
        beaching_windows_raw = time_raw[i][0]
        beaching_window_ind = beaching_ym_mat[beaching_windows_raw.year - 2019, beaching_windows_raw.month - 1]

        # Get source windows
        source_windows_raw = time_raw[i][1:]
        source_windows_inds = source_ym_mat[(source_windows_raw.year - 2019), (source_windows_raw.month - 1)]

        beaching_locs[particle_counter] = beaching_cell_ind
        source_traj_locs[particle_counter, :] = source_cells_inds
        beaching_times[particle_counter] = beaching_window_ind
        source_traj_times[particle_counter, :] = source_windows_inds

        particle_counter += 1
            

Processing: data/output_v2/rechunked/GOM_rt_2019_12.0-7.0_at_180.0.zarr
df shape: Frozen({'trajectory': 102710, 'obs': 182})


Compute Trajectory Counts: 100%|██████████| 102710/102710 [02:29<00:00, 687.55it/s]


Processing: data/output_v2/rechunked/GOM_rt_2020_12.0-7.0_at_180.0.zarr
df shape: Frozen({'trajectory': 113992, 'obs': 182})


Compute Trajectory Counts: 100%|██████████| 113992/113992 [02:14<00:00, 844.92it/s]


Processing: data/output_v2/rechunked/GOM_rt_2020_6.0-1.0_at_180.0.zarr
df shape: Frozen({'trajectory': 44828, 'obs': 182})


Compute Trajectory Counts: 100%|██████████| 44828/44828 [01:06<00:00, 676.91it/s] 


Processing: data/output_v2/rechunked/GOM_rt_2021_12.0-7.0_at_180.0.zarr
df shape: Frozen({'trajectory': 23475, 'obs': 182})


Compute Trajectory Counts: 100%|██████████| 23475/23475 [00:30<00:00, 758.42it/s]


Processing: data/output_v2/rechunked/GOM_rt_2021_6.0-1.0_at_180.0.zarr
df shape: Frozen({'trajectory': 61589, 'obs': 182})


Compute Trajectory Counts: 100%|██████████| 61589/61589 [01:43<00:00, 597.37it/s] 


In [8]:
ds = xr.Dataset(
    {
        'beaching_locs': (['trajectory'], beaching_locs),
        'source_traj_locs': (['trajectory', 'locations'], source_traj_locs),
        'beaching_times': (['trajectory'], beaching_times),
        'source_traj_times': (['trajectory', 'locations'], source_traj_times),
        

    },
    coords={
        'trajectory': np.arange(0,n_total_particles),
        'locations': np.arange((n_obs-1)),
    },
)

In [9]:
ds.to_netcdf(outputDir + 'trajectory_mat_st.nc')