In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import os 
import timeit
import math
import time
import sparse
import dask
import dask.array as da
from dask.diagnostics import ProgressBar
from glob import glob
from os import path
import pickle
from tqdm import tqdm

from ll_Balltree import *
%run -i 'll_Balltree.py'

In [2]:
outputDir = 'data/posterior_computation_data/'
gom_masks = xr.open_dataset(outputDir + 'gom_masks.nc')

# GLOBAL CONSTANTS
MIN_LON = np.min(gom_masks['lon'].values)
MAX_LON = np.max(gom_masks['lon'].values)
MIN_LAT = np.min(gom_masks['lat'].values)
MAX_LAT = np.max(gom_masks['lat'].values)

#domain width and height (cell counts)
LAT_SIZE = gom_masks.dims['lat']
LON_SIZE = gom_masks.dims['lon']

#cell size
D_LON = gom_masks["lon"][1].values - gom_masks["lon"][0].values
D_LAT = gom_masks["lat"][1].values - gom_masks["lat"][0].values

BIN_CELL_LATS = gom_masks.bin_cell_lats.values
BIN_CELL_LONS = gom_masks.bin_cell_lons.values

MIN_LON, MAX_LON, MIN_LAT, MAX_LAT,LAT_SIZE,LON_SIZE, D_LON,D_LAT

(-97.98001098632812,
 -76.45999145507812,
 18.140000343322754,
 31.899998664855957,
 345,
 539,
 0.03997802734375,
 0.03999900817871094)

In [3]:
# Load domain_cell_tree
fileObj = open(outputDir + 'output_dict.obj', 'rb')
output = pickle.load(fileObj)
fileObj.close()

In [4]:
n_cell_beaching = output['n_cell_beaching']
n_cell_source = output['n_cell_source']
n_window_beaching = output['n_window_beaching'] 
n_window_source = output['n_window_source']

particle_count = output['particle_count']

beaching_cells = output['beaching_cells'] 
beaching_cell_tree = output['beaching_cell_tree']

source_cell_mask = output['source_cell_mask']
source_cells = output['source_cells']
source_cell_tree = output['source_cell_tree']

beaching_windows = output['beaching_windows']
source_windows = output['source_windows']
d = output['d']
beaching_ym_mat = output['beaching_ym_mat']
source_ym_mat = output['source_ym_mat']
n_cell_beaching,n_cell_source,n_window_beaching, n_window_source

(188, 114024, 30, 36)

Compute Likelihood Quantities

In [5]:
ds = xr.open_dataset(outputDir + 'trajectory_mat_st.nc')

In [6]:
Likelihood_mat = np.zeros(((n_cell_source*n_window_source),n_cell_beaching*(d+1)))

for source_time in tqdm(range(n_window_source), desc="Calculating Likelihood Matrix"):
    # Create Output Matrix row block
    out = np.zeros((n_cell_source, n_cell_beaching * (d+1)))

    # Find all observations with indexed source time
    traj_idx, obs_idx = np.where(ds.source_traj_times == source_time)

    # Collect the source cells, beaching windows, beaching cells
    df ={"ll_row": ds.source_traj_locs.values[traj_idx, obs_idx],"beaching_window": ds.beaching_times.values[traj_idx], "beaching_cell": ds.beaching_locs.values[traj_idx]}
    df = pd.DataFrame(df)
    
    # Find frequency counts for each (source cells, beaching windows, beaching cells) combination 
    freqs = df.value_counts().reset_index().astype(int)

    # Shift beaching window to one of the (d+1) possible beaching windows from the indexed source time
    freqs['beaching_window'] = source_time - freqs['beaching_window'] 

    # Find the corresponding likelihood column index
    freqs['ll_col'] = (freqs['beaching_window'] * n_cell_beaching) + freqs['beaching_cell']

    # Assign counts at corresponding indicies
    out[freqs['ll_row'].values,freqs['ll_col'].values] = freqs['count'].values

    # Assign to likelihood matrix
    Likelihood_mat[((source_time)*n_cell_source):((source_time+1)*n_cell_source),:] = out

# Row Probabilities
print('normalized Likelihood_mat')
tic=time.time()
Likelihood_mat = np.nan_to_num(Likelihood_mat / np.sum(Likelihood_mat, axis = 1)[:,None])
print('   done in',time.time()-tic)

Calculating Likelihood Matrix: 100%|██████████| 36/36 [00:38<00:00,  1.06s/it]


normalized Likelihood_mat


  Likelihood_mat = np.nan_to_num(Likelihood_mat / np.sum(Likelihood_mat, axis = 1)[:,None])


   done in 126.62137913703918


In [7]:
num_nonzero_ll_counts = np.shape((np.where(Likelihood_mat >0.0)))[1]
num_all_ll_counts = n_cell_source*n_window_source*n_cell_beaching*(d+1)
ll_mat_density = 100 * num_nonzero_ll_counts / num_all_ll_counts

print("Likelihood Matrix Density - %", ll_mat_density)

Likelihood Matrix Density - % 0.12753479626145292


In [8]:
Likelihood_mat_rowSums = np.sum(Likelihood_mat, axis = 1)
np.unique(Likelihood_mat_rowSums)

array([0., 1., 1., 1., 1., 1.])

Prior

In [9]:
prior = xr.open_dataset(outputDir + 'prior.nc')

In [10]:
l_prior_source_cells = prior.l_prior.values[:, source_cell_mask[0], source_cell_mask[1]]
r_prior_source_cells = prior.r_prior.values[:, source_cell_mask[0], source_cell_mask[1]]
f_prior_source_cells = prior.f_prior.values[:, source_cell_mask[0], source_cell_mask[1]]

In [15]:
# Source Windows X Source Cells
np.shape(l_prior_source_cells)

(36, 114024)

In [11]:
# Compute unnormalized posteriors
print('unnormalized posteriors')
tic=time.time()
l_nn_post = Likelihood_mat * np.ravel(l_prior_source_cells)[:, None] 
r_nn_post = Likelihood_mat * np.ravel(r_prior_source_cells)[:, None] 
f_nn_post = Likelihood_mat * np.ravel(f_prior_source_cells)[:, None]
print('   done in',time.time()-tic)

unnormalized posteriors
   done in 86.83976912498474


In [12]:
ds = xr.Dataset(
    {
        'l_nn_post': (['source_window_cells', 'dbeaching_windows_cells'], l_nn_post),
        'r_nn_post': (['source_window_cells', 'dbeaching_windows_cells'], r_nn_post),
        'f_nn_post': (['source_window_cells', 'dbeaching_windows_cells'], f_nn_post),
    },
    coords={
        'source_window_cells': np.arange(n_cell_source*n_window_source),
        'dbeaching_windows_cells': np.arange(n_cell_beaching*(d+1)),
    },
)

In [13]:
chunksize = {'source_window_cells': n_cell_source,
        'dbeaching_windows_cells': n_cell_beaching}

print('re-chunking')
tic=time.time()
ds = ds.chunk(chunksize)
print('   done in',time.time()-tic)

re-chunking
   done in 138.3991813659668


In [14]:
delayedObj = ds.to_zarr(outputDir + 'nn_post.zarr',compute=False)
with ProgressBar():
        results=delayedObj.compute()

[########################################] | 100% Completed | 183.35 s
