# Calculate 2D lat-lon pathway maps for each basin


In [7]:
import xarray as xr
import numpy as np
import pandas as pd
from glob import glob

from dask.distributed import Client
import memory_profiler

import warnings
# Blanket filter: with no category/module arguments this silences every
# Python warning raised after this point (including deprecation and numerical
# RuntimeWarnings), not just a specific noisy one.
warnings.filterwarnings("ignore")

In [8]:
import sys
from pathlib import Path

# Make the directory one level above the current working directory importable,
# so that the project-local `custom_functions` module can be found.
# (In Jupyter lab / notebook the working directory is based on the notebook path.)
notebook_parent = Path.cwd().parents[0]
module_path = str(notebook_parent)
if module_path not in sys.path:
    sys.path.append(module_path)

import custom_functions as cf

In [9]:
# Start a dask.distributed client with default arguments; the captured cell
# output reports that this created a distributed.LocalCluster.
client = Client()
# Bare expression so the notebook renders the client / cluster summary.
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: /proxy/44407/status,

0,1
Dashboard: /proxy/44407/status,Workers: 6
Total threads: 18,Total memory: 200.00 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:44351,Workers: 6
Dashboard: /proxy/44407/status,Total threads: 18
Started: Just now,Total memory: 200.00 GiB

0,1
Comm: tcp://127.0.0.1:38983,Total threads: 3
Dashboard: /proxy/37651/status,Memory: 33.33 GiB
Nanny: tcp://127.0.0.1:44771,
Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-98uvdsai,Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-98uvdsai

0,1
Comm: tcp://127.0.0.1:36015,Total threads: 3
Dashboard: /proxy/42479/status,Memory: 33.33 GiB
Nanny: tcp://127.0.0.1:44313,
Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-q707mgr3,Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-q707mgr3

0,1
Comm: tcp://127.0.0.1:35481,Total threads: 3
Dashboard: /proxy/44971/status,Memory: 33.33 GiB
Nanny: tcp://127.0.0.1:45103,
Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-1u2qjv2w,Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-1u2qjv2w

0,1
Comm: tcp://127.0.0.1:41051,Total threads: 3
Dashboard: /proxy/41401/status,Memory: 33.33 GiB
Nanny: tcp://127.0.0.1:42913,
Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-2z240ur6,Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-2z240ur6

0,1
Comm: tcp://127.0.0.1:41023,Total threads: 3
Dashboard: /proxy/37227/status,Memory: 33.33 GiB
Nanny: tcp://127.0.0.1:34863,
Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-fpcz3w58,Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-fpcz3w58

0,1
Comm: tcp://127.0.0.1:33781,Total threads: 3
Dashboard: /proxy/45593/status,Memory: 33.33 GiB
Nanny: tcp://127.0.0.1:33459,
Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-7sob1r_z,Local directory: /home/561/hd4873/parcel_runs/antarctic_connectivity_paper/pathways/dask-worker-space/worker-7sob1r_z


## Calculate pathways up until shelf exit 

Load data for 2Dpxy calculation. 

In [12]:
%%time
# Antarctic shelf basin mask (provides `basin_lookup`, used to iterate over basins)
basin_mask_file = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/basin_masks/Antarctic_shelf_basin_mask_hu_coarse.nc'
antarctic_basins = xr.open_dataset(basin_mask_file)

# Initial particle parameters, read without CF decoding
startfile = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/CircumAntarcticParticles_initial_values.nc'
ds_iv = xr.open_dataset(startfile, decode_cf=False)

# Particle trajectory files, in sorted filename order
npart = 130146  # NOTE(review): not referenced in the visible cells -- confirm it is still needed
datadir = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/xarray_files/traj_chunked_basin/'
files = sorted(glob(datadir+'CircumAntarcticParticles_*.nc'))

# Variables that the pathway calculation does not use; dropped straight away
unused_vars = ['psal', 'thermo', 'mixedlayershuffle',
               'mldepth', 'unbeachCount', 'z',
               'basin', 'shelf']

# Open the complete particle dataset as one multi-file dataset.
# .load() is deliberately not called here.
ds = xr.open_mfdataset(files, decode_cf=False)
ds = ds.drop_vars(unused_vars)

CPU times: user 2.51 s, sys: 2.9 s, total: 5.41 s
Wall time: 12.1 s


Set histogram parameters. 

In [14]:
# Longitude / latitude extent of the 2D pathway maps.
xstart = -280  # starting longitude
xend = 80      # final longitude
xbinlim = [xstart, xend]  # longitude bin limits
ystart = -81   # starting latitude
yend = -48     # final latitude
ybinlim = [ystart, yend]  # latitude bin limits

# Grid spacing of the 2D pdf maps.
dx_map = 0.5    # x-grid size for 2D pdf maps
dy_map = 0.25   # y-grid size for 2D pdf maps

# Bin counts are derived from the limits and spacing (rather than hard-coding
# the 360 / 33 degree spans) so they cannot drift out of sync with the limits.
xbins = int(round((xend - xstart) / dx_map))   # number of longitude bins
ybins = int(round((yend - ystart) / dy_map))   # number of latitude bins

# Bin edges. np.linspace always yields exactly nbins + 1 edges, whereas
# np.arange with a float step and a computed endpoint can gain or lose an
# element through rounding.
xarange = np.linspace(xstart, xend, xbins + 1)
yarange = np.linspace(ystart, yend, ybins + 1)
xmid = (xarange[1:] + xarange[:-1]) / 2.  # longitude midpoints
ymid = (yarange[1:] + yarange[:-1]) / 2.  # latitude midpoints

Now calculate the 2D particle distribution in longitude-latitude space for each coarse basin region, using only the part of each trajectory up to its shelf exit.

In [6]:
%%time

# For each selected basin, build lon-lat maps of how many particles (and how
# much transport) visit each grid cell at any time up to and including the
# particle's shelf-exit index, then write the maps to one netCDF file per basin.
outdir = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/2Dpxy/'

for basinid in antarctic_basins.basin_lookup[2:5]:
    print(basinid.values)
    traj = np.where(ds_iv.basin == basinid.values)[0] # find trajectories that start in this basin
    print("Loading dataset...")
    dsloop = ds.isel(trajectory=traj).load()          # load particle data for specific basin
    ds_iv_t = ds_iv.trans[traj].load()                # load particle transport
    ds_iv_tidx = ds_iv.shelf_exit_indx[traj].load()   # load particle tidx
    print("~ Array size (GB):", dsloop.nbytes/1000**3, "Number of particles:", len(dsloop.lon[0,:]))

    # Extract plain numpy arrays once. Indexing xarray objects inside a loop
    # over millions of particles adds a large per-iteration overhead without
    # changing any value.
    lon = dsloop.lon.values
    lat = dsloop.lat.values
    exit_idx = ds_iv_tidx.values
    abs_trans = np.abs(ds_iv_t).values
    nparticles = lon.shape[1]

    # arrays to store binned particle counts from origin to final location
    in_box = np.zeros((xbins, ybins))
    in_box_trans = np.zeros((xbins, ybins))

    for n in range(nparticles):
        if n % 50000 == 0:
            print(n)
        t = int(exit_idx[n])                     # last time on shelf
        # positions of particle n up to (and including) its last time on shelf
        H, _, _ = np.histogram2d(lon[:t+1, n], lat[:t+1, n],
                                 [xbins, ybins], [xbinlim, ybinlim])
        # Each particle contributes at most once per grid cell, however many
        # time steps it spends there.
        visited = H > 0
        in_box[visited] += 1                     # count
        in_box_trans[visited] += abs_trans[n]    # transport weighted

    # create 2D pathways matrices
    pxy_pathway_pct = 100. * in_box / np.float64(nparticles)   # percentage count
    pxy_pathway_trans = in_box_trans                           # transport weighted
    pxy_pathway_trans_pct = 100. * in_box_trans / np.float64(np.sum(np.abs(ds_iv_t)))   # percentage transport weighted

    # convert to DataArrays, masking cells no particle visited (where() fills with NaN)
    grid = {"lat": ymid, "lon": xmid}
    pxy_pct = xr.DataArray(data=pxy_pathway_pct, dims=["lon", "lat"], coords=grid)
    pxy_transport = xr.DataArray(data=pxy_pathway_trans, dims=["lon", "lat"], coords=grid)
    pxy_transport_pct = xr.DataArray(data=pxy_pathway_trans_pct, dims=["lon", "lat"], coords=grid)
    pxy_pct = pxy_pct.where(pxy_pct > 0)
    pxy_transport = pxy_transport.where(pxy_transport > 0)
    pxy_transport_pct = pxy_transport_pct.where(pxy_transport_pct > 0)

    # Combined into one Dataset
    pxy = xr.Dataset({"pxy_pct": pxy_pct, "pxy_trans": pxy_transport, "pxy_trans_pct": pxy_transport_pct})

    # save to file
    outfile = outdir + '2Dpxy_basin_{:02d}_0.5x0.25.nc'.format(int(basinid.values))

    # Save to netCDF. The per-variable zlib encoding must be passed to
    # to_netcdf explicitly; previously the dict was built but never used, so
    # the files were written uncompressed.
    # NOTE(review): zlib encoding needs a netCDF4-capable backend -- confirm.
    print("Saving to netCDF file", outfile)
    encod = {var: {'zlib': True} for var in pxy.data_vars}
    pxy.to_netcdf(outfile, encoding=encod)

4.0
Loading dataset...
~ Array size (GB): 40.191549396 Number of particles: 2212703
0
50000
100000
150000
200000
250000
300000
350000
400000
450000
500000
550000
600000
650000
700000
750000
800000
850000
900000
950000
1000000
1050000
1100000
1150000
1200000
1250000
1300000
1350000
1400000
1450000
1500000
1550000
1600000
1650000
1700000
1750000
1800000
1850000
1900000
1950000
2000000
2050000
2100000
2150000
2200000
Saving to netCDF file /g/data/e14/hd4873/runs/parcels/output/AntConn/data/2Dpxy/2Dpxy_basin_04_0.5x0.25.nc
5.0
Loading dataset...
~ Array size (GB): 42.870012836 Number of particles: 2360163
0
50000
100000
150000
200000
250000
300000
350000
400000
450000
500000
550000
600000
650000
700000
750000
800000
850000
900000
950000
1000000
1050000
1100000
1150000
1200000
1250000
1300000
1350000
1400000
1450000
1500000
1550000
1600000
1650000
1700000
1750000
1800000
1850000
1900000
1950000
2000000
2050000
2100000
2150000
2200000
2250000
2300000
2350000
Saving to netCDF file /g/data/e14

## Calculate entire pathways

Load data for 2Dpxy calculation. 

In [15]:
%%time
# Antarctic shelf basin mask (provides `basin_lookup`, used to iterate over basins)
basin_mask_file = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/basin_masks/Antarctic_shelf_basin_mask_hu_coarse.nc'
antarctic_basins = xr.open_dataset(basin_mask_file)

# Initial particle parameters, read without CF decoding
startfile = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/CircumAntarcticParticles_initial_values.nc'
ds_iv = xr.open_dataset(startfile, decode_cf=False)

# Particle trajectory files, in sorted filename order
npart = 130146  # NOTE(review): not referenced in the visible cells -- confirm it is still needed
datadir = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/xarray_files/traj_chunked_basin/'
files = sorted(glob(datadir+'CircumAntarcticParticles_*.nc'))

# Variables that the pathway calculation does not use; dropped straight away
unused_vars = ['psal', 'thermo', 'mixedlayershuffle',
               'mldepth', 'unbeachCount', 'z',
               'basin', 'shelf', 'basin_ZonalConn']

# Open the complete particle dataset as one multi-file dataset.
# .load() is deliberately not called here.
ds = xr.open_mfdataset(files, decode_cf=False)
ds = ds.drop_vars(unused_vars)

CPU times: user 1.12 s, sys: 301 ms, total: 1.42 s
Wall time: 1.31 s


Set histogram parameters. 

In [16]:
# Longitude / latitude extent of the 2D pathway maps.
xstart = -280  # starting longitude
xend = 80      # final longitude
xbinlim = [xstart, xend]  # longitude bin limits
ystart = -81   # starting latitude
yend = -48     # final latitude
ybinlim = [ystart, yend]  # latitude bin limits

# Grid spacing of the 2D pdf maps.
dx_map = 0.5    # x-grid size for 2D pdf maps
dy_map = 0.25   # y-grid size for 2D pdf maps

# Bin counts are derived from the limits and spacing (rather than hard-coding
# the 360 / 33 degree spans) so they cannot drift out of sync with the limits.
xbins = int(round((xend - xstart) / dx_map))   # number of longitude bins
ybins = int(round((yend - ystart) / dy_map))   # number of latitude bins

# Bin edges. np.linspace always yields exactly nbins + 1 edges, whereas
# np.arange with a float step and a computed endpoint can gain or lose an
# element through rounding.
xarange = np.linspace(xstart, xend, xbins + 1)
yarange = np.linspace(ystart, yend, ybins + 1)
xmid = (xarange[1:] + xarange[:-1]) / 2.  # longitude midpoints
ymid = (yarange[1:] + yarange[:-1]) / 2.  # latitude midpoints



Now calculate the 2D particle distribution in longitude-latitude space for each coarse basin region, using the entire trajectory of each particle.

In [6]:
%%time

# For each selected basin, build lon-lat maps of how many particles (and how
# much transport) visit each grid cell at any time along the whole trajectory,
# then write the maps to one netCDF file per basin.
outdir = '/g/data/e14/hd4873/runs/parcels/output/AntConn/data/2Dpxy/'

for basinid in antarctic_basins.basin_lookup[5:6]:
    print(basinid.values)
    traj = np.where(ds_iv.basin == basinid.values)[0] # find trajectories that start in this basin
    print("Loading dataset...")
    dsloop = ds.isel(trajectory=traj).load()          # load particle data for specific basin
    ds_iv_t = ds_iv.trans[traj].load()                # load particle transport
    # The whole trajectory is binned, so the shelf-exit index is not needed
    # here and is no longer loaded.
    print("~ Array size (GB):", dsloop.nbytes/1000**3, "Number of particles:", len(dsloop.lon[0,:]))

    # Extract plain numpy arrays once. Indexing xarray objects inside a loop
    # over millions of particles adds a large per-iteration overhead without
    # changing any value.
    lon = dsloop.lon.values
    lat = dsloop.lat.values
    abs_trans = np.abs(ds_iv_t).values
    nparticles = lon.shape[1]

    # arrays to store binned particle counts from origin to final location
    in_box = np.zeros((xbins, ybins))
    in_box_trans = np.zeros((xbins, ybins))

    for n in range(nparticles):
        if n % 50000 == 0:
            print(n)
        # all positions of particle n along its entire trajectory
        H, _, _ = np.histogram2d(lon[:, n], lat[:, n],
                                 [xbins, ybins], [xbinlim, ybinlim])
        # Each particle contributes at most once per grid cell, however many
        # time steps it spends there.
        visited = H > 0
        in_box[visited] += 1                     # count
        in_box_trans[visited] += abs_trans[n]    # transport weighted

    # create 2D pathways matrices
    pxy_pathway_pct = 100. * in_box / np.float64(nparticles)   # percentage count
    pxy_pathway_trans = in_box_trans                           # transport
    pxy_pathway_trans_pct = 100. * in_box_trans / np.float64(np.sum(np.abs(ds_iv_t)))   # percentage transport

    # convert to DataArrays, masking cells no particle visited (where() fills with NaN)
    grid = {"lat": ymid, "lon": xmid}
    pxy_pct = xr.DataArray(data=pxy_pathway_pct, dims=["lon", "lat"], coords=grid)
    pxy_transport = xr.DataArray(data=pxy_pathway_trans, dims=["lon", "lat"], coords=grid)
    pxy_transport_pct = xr.DataArray(data=pxy_pathway_trans_pct, dims=["lon", "lat"], coords=grid)
    pxy_pct = pxy_pct.where(pxy_pct > 0)
    pxy_transport = pxy_transport.where(pxy_transport > 0)
    pxy_transport_pct = pxy_transport_pct.where(pxy_transport_pct > 0)

    # Combined into one Dataset
    pxy = xr.Dataset({"pxy_pct": pxy_pct, "pxy_trans": pxy_transport, "pxy_trans_pct": pxy_transport_pct})

    # save to file
    outfile = outdir + 'WholeTrajectory_2Dpxy_basin_{:02d}_0.5x0.25.nc'.format(int(basinid.values))

    # Save to netCDF. The per-variable zlib encoding must be passed to
    # to_netcdf explicitly; previously the dict was built but never used, so
    # the files were written uncompressed.
    # NOTE(review): zlib encoding needs a netCDF4-capable backend -- confirm.
    print("Saving to netCDF file", outfile)
    encod = {var: {'zlib': True} for var in pxy.data_vars}
    pxy.to_netcdf(outfile, encoding=encod)

7.0
Loading dataset...
~ Array size (GB): 29.727777576 Number of particles: 2454406
0
50000
100000
150000
200000
250000
300000
350000
400000
450000
500000
550000
600000
650000
700000
750000
800000
850000
900000
950000
1000000
1050000
1100000
1150000
1200000
1250000
1300000
1350000
1400000
1450000
1500000
1550000
1600000
1650000
1700000
1750000
1800000
1850000
1900000
1950000
2000000
2050000
2100000
2150000
2200000
2250000
2300000
2350000
2400000
2450000
Saving to netCDF file /g/data/e14/hd4873/runs/parcels/output/AntConn/data/2Dpxy/WholeTrajectory_2Dpxy_basin_07_0.5x0.25.nc
CPU times: user 1h 55min 1s, sys: 3min 29s, total: 1h 58min 30s
Wall time: 1h 55min 14s
