## Calculate the POC flux at the bottom 
    1. Calculate bottom depth for each grid cell
    2. Using the bottom depth, calculate the POC flux in the bottom layer for every year and every lon and lan 

In [1]:
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import os
from glob import glob
from collections import OrderedDict
import xarray as xr
import numpy as np
import ncar_jobqueue
from dask.distributed import Client

import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
from scipy import stats
import pop_tools 

import cftime
import pandas as pd
import utils

import cmocean

In [2]:
case = 'g.e21.G1850ECOIAF.t62_g17.marbl0_33.GNG595'
path = f'/glade/campaign/cesm/development/bgcwg/projects/marbl-spectra/{case}/ocn/hist'

In [3]:
variables = [f'{var}' for var in ['POC_FLUX_IN','KMT','photoC_TOT']]
coords = {'x':'TLONG','y':'TLAT'}
keep_vars = variables + list(coords.values())+['dz','KMT','time']

In [4]:
%%time

ds_avg = xr.Dataset()

for year in np.arange(63,125,1):
    yr4="0{:02d}".format(year).zfill(4)
    print(year)
    
    ds_annual = xr.Dataset()

    file = sorted(glob(f'{path}/{case}.pop.h.{yr4}-*.nc'))
    
    dsv_annual=xr.open_mfdataset(file, decode_times=True,drop_variables=["transport_components", "transport_regions"], 
                            parallel=True, compat="override", combine='nested', concat_dim="time",data_vars="minimal",coords='minimal' )
    
    for vv in variables: 
        ds_annual = xr.merge((ds_annual, dsv_annual[vv]))
        

    ds_annual = ds_annual.drop([v for v in ds_annual.variables if v not in keep_vars]).squeeze()
    ds_annual = ds_annual.mean(dim='time')
    ds_avg = xr.concat([ds_avg, ds_annual],dim='year')

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
CPU times: user 7min 34s, sys: 45.8 s, total: 8min 20s
Wall time: 9min 4s


## Find the depth at the bottom, and this way we can check that out for this year that comes next!! 
    - KMT is the k index of deepest grid cell of the T grid. This means that this is the depth level that I am trying to target the flux to the bottom... to do that, since this should be the same for every year, I need to find it by creating a new array with a value 0 staying the same if that is the deepest grid cell, and for the rest do depth-1. 

    - I could do it in a loop, or I could subtract one from eveyone and then any negative values make them zero... 

In [5]:
# Finding the bottom depth the second way... 
kmt_bottom = np.nanmean(ds_avg.KMT,axis=0)
kmt_bottom_depth = kmt_bottom-1
kmt_bottom_depth[kmt_bottom_depth==-1]=0

## Get a cluster so it can continue running without interruptions

In [6]:
USER = os.environ['USER']
def get_ClusterClient():
    import dask
    from dask_jobqueue import PBSCluster
    from dask.distributed import Client
    cluster = PBSCluster(
        cores=2,
        memory='5 GB',
        processes=1,
        queue='casper',
        resource_spec='select=1:ncpus=1:mem=25GB',
        project='P93300070',
        walltime='12:00:00',
        interface='ib0',)

    dask.config.set({
        'distributed.dashboard.link':
        'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
    })
    client = Client(cluster)
    return cluster, client

In [7]:
cluster, client = get_ClusterClient()
cluster.scale(10) 
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/gabyn/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/gabyn/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.45:37624,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/gabyn/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


#### Now create a loop where you find the POC flux at that depth level at each grid cell for every year of the simulation.. This is going to take a long time, so you need to make sure to continue saving... 

In [29]:
poc_flux = ds_avg.POC_FLUX_IN.values

In [30]:
poc_flux.shape

(62, 60, 384, 320)

In [36]:
%%time
flux_bottom_62yr = np.empty([62,384,320]) # Size of the array that I need
flux_bottom_62yr[:] = np.nan # make everything a nan inside of it
for k in range(60):
    print(k)
    for i in range(384): # loop through the latitude 
        for j in range(320): # loop through the longitude 
            flux_bottom_62yr[k,i,j]= poc_flux[k,np.int(kmt_bottom_depth[i,j]),i,j]# mmol/m^3 cm/s

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
CPU times: user 21.4 s, sys: 11 ms, total: 21.4 s
Wall time: 22 s


In [37]:
np.save("/glade/scratch/gabyn/SPECTRA/flux_bottom_62yr.npy",flux_bottom_62yr)

## Close the cluster?

In [38]:
cluster.close()