# Saving Interannual phytoplankton biomass to increase the speed of making these interannual plots in the future.

In [1]:
import warnings
warnings.simplefilter("ignore") # Silence warnings
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import numpy as np
from matplotlib.pyplot import figure

import numpy.ma as ma
from netCDF4 import Dataset as NetCDFFile
import cartopy
import cartopy.crs as ccrs
import pylab 
from copy import deepcopy
import pandas as pd
import matplotlib
import matplotlib.colors as colors
from cartopy.util import add_cyclic_point
plt.rcParams['mathtext.default']='regular'
from collections import OrderedDict
import cmocean
import matplotlib.cm as cm
import matplotlib as mpl
import seaborn as sns
from matplotlib.gridspec import GridSpec
import matplotlib
# Scientific libraries
from numpy import arange,array,ones
from scipy import stats
import os

%matplotlib inline
from glob import glob
import dask
import esmlab
import pop_tools 
import statistics as stats
from statistics import mean

In [2]:
class MidpointNormalize(colors.Normalize):
    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        self.midpoint = midpoint
        colors.Normalize.__init__(self, vmin, vmax, clip)

    def __call__(self, value, clip=None):
        if clip is None:
            clip = self.clip

        result, is_scalar = self.process_value(value)

        self.autoscale_None(result)
        vmin, vmax, midpoint = self.vmin, self.vmax, self.midpoint

        if not (vmin < midpoint < vmax):
            raise ValueError("midpoint must be between maxvalue and minvalue.")
        elif vmin == vmax:
            result.fill(0) # Or should it be all masked? Or 0.5?
        elif vmin > vmax:
            raise ValueError("maxvalue must be bigger than minvalue")
        else:
            vmin = float(vmin)
            vmax = float(vmax)
            if clip:
                mask = np.ma.getmask(result)
                result = np.ma.array(np.clip(result.filled(vmax), vmin, vmax),
                                  mask=mask)

            # ma division is very slow; we can take a shortcut
            resdat = result.data

            #First scale to -1 to 1 range, than to from 0 to 1.
            resdat -= midpoint
            resdat[resdat>0] /= abs(vmax - midpoint)
            resdat[resdat<0] /= abs(vmin - midpoint)

            resdat /= 2.
            resdat += 0.5
            result = np.ma.array(resdat, mask=result.mask, copy=False)

        if is_scalar:
            result = result[0]
        return result

In [3]:
import re
numbers = re.compile(r'(\d+)')
def numericalSort(value):
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts

In [4]:
def adjust_pop_grid(tlon,tlat,field):
    nj = tlon.shape[0]
    ni = tlon.shape[1]
    xL = int(ni/2 - 1)
    xR = int(xL + ni)

    tlon = np.where(np.greater_equal(tlon,np.min(tlon[:,0])),tlon-360.,tlon)
    lon  = np.concatenate((tlon,tlon+360.),1)
    lon = lon[:,xL:xR]

    if ni == 320:
        lon[367:-3,0] = lon[367:-3,0]+360.
    lon = lon - 360.
    lon = np.hstack((lon,lon[:,0:1]+360.))
    if ni == 320:
        lon[367:,-1] = lon[367:,-1] - 360.

    #-- trick cartopy into doing the right thing:
    #   it gets confused when the cyclic coords are identical
    lon[:,0] = lon[:,0]-1e-8

    #-- periodicity
    lat  = np.concatenate((tlat,tlat),1)
    lat = lat[:,xL:xR]
    lat = np.hstack((lat,lat[:,0:1]))

    field = np.ma.concatenate((field,field),1)
    field = field[:,xL:xR]
    field = np.ma.hstack((field,field[:,0:1]))
    return lon,lat,field

## Import the phytoplankton biomass... 

In [5]:
case = 'g.e21.G1850ECOIAF.t62_g17.marbl0_33.GNG595'
path = f'/glade/campaign/cesm/development/bgcwg/projects/marbl-spectra/{case}/ocn/hist'

In [6]:
variables = [f'{var}' for var in ['diat1C','diat2C','diat3C', 'ppC', 'diazC', 'mp1C','mp2C','mp3C','mp4C','ECOSYS_IFRAC','TEMP','NO3']]
coords = {'x':'TLONG','y':'TLAT'}
keep_vars = variables + list(coords.values())+['dz','KMT','time']

In [7]:
%%time

ds_summer_avg = xr.Dataset()
ds_spring_avg = xr.Dataset()
ds_fall_avg = xr.Dataset()
ds_winter_avg =xr.Dataset()

for year in np.arange(63,125,1):
    yr4="0{:02d}".format(year).zfill(4)
    print(year)
    
    ds_summer = xr.Dataset()
    ds_spring = xr.Dataset()
    ds_winter = xr.Dataset()
    ds_fall = xr.Dataset()

    file = sorted(glob(f'{path}/{case}.pop.h.{yr4}-*.nc'))
    
    winter_indexes = [0,1,-1]
    winter_files = [file[ind] for ind in winter_indexes]
    dsv_winter=xr.open_mfdataset(winter_files, decode_times=True,drop_variables=["transport_components", "transport_regions"], 
                            parallel=True, compat="override", combine='nested', concat_dim="time",data_vars="minimal",coords='minimal' )
        
    dsv_summer=xr.open_mfdataset(file[5:8], decode_times=True,drop_variables=["transport_components", "transport_regions"], 
                            parallel=True, compat="override", combine='nested', concat_dim="time",data_vars="minimal",coords='minimal' )
    dsv_spring=xr.open_mfdataset(file[2:5], decode_times=True,drop_variables=["transport_components", "transport_regions"], 
                            parallel=True, compat="override", combine='nested', concat_dim="time",data_vars="minimal",coords='minimal' )
    dsv_fall=xr.open_mfdataset(file[8:11], decode_times=True,drop_variables=["transport_components", "transport_regions"], 
                            parallel=True, compat="override", combine='nested', concat_dim="time",data_vars="minimal",coords='minimal' )
    
    for vv in variables: 
        ds_summer = xr.merge((ds_summer, dsv_summer[vv]))
        ds_spring = xr.merge((ds_spring, dsv_spring[vv]))
        ds_winter = xr.merge((ds_winter, dsv_winter[vv]))
        ds_fall = xr.merge((ds_fall, dsv_fall[vv]))
        

    ds_summer = ds_summer.drop([v for v in ds_summer.variables if v not in keep_vars]).squeeze()
    ds_summer = ds_summer.mean(dim='time')
    ds_summer_avg = xr.concat([ds_summer_avg, ds_summer],dim='year')
    
    ds_spring = ds_spring.drop([v for v in ds_spring.variables if v not in keep_vars]).squeeze()
    ds_spring = ds_spring.mean(dim='time')
    ds_spring_avg = xr.concat([ds_spring_avg, ds_spring],dim='year')
    
    ds_winter = ds_winter.drop([v for v in ds_winter.variables if v not in keep_vars]).squeeze()
    ds_winter = ds_winter.mean(dim='time')
    ds_winter_avg = xr.concat([ds_winter_avg, ds_winter],dim='year')
    
    ds_fall = ds_fall.drop([v for v in ds_fall.variables if v not in keep_vars]).squeeze()
    ds_fall = ds_fall.mean(dim='time')
    ds_fall_avg = xr.concat([ds_fall_avg, ds_fall],dim='year')

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
CPU times: user 12min 23s, sys: 5min 36s, total: 18min
Wall time: 19min 45s


In [8]:
%%time
phytoC_summer = np.stack([ds_summer_avg.mp3C, ds_summer_avg.diat3C, ds_summer_avg.mp4C])

CPU times: user 16.8 s, sys: 50.7 s, total: 1min 7s
Wall time: 2min 46s


In [9]:
%%time
phytoC_spring = np.stack([ds_spring_avg.mp3C, ds_spring_avg.diat3C, ds_spring_avg.mp4C])

CPU times: user 17.5 s, sys: 50.5 s, total: 1min 7s
Wall time: 2min 46s


In [10]:
%%time
phytoC_winter = np.stack([ds_winter_avg.mp3C, ds_winter_avg.diat3C, ds_winter_avg.mp4C])

CPU times: user 17.7 s, sys: 51.3 s, total: 1min 8s
Wall time: 2min 55s


In [11]:
%%time
phytoC_fall = np.stack([ds_fall_avg.mp3C, ds_fall_avg.diat3C, ds_fall_avg.mp4C])

CPU times: user 17.7 s, sys: 51 s, total: 1min 8s
Wall time: 3min 1s


In [12]:
%%time
phytoC_summer_bm = np.nansum(phytoC_summer, axis=(0,2)) * 10/150 #integrate over depth #mmol m-3
phytoC_summer_bm = phytoC_summer_bm * 12.011 *1000 # ug C m-3 --> ugC m^{-3}

phytoC_spring_bm = np.nansum(phytoC_spring, axis=(0,2)) * 10/150 #integrate over depth #mmol m-3
phytoC_spring_bm = phytoC_spring_bm * 12.011 *1000 # ug C m-3 --> ugC m^{-3}

phytoC_fall_bm = np.nansum(phytoC_fall, axis=(0,2)) * 10/150 #integrate over depth #mmol m-3
phytoC_fall_bm = phytoC_fall_bm * 12.011 *1000 # ug C m-3 --> ugC m^{-3}

phytoC_winter_bm = np.nansum(phytoC_winter, axis=(0,2)) * 10/150 #integrate over depth #mmol m-3
phytoC_winter_bm = phytoC_winter_bm * 12.011 *1000 # ug C m-3 --> ugC m^{-3}

CPU times: user 4.92 s, sys: 9.59 s, total: 14.5 s
Wall time: 46.3 s


In [13]:
phytoC_summer_bm[phytoC_summer_bm<0] ==np.nan
phytoC_spring_bm[phytoC_spring_bm<0] ==np.nan
phytoC_fall_bm[phytoC_fall_bm<0] ==np.nan
phytoC_winter_bm[phytoC_winter_bm<0] ==np.nan

array([], dtype=bool)

In [14]:
phytoC_summer_bm

array([[[  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ],
        [  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ],
        [ 73.89798 ,  71.91281 ,  69.833565, ...,   0.      ,
           0.      ,   0.      ],
        ...,
        [  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ],
        [  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ],
        [  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ]],

       [[  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ],
        [  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ],
        [118.51018 , 114.876915, 111.590355, ...,   0.      ,
           0.      ,   0.      ],
        ...,
        [  0.      ,   0.      ,   0.      , ...,   0.      ,
           0.      ,   0.      ],
        [  0

In [15]:
time = np.arange(0,62,1)
phytoC_summer_ds = xr.DataArray(phytoC_summer_bm, coords={'time':time, 'TLAT':ds_winter_avg.TLAT, 'TLONG':ds_winter_avg.TLONG},
                                    dims=['time', 'nlat', 'nlon'],
                                    attrs=dict(description="summer phytoplankton biomass integrated over depth (150m)",units="ugC m^{-3}"))
phytoC_summer_ds = phytoC_summer_ds.to_dataset(name='phytoC_large_150m')

In [16]:
phytoC_summer_ds.to_netcdf('/glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_summer_large_bm.nc');

	NC4_create: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_summer_large_bm.nc cmode 0x1000 parameters (nil)
	HDF5 error messages turned on.
			nc4_create_file: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_summer_large_bm.nc mode 0x1000
			nc4_grp_list_add: name / 
		nc_inq_format: ncid 0x70000
		NC4_inq_format_extended: ncid 0x70000
		nc_inq_typeids: ncid 0x70000
		NC4_inq: ncid 0x70000
		NC4_inq: ncid 0x70000
		nc_inq_grps: ncid 0x70000
		NC4_def_dim: ncid 0x70000 name time len 62
		NC4_def_dim: ncid 0x70000 name nlat len 384
		NC4_def_dim: ncid 0x70000 name nlon len 320
		NC4_def_var: name time type 10 ndims 1
		NC4_inq_unlimdims: ncid 0x70000
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_d

In [17]:
phytoC_spring_ds = xr.DataArray(phytoC_spring_bm, coords={'time':time, 'TLAT':ds_winter_avg.TLAT, 'TLONG':ds_winter_avg.TLONG},
                                    dims=['time', 'nlat', 'nlon'],
                                    attrs=dict(description="spring phytoplankton biomass integrated over depth (150m)",units="ugC m^{-3}"))
phytoC_spring_ds = phytoC_spring_ds.to_dataset(name='phytoC_large_150m')

In [18]:
phytoC_spring_ds.to_netcdf('/glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_spring_large_bm.nc');

	NC4_create: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_spring_large_bm.nc cmode 0x1000 parameters (nil)
	HDF5 error messages turned on.
			nc4_create_file: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_spring_large_bm.nc mode 0x1000
			nc4_grp_list_add: name / 
		nc_inq_format: ncid 0x70000
		NC4_inq_format_extended: ncid 0x70000
		nc_inq_typeids: ncid 0x70000
		NC4_inq: ncid 0x70000
		NC4_inq: ncid 0x70000
		nc_inq_grps: ncid 0x70000
		NC4_def_dim: ncid 0x70000 name time len 62
		NC4_def_dim: ncid 0x70000 name nlat len 384
		NC4_def_dim: ncid 0x70000 name nlon len 320
		NC4_def_var: name time type 10 ndims 1
		NC4_inq_unlimdims: ncid 0x70000
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_d

In [19]:
phytoC_fall_ds = xr.DataArray(phytoC_fall_bm, coords={'time':time, 'TLAT':ds_winter_avg.TLAT, 'TLONG':ds_winter_avg.TLONG},
                                    dims=['time', 'nlat', 'nlon'],
                                    attrs=dict(description="fall phytoplankton biomass integrated over depth (150m)",units="ugC m^{-3}"))
phytoC_fall_ds = phytoC_fall_ds.to_dataset(name='phytoC_large_150m')

In [20]:
phytoC_fall_ds.to_netcdf('/glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_fall_large_bm.nc');

	NC4_create: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_fall_large_bm.nc cmode 0x1000 parameters (nil)
	HDF5 error messages turned on.
			nc4_create_file: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_fall_large_bm.nc mode 0x1000
			nc4_grp_list_add: name / 
		nc_inq_format: ncid 0x70000
		NC4_inq_format_extended: ncid 0x70000
		nc_inq_typeids: ncid 0x70000
		NC4_inq: ncid 0x70000
		NC4_inq: ncid 0x70000
		nc_inq_grps: ncid 0x70000
		NC4_def_dim: ncid 0x70000 name time len 62
		NC4_def_dim: ncid 0x70000 name nlat len 384
		NC4_def_dim: ncid 0x70000 name nlon len 320
		NC4_def_var: name time type 10 ndims 1
		NC4_inq_unlimdims: ncid 0x70000
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_dim: 

In [21]:
phytoC_winter_ds = xr.DataArray(phytoC_winter_bm, coords={'time':time, 'TLAT':ds_winter_avg.TLAT, 'TLONG':ds_winter_avg.TLONG},
                                    dims=['time', 'nlat', 'nlon'],
                                    attrs=dict(description="winter phytoplankton biomass integrated over depth (150m)",units="ugC m^{-3}"))
phytoC_winter_ds = phytoC_winter_ds.to_dataset(name='phytoC_large_150m')

In [22]:
phytoC_winter_ds.to_netcdf('/glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_large_winter_bm.nc');

	NC4_create: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_large_winter_bm.nc cmode 0x1000 parameters (nil)
	HDF5 error messages turned on.
			nc4_create_file: path /glade/u/home/gabyn/scratch/SPECTRA/nc_files/phytoC_large_winter_bm.nc mode 0x1000
			nc4_grp_list_add: name / 
		nc_inq_format: ncid 0x70000
		NC4_inq_format_extended: ncid 0x70000
		nc_inq_typeids: ncid 0x70000
		NC4_inq: ncid 0x70000
		NC4_inq: ncid 0x70000
		nc_inq_grps: ncid 0x70000
		NC4_def_dim: ncid 0x70000 name time len 62
		NC4_def_dim: ncid 0x70000 name nlat len 384
		NC4_def_dim: ncid 0x70000 name nlon len 320
		NC4_def_var: name time type 10 ndims 1
		NC4_inq_unlimdims: ncid 0x70000
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_dim: ncid 0x70000 dimid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_var_all: ncid 0x70000 varid 0
		NC4_inq_d

In [None]:
`