This script is used to produce bloom start and end timing metrics for the Payne et al. paper "End-of-century Arctic Ocean phytoplankton blooms start a month earlier due to anthropogenic climate change".


1. Import packages.

In [3]:
import numpy as np
import xarray as xr
import glob
import matplotlib.pyplot as plt
import cmocean
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from scipy import stats
import math
import matplotlib.path as mpath

2. Read in a randomly chosen NPP file. This is used to generate latitude (lat) and longitude (lon) fields for CESM files.

In [4]:
vdir = 'ocn' # 'ocn' or 'ice'
vnam = 'photoC_diat_zint_2'#, 'photoC_diaz_zint', 'photoC_sp_zint']#,'photoC_diaz_zint','photoC_sp_zint'] # 'aice' # 'SST'
fdir = '/glade/campaign/cgd/cesm/CESM2-LE/'+ vdir + '/proc/tseries/day_1/' + vnam + '/'

fnam = 'b.e21.BSSP370cmip6.f09_g17.LE2-1301.003.pop.h.ecosys.nday1.photoC_diat_zint_2.20450102-20550101.nc'

ncfile = xr.open_dataset(fdir + fnam)
lon = ncfile.TLONG.values
lat = ncfile.TLAT.values

ok = ~np.isnan(lon)
xp = ok.ravel().nonzero()[0]
fp = lon[~np.isnan(lon)]
x  = np.isnan(lon).ravel().nonzero()[0]
lon[np.isnan(lon)] = np.interp(x, xp, fp)

ok = ~np.isnan(lat)
xp = ok.ravel().nonzero()[0]
fp = lat[~np.isnan(lat)]
x  = np.isnan(lat).ravel().nonzero()[0]
lat[np.isnan(lat)] = np.interp(x, xp, fp)


3. Make the areacello_ocn array which gives the area (in m2) of each grid cell in the larger region (latitude > 50°N).

In [5]:
# b. Open up the areacello (area for ocean grid cells) file. I'll use this to calculate sea ice areal coverage
acdir = '/glade/collections/cmip/CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Ofx/areacello/gn/files/d20190308/areacello_Ofx_CESM2_historical_r1i1p1f1_gn.nc'
acfil = xr.open_dataset(acdir)
areacello = acfil['areacello'].values # tarea <- ocean files
areacello_ocn = np.full([384,320], np.nan)
for i in np.arange(0,384):
    for j in np.arange(0,320):
        if np.isnan(ncfile['photoC_diat_zint_2'][180,i,j]) == False and ncfile["TLAT"][i,j] > 50:
            areacello_ocn[i,j] = areacello[i,j]


  var = coder.decode(var, name=name)


4. Read in integrated C (or Chl) biomass for a particular year, and sum across all phytoplankton functional types.

In [15]:
# a. Select variable of interest and locate the files
vdir = 'ocn' # 'ocn' or 'ice'
vnam = ['diatC_zint_100m', 'diazC_zint_100m', 'spC_zint_100m']# 
dat_clim = np.full([50, 365, 384, 320], np.nan)
for j in np.arange(0,len(vnam)):
    fdir = '/glade/campaign/cgd/cesm/CESM2-LE/'+ vdir + '/proc/tseries/day_1/' + vnam[j] + '/'

# b. Make an array of nans to store the values from all EMs
    i = -1
    
# c. Loop through all the files in the directory, storing data in dat_clim
    for file in glob.glob(fdir + "*smbb*20950101*"): #use suffix 0102 for every year but 2015 and 0101 for 2015.
        i = i + 1
        if i == 0:
            print(file)

# d. Open the netcdf and store the variable of interest as dat. Also open lon and lat.
        ncfile = xr.open_dataset(file) 
        dat = ncfile[vnam[j]].values
        lon, lat = ncfile.TLONG.values, ncfile.TLAT.values #if in 'ice', use 'TLON', in 'ocn', use 'TLONG'

# e. Interpolate lat and lon values over holes in the arrays
        ok = ~np.isnan(lon)
        xp = ok.ravel().nonzero()[0]
        fp = lon[~np.isnan(lon)]
        x  = np.isnan(lon).ravel().nonzero()[0]
        lon[np.isnan(lon)] = np.interp(x, xp, fp)

        ok = ~np.isnan(lat)
        xp = ok.ravel().nonzero()[0]
        fp = lat[~np.isnan(lat)]
        x  = np.isnan(lat).ravel().nonzero()[0]
        lat[np.isnan(lat)] = np.interp(x, xp, fp)
        # print('hi')
# f. Loop through the ensemble members, assigning each to its own column.
        if j == 0:
            for t in range(0,365):
                dat_clim[i,t,:,:] = dat[t+1825,:,:]#[t+1825,:,:] #Get the year that is 5 years in (e.g. 2050)
            #dat_diat[i,:,:] = np.nansum(dat[1825:3650,:,:], axis = 0)
        else:
            for t in range(0,365):
                dat_clim[i,t,:,:] = dat[t+1825,:,:] + dat_clim[i,t,:,:]#dat[t+1825,:,:] + dat_clim[i,t,:,:] #Get the year that is 5 years in (e.g. 2050)
 

/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/day_1/diatC_zint_100m/b.e21.BSSP370smbb.f09_g17.LE2-1301.011.pop.h.ecosys.nday1.diatC_zint_100m.20850102-20950101.nc
/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/day_1/diazC_zint_100m/b.e21.BSSP370smbb.f09_g17.LE2-1051.003.pop.h.ecosys.nday1.diazC_zint_100m.20850102-20950101.nc
/glade/campaign/cgd/cesm/CESM2-LE/ocn/proc/tseries/day_1/spC_zint_100m/b.e21.BSSP370smbb.f09_g17.LE2-1281.016.pop.h.ecosys.nday1.spC_zint_100m.20850102-20950101.nc


5. Convert NPP into g C m-2 rather than the weird CESM units. Subsequently, calculate bloom start(ind_max_25) and end (ind_max_25_2) timing, as well as the maximum timing (ind_max_100), based on the date when biomass rises above 25% or below 25% of the maximum for the first time. This also takes a long time.

In [16]:
unitconv = 10.377 /1e12
propthresh = 0.15 ##THIS IS NORMALLY 0.25 but can be 0.15
ind_max_25 = np.zeros([384,320,50]); ind_max_100 = np.zeros([384,320,50]); ind_max_25_2 = np.zeros([384,320,50])
for i in np.arange(0,384):
    for j in np.arange(0,320):
        if np.isnan(areacello_ocn[i,j]) == False and np.isnan(areacello_ocn[i,j]) == False:
            for ens in np.arange(0,50):
                dbiom_arc = dat_clim[ens,:,i,j] * areacello_ocn[i,j] * unitconv 
                ft1 = True; ft2 = True; ft3 = True; pp = False
                for t in np.arange(1,366):
                    if dbiom_arc[t-1] > propthresh * np.nanmax(dbiom_arc) and ft1 == True:
                        ind_max_25[i,j,ens] = t
                        ft1 = False
                    if dbiom_arc[t-1] == np.nanmax(dbiom_arc) and ft2 == True:
                        ind_max_100[i,j,ens] = t
                        pp = True; ft2 = False
                    if dbiom_arc[t-1] <= (propthresh * np.nanmax(dbiom_arc)) and pp == True and ft3 == True:
                        ind_max_25_2[i,j,ens] = t
                        ft3 = False
                        break

6. Write out bloom start and end dates for the year studied.

In [17]:
dir = open("/glade/u/home/cpayne/Projects/BloomLength/AnnualGC/2100_max_15.txt", "w")
for row in ind_max_25:
    np.savetxt(dir, row)
dir.close()
dir = open("/glade/u/home/cpayne/Projects/BloomLength/AnnualGC/2100_max_15_2.txt", "w")
for row in ind_max_25_2:
    np.savetxt(dir, row)
dir.close()