# SST Empirical Orthogonal Function Analysis

This notebook imports sea surface temperature (SST) data, restricts it to the scope of the project, and performs an empirical orthogonal function (EOF) analysis to determine the Central Atlantic Niño Index (CANI) and the Eastern Atlantic Niño Index (EANI).

# Imports

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
import xeofs as xe
import glob
from geocat.viz import util as gvutil
import cartopy.crs as ccrs
import cartopy.feature as cf
import cartopy.util as cutil
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import colormaps as cm
import matplotlib
import matplotlib.pyplot as plt
import os

# PBSCluster

In [13]:
# # Create a PBS cluster object
# cluster = PBSCluster(account='P93300313',
#                      job_name='CANI_EANI_EOFa',
#                      cores=1,
#                      memory='8GiB',
#                      processes=1,
#                      walltime='02:00:00',
#                      queue='casper',
#                      interface='ext',
#                      n_workers=1)

# # don't scale to many workers unless using the LE
# # cluster.scale(10)

# client = Client(cluster)
# client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 35845 instead


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/acruz/proxy/35845/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/acruz/proxy/35845/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.214:37367,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/acruz/proxy/35845/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [15]:
# cluster.scale(2)

In [16]:
# client.shutdown()
# cluster.workers

{'PBSCluster-0': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-1': <dask_jobqueue.pbs.PBSJob: status=running>}

# Useful Functions

In [2]:
def ds_map(ds_to_plt, bounds=[20, -60, 10, -10], name='figure'):
    """Draw a filled-contour map of a field on a PlateCarree projection.

    Parameters
    ----------
    ds_to_plt
        Field to contour. It must expose ``longitude`` and ``latitude``
        attributes (presumably a 2-D xarray.DataArray -- TODO confirm).
    bounds : sequence of four numbers
        Handed unchanged to ``ax.set_extent``. Elements 1 and 0 delimit the
        longitude ticks (10-degree spacing) and elements 3 and 2 delimit the
        latitude ticks (5-degree spacing). The default list is never mutated.
    name : str
        Title drawn above the map.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plate = ccrs.PlateCarree()

    fig, ax = plt.subplots(nrows=1, ncols=1,
                           subplot_kw=dict(projection=plate))
    fig.subplots_adjust(hspace=0, wspace=0, top=0.925, left=0.1)
    # Placeholder rectangle: the colorbar axes is moved next to the map by
    # the callback defined below.
    cbar_ax = fig.add_axes([0, 0, 0.1, 0.1])
    # Add a cyclic point to the data and its longitude coordinate.
    cyclic_data, cyclic_lon = cutil.add_cyclic_point(ds_to_plt,
                                                     ds_to_plt.longitude)

    ax.set_title(name)
    ax.set_xticks(np.arange(bounds[1], bounds[0], 10), crs=plate)
    ax.set_yticks(np.arange(bounds[3], bounds[2], 5), crs=plate)
    ax.xaxis.set_major_formatter(
        LongitudeFormatter(zero_direction_label=True))
    ax.yaxis.set_major_formatter(LatitudeFormatter())
    ax.add_feature(cf.LAND)

    def _align_colorbar(event):
        # Redraw first so the map axes reports its final position, then pin
        # the colorbar to the right edge of the map at the same height.
        plt.draw()
        map_box = ax.get_position()
        cbar_ax.set_position([map_box.x0 + map_box.width + 0.01,
                              map_box.y0,
                              0.04,
                              map_box.height])

    ax.set_extent(bounds, plate)
    filled = ax.contourf(cyclic_lon, ds_to_plt.latitude, cyclic_data,
                         levels=np.arange(-0.4, 0.5, 0.05),
                         transform=plate,
                         cmap='inferno',
                         extend='both')
    # Keep the colorbar attached to the map whenever the figure is resized.
    fig.canvas.mpl_connect('resize_event', _align_colorbar)
    ax.coastlines()
    plt.colorbar(filled, cax=cbar_ax)
    _align_colorbar(None)
    # plt.savefig(name, dpi=300)
    plt.show()


def detrend_dim(da, dim, deg=1):
    """Remove a polynomial trend from ``da`` along one dimension.

    Parameters
    ----------
    da
        Object providing ``polyfit`` and indexable by ``dim``
        (presumably an xarray.DataArray -- TODO confirm).
    dim : str
        Name of the dimension to fit and detrend along.
    deg : int
        Degree of the fitted polynomial (1 = linear trend).

    Returns
    -------
    ``da`` minus the polynomial evaluated at ``da[dim]``.
    """
    coefficients = da.polyfit(dim=dim, deg=deg).polyfit_coefficients
    trend = xr.polyval(da[dim], coefficients)
    return da - trend


def index_plot(ds1, name1='', threshold=0.5):
    """Plot an index time series and shade where it exceeds +/- ``threshold``.

    Parameters
    ----------
    ds1
        Series to plot; its ``time`` attribute supplies the x values
        (presumably a 1-D xarray.DataArray -- TODO confirm).
    name1 : str
        Used as the line label and as the figure title.
    threshold : float
        Values above ``threshold`` are shaded red, values below
        ``-threshold`` are shaded blue. The y axis spans +/- 4 * threshold.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    y_limit = 4 * threshold
    times = ds1.time

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(times, ds1, color='black', label=name1)
    gvutil.add_major_minor_ticks(ax,
                                 x_minor_per_major=15,
                                 y_minor_per_major=3,
                                 labelsize=20)
    gvutil.set_axes_limits_and_ticks(ax, ylim=(-y_limit, y_limit))

    # (baseline, mask, colour) for the negative and the positive excursions,
    # drawn in that order.
    shaded_regions = (
        (-threshold, ds1 < -threshold, 'blue'),
        (threshold, ds1 > threshold, 'red'),
    )
    for baseline, mask, colour in shaded_regions:
        ax.fill_between(times, ds1, y2=baseline,
                        where=mask, color=colour, interpolate=True)

    plt.title(f'{name1}')
    ax.set_xlabel('year', fontsize=24)
    plt.grid()
    plt.show()

# Data import

In [11]:
def le_in(member_names, decades,
          path="/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/"):
    """Open the SST time-series files of several ensemble members.

    For every member, the files of the requested date ranges are opened and
    concatenated along ``time``. Files that do not exist are skipped, and a
    member with no existing file is left out of the result.

    Parameters
    ----------
    member_names : iterable of str
        One entry per ensemble member.
        NOTE(review): each entry is assumed to be the part of the file name
        that precedes ``.cam.h0.SST.``, for example
        ``b.e21.BHISTcmip6.f09_g17.LE2-1001.001``. This matches how the file
        discovery loop in this notebook builds its paths -- confirm.
    decades : iterable of str
        Date-range tokens as they appear in the file names, for example
        ``'191001-191912'``.
    path : str
        Directory that contains the files.

    Returns
    -------
    list
        One time-concatenated dataset per member that had at least one file,
        in the order of ``member_names``.
    """
    members = []
    for m in member_names:
        # Start a fresh list for every member so that one member's decades
        # never end up in the next member's concatenation.
        data = []
        for d in decades:
            file = os.path.join(path, f'{m}.cam.h0.SST.{d}.nc')
            if not os.path.exists(file):
                continue
            data.append(xr.open_dataset(file))
        # xr.concat raises on an empty list, so members without files are
        # skipped instead of aborting the whole import.
        if data:
            members.append(xr.concat(data, dim='time'))
    return members


# The file names assembled below have the form
#   <title><forcing>.f09_g17.LE2-<macro>.<micro>.cam.h0.SST.<date range>.nc
# where <macro>.<micro> is the "start.member_n" part of the name.

# date ranges in scope
hist_decades = [
    '191001-191912',
    '192001-192912',
    '193001-193912',
    '194001-194912',
    '195001-195912',
    '196001-196912',
    '197001-197912',
    '198001-198912',
    '199001-199912',
    '200001-200912',
    '201001-201412',
]

# There are too many distinct member names to list by hand, so they are
# generated in two parts: macro is 1001, 1011, ..., 1301 and micro is
# 001, 002, ..., 020.
macro_name = [str(start) for start in range(1001, 1311, 10)]
micro_name = [str(number).zfill(3) for number in range(1, 21)]

path = "/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/"
title = "b.e21.BHIST"
type = '.nc'

# `all` gets one list of existing file paths per member; macro/micro
# combinations for which no file exists are left out.
all = []
for a in ["cmip6", "smbb"]:
    hist = f'{title}{a}'
    for i in macro_name:
        for j in micro_name:
            prefix = f'{path}{hist}.f09_g17.LE2-{i}.{j}.cam.h0.SST.'
            candidates = [f'{prefix}{z}{type}' for z in hist_decades]
            member = [file for file in candidates if os.path.exists(file)]
            if member:
                all.append(member)

In [12]:
%%time
def _open_member_sst(file_list):
    """Open the 'SST' variable of every file and join the pieces along time."""
    pieces = []
    for file_name in file_list:
        # Echo each path so progress is visible while the files are read.
        print(file_name)
        pieces.append(xr.open_dataset(file_name, engine='h5netcdf')['SST'])
    return xr.concat(pieces, dim='time')


# One time-concatenated SST array per member, in the order of `all`.
all_ds = [_open_member_sst(file_list) for file_list in all]

/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1001.001.cam.h0.SST.191001-191912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1001.001.cam.h0.SST.192001-192912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1001.001.cam.h0.SST.193001-193912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1001.001.cam.h0.SST.194001-194912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1001.001.cam.h0.SST.195001-195912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1001.001.cam.h0.SST.196001-196912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1001.001.cam.h0.SST.197001-197912.nc
/glade/campaign/cgd/

Exception ignored in: <function CachingFileManager.__del__ at 0x15002db54160>
Traceback (most recent call last):
  File "/glade/u/home/acruz/.local/lib/python3.10/site-packages/xarray/backends/file_manager.py", line 244, in __del__
    ref_count = self._ref_counter.decrement(self._key)
AttributeError: 'CachingFileManager' object has no attribute '_ref_counter'


/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1231.002.cam.h0.SST.192001-192912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1231.002.cam.h0.SST.193001-193912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1231.002.cam.h0.SST.194001-194912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1231.002.cam.h0.SST.195001-195912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1231.002.cam.h0.SST.196001-196912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1231.002.cam.h0.SST.197001-197912.nc
/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/SST/b.e21.BHISTcmip6.f09_g17.LE2-1231.002.cam.h0.SST.198001-198912.nc
/glade/campaign/cgd/

In [16]:
# Combine the per-member SST arrays along a 'member' dimension.
# NOTE(review): members built from fewer files have shorter time axes;
# presumably xr.concat's default join pads them with NaN -- confirm.
test = xr.concat(all_ds, dim='member')

In [21]:
# Write the combined array to a NetCDF file at a hard-coded, user-specific
# absolute path.
test.to_netcdf('/glade/work/acruz/CESM21SSTs.nc')