# Calculate PSL and save as a file
RUFMOD ONLY

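* **Description**: Reads in monthly sea level pressure (PSL) from the rufmod ensemble members and computes seasonal means for each year and each member (no ensemble averaging or vertical interpolation is done in this notebook)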
* **Input data**: Rufmod output in timeseries format
* **Output data**: Netcdf file with output
* **Creator**: Alice DuVivier
* **Date**: April 2022

In the rufmod experiments, the surface roughness over Arctic sea ice regions was set equal to the value it would have over open ocean. The goal is to better understand ice-atmosphere coupling, processes, and feedbacks.

In [1]:
import xarray as xr
import numpy as np
from datetime import timedelta
import glob

import pop_tools

import matplotlib.pyplot as plt
import matplotlib.path as mpath
from matplotlib.gridspec import GridSpec

import geocat.datafiles as gdf
import geocat.viz.util as gvutil
from geocat.viz import cmaps as gvcmaps
import geocat.comp as gcomp

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from scipy.stats import linregress,pearsonr, t

import dask
import intake
from distributed import Client
from ncar_jobqueue import NCARCluster

  from distributed.utils import tmpfile


In [2]:
# Spin up a dask cluster through the PBS batch scheduler

import dask
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Settings applied to every PBS job that hosts dask workers
pbs_job_settings = dict(
    cores=36,                                     # number of cores per job
    memory='300 GB',                              # amount of memory per job
    processes=9,                                  # how many worker processes per job
    queue='casper',                               # queue to submit to (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR',                    # local directory for the workers
    resource_spec='select=1:ncpus=36:mem=300GB',  # PBS resource request
    project='P93300665',                          # project ID to charge
    walltime='00:30:00',                          # wall time per job
    interface='ib0',                              # network interface to use
)
cluster = PBSCluster(**pbs_job_settings)

# Ask the scheduler for four such jobs
cluster.scale(jobs=4)

# Point the dashboard link at the JupyterHub proxy so it can be opened from the browser
dask.config.set({'distributed.dashboard.link':'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'})

# Attach a client to the cluster
client = Client(cluster)

In [3]:
# Display the client summary (dashboard link, number of workers, threads, memory)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/duvivier/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/duvivier/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.42:39976,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/duvivier/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Manually set variables

In [4]:
# Name of the variable to load; it is used both to build the input file
# names and to pick the variable out of the opened dataset
var_in_1 = 'PSL'

## Load rufmod experiments

In [5]:
# Case names and input directories for the five "rufmod" ensemble members
case1 = 'b.e21.BSSP370.f09_g17.rufmod.001'
case2 = 'b.e21.BSSP370.f09_g17.rufmod.002'
case3 = 'b.e21.BSSP370.f09_g17.rufmod.003'
case4 = 'b.e21.BSSP370.f09_g17.rufmod.004'
case5 = 'b.e21.BSSP370.f09_g17.rufmod.005'

# base directory where all data live
data_dir = '/glade/campaign/cesm/development/pcwg/projects/arctic_cyclones/rufmod_expts/'

# each member keeps its monthly atmosphere timeseries under <case>/atm/proc/tseries/month_1/
data_dir1 = f'{data_dir}{case1}/atm/proc/tseries/month_1/'
data_dir2 = f'{data_dir}{case2}/atm/proc/tseries/month_1/'
data_dir3 = f'{data_dir}{case3}/atm/proc/tseries/month_1/'
data_dir4 = f'{data_dir}{case4}/atm/proc/tseries/month_1/'
data_dir5 = f'{data_dir}{case5}/atm/proc/tseries/month_1/'

In [6]:
%%time
#reading in files
print("loading "+var_in_1)   
ds1_1 = []
ds2_1 = []
ds3_1 = []
ds4_1 = []
ds5_1 = []
my_files=sorted(glob.glob(data_dir1+case1+'.cam.h0.'+var_in_1+'.*.nc'))
ds1_1=xr.open_mfdataset(my_files,combine='by_coords',chunks={'time':129}, parallel=True, compat='override', coords='minimal')
my_files=sorted(glob.glob(data_dir2+case2+'.cam.h0.'+var_in_1+'.*.nc'))
ds2_1=xr.open_mfdataset(my_files,combine='by_coords',chunks={'time':129}, parallel=True, compat='override', coords='minimal')
my_files=sorted(glob.glob(data_dir3+case3+'.cam.h0.'+var_in_1+'.*.nc'))
ds3_1=xr.open_mfdataset(my_files,combine='by_coords',chunks={'time':129}, parallel=True, compat='override', coords='minimal')    
my_files=sorted(glob.glob(data_dir4+case4+'.cam.h0.'+var_in_1+'.*.nc'))
ds4_1=xr.open_mfdataset(my_files,combine='by_coords',chunks={'time':129}, parallel=True, compat='override', coords='minimal')
my_files=sorted(glob.glob(data_dir5+case5+'.cam.h0.'+var_in_1+'.*.nc'))
ds5_1=xr.open_mfdataset(my_files,combine='by_coords',chunks={'time':129}, parallel=True, compat='override', coords='minimal')


loading PSL
CPU times: user 1.15 s, sys: 418 ms, total: 1.57 s
Wall time: 39.4 s


In [7]:
# Stack the five member datasets along a new 'member_id' dimension
member_list = [ds1_1, ds2_1, ds3_1, ds4_1, ds5_1]
futures_1 = xr.concat(member_list, dim='member_id')

In [9]:
# Inspect the concatenated PSL array (shape, chunking, dtype)
futures_1.PSL

Unnamed: 0,Array,Chunk
Bytes,1.06 GiB,27.21 MiB
Shape,"(5, 1032, 192, 288)","(1, 129, 192, 288)"
Count,190 Tasks,45 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.06 GiB 27.21 MiB Shape (5, 1032, 192, 288) (1, 129, 192, 288) Count 190 Tasks 45 Chunks Type float32 numpy.ndarray",5  1  288  192  1032,

Unnamed: 0,Array,Chunk
Bytes,1.06 GiB,27.21 MiB
Shape,"(5, 1032, 192, 288)","(1, 129, 192, 288)"
Count,190 Tasks,45 Chunks
Type,float32,numpy.ndarray


In [10]:
# Turn the member_id dimension into an explicit coordinate, using the
# values that xarray reports for it
futures_1 = futures_1.assign_coords(member_id=futures_1.member_id.values)

In [11]:
# Re-label each monthly time stamp with the middle of its averaging period.
# The midpoint is the mean of the two time bounds; member 0 is used for the
# new time axis.
# Some time attributes are lost here, so they may need to be put back later...
time_bounds = futures_1.time_bnds.load()
period_midpoint = time_bounds.mean(dim='nbnd')
futures_1['time'] = period_midpoint.sel(member_id=0)

In [12]:
# Keep just the NH slice: latitude indices 164 through 191
nh_lat_index = slice(164,192)
futures_1_masked = futures_1.isel(lat=nh_lat_index)

In [13]:
# Pull the variable of interest (named by var_in_1) out of the latitude-subset dataset
PSL_rufmod = futures_1_masked[var_in_1]

# Actually load data

In [14]:
# Read the values into memory now, so the seasonal-mean loop below works on
# in-memory data instead of re-reading from disk on every selection
PSL_rufmod.load()

## Calculate Seasonal Means

In [16]:
# Seasons to average over; this order is also the order along the 'season'
# axis of the output array
season_names = ['OND','JFM', 'AMJ', 'JAS']

In [17]:
# One entry per year: take the year of every January time stamp
is_january = PSL_rufmod.coords['time.month'] == 1
xarr_rufmod = PSL_rufmod.coords['time.year'][is_january]

In [19]:
# Loop through seasons - rufmod
# Computes the mean PSL for every season, year and ensemble member.

# calendar months that make up each season
season_months = {
    'JFM': [1, 2, 3],
    'AMJ': [4, 5, 6],
    'JAS': [7, 8, 9],
    'OND': [10, 11, 12],
}

# make numpy array to fill, dimensioned (season, year, member_id, lat, lon)
seas_array_rufmod_1 = np.zeros([len(season_names),len(xarr_rufmod),len(PSL_rufmod.member_id),len(PSL_rufmod.lat),len(PSL_rufmod.lon)])

for s_count, ss in enumerate(season_names):
    print(ss)
    # keep only the months belonging to this season; an unknown season name
    # raises KeyError instead of silently reusing the previous season's data
    temp1 = PSL_rufmod.isel(time=PSL_rufmod.time.dt.month.isin(season_months[ss]))
    # now loop through years to get the seasonal average by year for each ensemble member
    # (all months of a season are taken from the same calendar year yy)
    for y_count, yy in enumerate(xarr_rufmod):
        # select only the time stamps for this year and average them
        temp1a = temp1.isel(time=temp1.time.dt.year.isin([yy])).mean(dim='time')
        # order the dimensions explicitly so the values line up with the numpy axes
        seas_array_rufmod_1[s_count,y_count,:,:,:] = temp1a.transpose('member_id','lat','lon').values
        

OND
JFM
AMJ
JAS


In [20]:
# Sanity check: dimensions should be (season, year, member_id, lat, lon)
print(seas_array_rufmod_1.shape)

(4, 86, 5, 28, 288)


In [21]:
# Wrap the filled numpy array in a labelled xarray DataArray for easier plotting
seasonal_dims = ('season','time','member_id','lat','lon')
PSL_seas_rufmod = xr.DataArray(seas_array_rufmod_1, dims=seasonal_dims)

In [22]:
# Attach a coordinate to every dimension of the seasonal array
seasonal_coords = PSL_seas_rufmod.coords
seasonal_coords['season'] = season_names              # season labels
seasonal_coords['time'] = xarr_rufmod                 # one year per entry
seasonal_coords['member_id'] = PSL_rufmod['member_id']
seasonal_coords['lat'] = PSL_rufmod['lat'].values
seasonal_coords['lon'] = PSL_rufmod['lon'].values

## Write out files

In [23]:
# quick and dirty way to save a file!

# save rufmod expt, rename the variable so it makes sense
#fout = 'rufmod_vertical_seas_ens_mean_WS'
#
#WS_seas_ens_mean_rufmod.to_dataset(name='vert_ws').to_netcdf(fout+'.nc')

### PSL

In [29]:
# Metadata and file name stem for the output file
out_tag = 'PSL'
units = 'Pa'
longname = 'sea level pressure'

# the output file name is built from this stem
fout = f'rufmod_seas_{out_tag}'

In [30]:
# Alias (not a copy) of the seasonal array: changes made through ds_to_save,
# such as setting attributes, also apply to PSL_seas_rufmod
ds_to_save = PSL_seas_rufmod

In [31]:
# check how big this will be to write out, in GiB (bytes / 1024**3)
ds_to_save.nbytes/(1024**3)

0.10334014892578125

In [32]:
# Attach descriptive attributes to the array that will be written out.
# 'long_name' is the attribute name defined by the CF conventions and is the
# one recognised by CF-aware tools; the original 'longname' key is kept as
# well so that anything reading the old key keeps working.
refdata = {
    'Author': 'Alice DuVivier',
    'units': units,
    'long_name': longname,
    'longname': longname,
}

# replaces any attributes already present on the array
ds_to_save.attrs = refdata

In [33]:
# check data: display dimensions, coordinates and attributes before writing
ds_to_save


In [34]:
# Write the seasonal means to <fout>.nc in the current working directory.
# NOTE(review): the DataArray was created without a name, so the variable in
# the file presumably gets xarray's default placeholder name - confirm, and
# consider .to_dataset(name=out_tag) if downstream readers can be updated.
ds_to_save.to_netcdf(fout+'.nc')  # how to save file