# Ocean Bottom Temperature Data Processing from CESM2-LE derived output
- Process data for Southern Ocean annual mean bottom temperature.
- Notebook by Alice DuVivier (NCAR), Kristen Krumhardt (NCAR)
- Note: this notebook needs to load the utils.py file
- October 2024

In [1]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
from glob import glob
import xarray as xr
import numpy as np
import esmlab
import pop_tools 
import dask
from distributed import Client
from ncar_jobqueue import NCARCluster
import utils
from datetime import datetime
import matplotlib.pyplot as plt
import cartopy
import cmocean
import cartopy.crs as ccrs

## Spin up DASK cluster

In [None]:
# Build the NCARCluster (a PBSCluster under the hood): every job is a single
# core / single process with 100 GB of memory and a two-hour walltime.
cluster = NCARCluster(
    walltime='2:00:00',
    cores=1,
    processes=1,
    memory='100 GB',
    resource_spec='select=1:ncpus=1:mem=100GB',
    interface='ext',
)

# Ask for 32 workers
cluster.scale(32)

# Attach a distributed Client to the cluster so dask work is sent there
client = Client(cluster)

In [None]:
# Display the client summary so the cluster/worker status can be checked
client
# To release the cluster early, run: cluster.close()

## Load the CESM data

In [2]:
# Fetch the POP gx1v7 grid from pop_tools and pull out the T-cell fields
ds_grid = pop_tools.get_grid('POP_gx1v7')
lons, lats, area = ds_grid.TLONG, ds_grid.TLAT, ds_grid.TAREA
# TAREA scaled by 1e-4 (presumably cm^2 -> m^2; confirm against the grid units)
area_m = area * 1e-4
lons_norm = utils.normal_lons(lons)

In [3]:
# Collect the TEMP_bottom files (1850-2100) in sorted order and stack them
# along a new "member_id" dimension, in the order the files are listed.
path = '/glade/campaign/cesm/development/bgcwg/projects/CESM2-LE-FEISTY/drivers/TEMP_bottom'
files = sorted(glob(f'{path}/CESM2-LE_HIST_SSP370_TEMP_bottom_????.???.nc'))
ds = xr.open_mfdataset(
    files,
    combine='nested',
    concat_dim="member_id",
    coords='minimal',
    compat="override",
)

In [4]:
# Inspect the combined dataset (dimensions, variables, chunking)
ds

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,68.94 GiB,1.38 GiB
Shape,"(50, 3012, 384, 320)","(1, 3012, 384, 320)"
Count,200 Tasks,50 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 68.94 GiB 1.38 GiB Shape (50, 3012, 384, 320) (1, 3012, 384, 320) Count 200 Tasks 50 Chunks Type float32 numpy.ndarray",50  1  320  384  3012,

Unnamed: 0,Array,Chunk
Bytes,68.94 GiB,1.38 GiB
Shape,"(50, 3012, 384, 320)","(1, 3012, 384, 320)"
Count,200 Tasks,50 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.30 MiB,47.06 kiB
Shape,"(50, 3012, 2)","(1, 3012, 2)"
Count,200 Tasks,50 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 2.30 MiB 47.06 kiB Shape (50, 3012, 2) (1, 3012, 2) Count 200 Tasks 50 Chunks Type object numpy.ndarray",2  3012  50,

Unnamed: 0,Array,Chunk
Bytes,2.30 MiB,47.06 kiB
Shape,"(50, 3012, 2)","(1, 3012, 2)"
Count,200 Tasks,50 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,46.88 MiB,0.94 MiB
Shape,"(50, 384, 320)","(1, 384, 320)"
Count,200 Tasks,50 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 46.88 MiB 0.94 MiB Shape (50, 384, 320) (1, 384, 320) Count 200 Tasks 50 Chunks Type float64 numpy.ndarray",320  384  50,

Unnamed: 0,Array,Chunk
Bytes,46.88 MiB,0.94 MiB
Shape,"(50, 384, 320)","(1, 384, 320)"
Count,200 Tasks,50 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,23.44 MiB,480.00 kiB
Shape,"(50, 384, 320)","(1, 384, 320)"
Count,200 Tasks,50 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 23.44 MiB 480.00 kiB Shape (50, 384, 320) (1, 384, 320) Count 200 Tasks 50 Chunks Type int32 numpy.ndarray",320  384  50,

Unnamed: 0,Array,Chunk
Bytes,23.44 MiB,480.00 kiB
Shape,"(50, 384, 320)","(1, 384, 320)"
Count,200 Tasks,50 Chunks
Type,int32,numpy.ndarray


### Subset data as needed

#### Calculate annual means and then just keep some years

In [11]:
# Group the time axis by calendar year and average within each group.
# NOTE(review): this is a plain mean over the time steps in each year, with no
# weighting by month length -- confirm that is what is wanted.
ds_ann = ds.groupby('time.year').mean(dim='time')

In [13]:
# Check which years are present after the annual averaging
ds_ann.year

In [15]:
## Enter the years you want to keep:
yy_st = "1950"
yy_ed = "2100"

# The `year` coordinate produced by groupby('time.year') holds integers, so the
# string bounds are cast to int before label-slicing; otherwise the lookup
# depends on implicit string/integer comparison inside the index.
# Label slices in .sel include both end points, so yy_ed itself is kept.
ds_keep = ds_ann.sel(year=slice(int(yy_st), int(yy_ed)))

#### Keep just the Southern Ocean

In [17]:
## Enter the latitudes you want to keep (southern bound, northern bound):
lat_min, lat_max = -80, -60

In [18]:
# Crop to the requested latitude band using the grid's TLAT values rather than
# fixed indices. The mask is built once, from the un-cropped grid, and applied
# to both the data and the grid; drop=True trims rows/columns left entirely
# outside the band.
in_lat_band = (ds_grid['TLAT'] >= lat_min) & (ds_grid['TLAT'] <= lat_max)
ds_keep = ds_keep.where(in_lat_band, drop=True)
ds_grid = ds_grid.where(in_lat_band, drop=True)

In [19]:
# Inspect the dataset after the year and latitude subsetting
ds_keep

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 90.00 kiB 90.00 kiB Shape (36, 320) (36, 320) Count 4 Tasks 1 Chunks Type float64 numpy.ndarray",320  36,

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 90.00 kiB 90.00 kiB Shape (36, 320) (36, 320) Count 4 Tasks 1 Chunks Type float64 numpy.ndarray",320  36,

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,331.79 MiB,45.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,85601 Tasks,7550 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 331.79 MiB 45.00 kiB Shape (151, 50, 36, 320) (1, 1, 36, 320) Count 85601 Tasks 7550 Chunks Type float32 numpy.ndarray",151  1  320  36  50,

Unnamed: 0,Array,Chunk
Bytes,331.79 MiB,45.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,85601 Tasks,7550 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,35451 Tasks,7550 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 663.57 MiB 90.00 kiB Shape (151, 50, 36, 320) (1, 1, 36, 320) Count 35451 Tasks 7550 Chunks Type float64 numpy.ndarray",151  1  320  36  50,

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,35451 Tasks,7550 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,43001 Tasks,7550 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 663.57 MiB 90.00 kiB Shape (151, 50, 36, 320) (1, 1, 36, 320) Count 43001 Tasks 7550 Chunks Type float64 numpy.ndarray",151  1  320  36  50,

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,43001 Tasks,7550 Chunks
Type,float64,numpy.ndarray


## Put together into one dataset

- NetCDF with variable dimension: (member_id x year x nlat x nlon)
- Include TAREA, and TLAT and TLONG as coordinates

In [20]:
# Re-read the grid from file (replacing the earlier ds_grid) so its fields can
# be written into the output file.
dir_in = '/glade/u/home/duvivier/masks/'
fin = 'ocn_grid_gx1v7.nc'
ds_grid = xr.open_mfdataset(f'{dir_in}{fin}', decode_times=False)

# Store nlat/nlon back on the dataset as explicit variables
# (presumably so they are carried as index coordinates -- confirm).
for index_dim in ('nlat', 'nlon'):
    ds_grid[index_dim] = ds_grid[index_dim]

# Crop the grid to the same latitude band as the data, based on TLAT values
grid_tlat = ds_grid['TLAT']
ds_grid = ds_grid.where((grid_tlat >= lat_min) & (grid_tlat <= lat_max), drop=True)

In [21]:
# Build the output dataset from the subsetted data plus the cropped grid area.
# assign() returns a new Dataset, so ds_keep is not modified in place and this
# cell gives the same result when re-run.
ds_out = ds_keep.assign(TAREA=ds_grid.TAREA)

# change the attributes
ds_out.attrs['author'] = 'Alice DuVivier'
ds_out.attrs['date_processed'] = datetime.now().strftime('%Y-%m-%d')
# Take the year range from the data itself so the description cannot drift
# from the years actually selected.
ds_out.attrs['contents'] = (
    f'yearly mean bottom temperature in the Southern Ocean '
    f'from {int(ds_out.year.min())} to {int(ds_out.year.max())} '
    f'from the CESM2 Large Ensemble.'
)

ds_out

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 90.00 kiB 90.00 kiB Shape (36, 320) (36, 320) Count 4 Tasks 1 Chunks Type float64 numpy.ndarray",320  36,

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 90.00 kiB 90.00 kiB Shape (36, 320) (36, 320) Count 4 Tasks 1 Chunks Type float64 numpy.ndarray",320  36,

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 90.00 kiB 90.00 kiB Shape (36, 320) (36, 320) Count 4 Tasks 1 Chunks Type float64 numpy.ndarray",320  36,

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 90.00 kiB 90.00 kiB Shape (36, 320) (36, 320) Count 4 Tasks 1 Chunks Type float64 numpy.ndarray",320  36,

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,4 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,331.79 MiB,45.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,85601 Tasks,7550 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 331.79 MiB 45.00 kiB Shape (151, 50, 36, 320) (1, 1, 36, 320) Count 85601 Tasks 7550 Chunks Type float32 numpy.ndarray",151  1  320  36  50,

Unnamed: 0,Array,Chunk
Bytes,331.79 MiB,45.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,85601 Tasks,7550 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,35451 Tasks,7550 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 663.57 MiB 90.00 kiB Shape (151, 50, 36, 320) (1, 1, 36, 320) Count 35451 Tasks 7550 Chunks Type float64 numpy.ndarray",151  1  320  36  50,

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,35451 Tasks,7550 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,43001 Tasks,7550 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 663.57 MiB 90.00 kiB Shape (151, 50, 36, 320) (1, 1, 36, 320) Count 43001 Tasks 7550 Chunks Type float64 numpy.ndarray",151  1  320  36  50,

Unnamed: 0,Array,Chunk
Bytes,663.57 MiB,90.00 kiB
Shape,"(151, 50, 36, 320)","(1, 1, 36, 320)"
Count,43001 Tasks,7550 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,12 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 90.00 kiB 90.00 kiB Shape (36, 320) (36, 320) Count 12 Tasks 1 Chunks Type float64 numpy.ndarray",320  36,

Unnamed: 0,Array,Chunk
Bytes,90.00 kiB,90.00 kiB
Shape,"(36, 320)","(36, 320)"
Count,12 Tasks,1 Chunks
Type,float64,numpy.ndarray


In [24]:
# Drop the ULAT/ULONG variables.
# drop_vars replaces the deprecated Dataset.drop for removing variables, and
# errors='ignore' lets this cell be re-run after they have already been removed.
ds_out = ds_out.drop_vars(['ULAT', 'ULONG'], errors='ignore')

In [25]:
# Text summary of the output dataset: the shape of each dimension's index,
# the values of every coordinate, and the global attributes.
print("Dimensions:")
for dim_name in ds_out.dims:
    dim_shape = ds_out[dim_name].values.shape
    print(f"\t{dim_name}: {dim_shape}")

print("Coordinates:")
for coord_name, coord_var in ds_out.coords.items():
    print(f"\t{coord_name}:")
    print(f"\t\t{coord_var.values}")

print("Attributes:")
for attr_name, attr_value in ds_out.attrs.items():
    print(f"\t{attr_name}: {attr_value}")
    

Dimensions:
	nlat: (36,)
	nlon: (320,)
	year: (151,)
	member_id: (50,)
Coordinates:
	nlat:
		[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35]
	nlon:
		[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 

## Export and Save

In [26]:
# Report the dataset size (nbytes) converted to GB, where 1 GB = 2**30 bytes
bytes_per_gb = 2**30
size_gb = ds_out.nbytes / bytes_per_gb
print(f"The dataset is approximately {size_gb:.2f} GB.")


The dataset is approximately 1.62 GB.


In [27]:
# Assemble the output file name from the variable tag and the target directory.
# NOTE(review): the target directory ends in "sea_ice/" although the variable is
# bottom temperature -- confirm this is the intended location.
variable = 'TBOT'

path_out = '/glade/campaign/cgd/ppc/duvivier/cesm2_antarctic_polynya/mpa_analysis/DATA/ecoindex_data/sea_ice/'
file_out = f'CESM2-LE-annual-mean-{variable}.nc'
fout = f'{path_out}{file_out}'

In [28]:
# Write the dataset, with its coordinates and attributes, to the NetCDF file at fout
ds_out.to_netcdf(path=fout)

In [None]:
# Disconnect the client before tearing down the cluster it is attached to,
# so no client is left pointing at a scheduler that has gone away.
client.close()
cluster.close()