# Arctic Sea ice files 

* **Description**: Creates file of Utqiagvik data from CESM1-LE
* **Input data**: CESM1-LE monthly timeseries data
* **Output data**: Netcdf with data
* **Creator**: Alice DuVivier
* **Date**: September 2024

### Import Packages

In [1]:
#import utils
import xarray as xr
import numpy as np
from glob import glob
import importlib
from datetime import datetime
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
import cmocean

## Find point nearest Utqiagvik

In [2]:
# Village name and coordinates. Longitude is given in the 0-360 convention
# (203.21 is the same meridian as -156.79 in the -180..180 convention).
name_village = 'Utqiagvik'
lat_village, lon_village = 71.29, 203.21

In [3]:
# Open a single monthly file; it is only used to obtain the grid information
# (time decoding is switched off for this read).
dir_in = '/glade/campaign/cesm/collections/cesmLE/CESM-CAM5-BGC-LE/ice/proc/tseries/monthly/aice/'
file_in = 'b.e11.B20TRC5CNBDRD.f09_g16.001.cice.h.aice_nh.185001-200512.nc'
ds = xr.open_mfdataset(f'{dir_in}{file_in}', decode_times=False)

In [4]:
# latitude and longitude fields of the grid, taken from the file opened above
lat2d, lon2d = ds['TLAT'], ds['TLON']

In [5]:
# Find the index of the grid point nearest the village lat/lon.
# The distance measure is the larger of the absolute latitude and longitude
# differences (in degrees), evaluated at every grid point.
abslat = np.abs(lat2d - lat_village)
abslon = np.abs(lon2d - lon_village)
c = np.maximum(abslon, abslat)

# Locate the minimum with nanargmin instead of testing floats for equality:
# it skips NaN cells and always returns exactly one position (the first one in
# row-major order), so a tie can no longer break the unpacking.
# xloc indexes the first grid dimension of c and yloc the second.
xloc, yloc = np.unravel_index(np.nanargmin(c.values), c.shape)

# print index values
print(xloc,yloc)

63 205


In [6]:
# Print the village coordinates next to those of the selected grid cell
# so the match can be checked by eye.
print(f'{name_village}: {lat_village!s},{lon_village!s}')

nearest = {'nj': xloc, 'ni': yloc}
lat_grid = lat2d.isel(**nearest).values
lon_grid = lon2d.isel(**nearest).values
print(f'grid point: {lat_grid!s},{lon_grid!s}')

Utqiagvik: 71.29,203.21
grid point: 71.5558090209961,203.34042358398438


- This point matches the lat/lon for cesm2 experiments that are closest to the Utqiagvik village. Proceed!

## Read in 35 CESM1-LE ensemble members

In [7]:
# Variable to extract, and the directory holding its monthly timeseries files.
# varname options = 'aicen001','aicen002','aicen003','aicen004','aicen005','vicen001','vicen002','vicen003','vicen004','vicen005'

varname = 'aicen001'
path = f'/glade/campaign/cesm/collections/cesmLE/CESM-CAM5-BGC-LE/ice/proc/tseries/monthly/{varname}'

In [8]:
# The 35 ensemble member labels: '101' followed by '002' through '035'
# (three-digit, zero-padded strings as used in the case names).
# NOTE(review): the first label is '101' rather than '001' - presumably intentional; confirm.
ens_mems = ['101'] + [f'{num:03d}' for num in range(2, 36)]

### Load historical data

In [9]:
%%time

print('loading historical data')
ds_hist = xr.Dataset()

for m in ens_mems:
    print(m)

    case = 'b.e11.B20TRC5CNBDRD.f09_g16.' + m

    files = sorted(glob(f'{path}/{case}.cice.h.{varname}_nh.??????-??????.nc'))       
    ds_tmp = xr.open_mfdataset(files, data_vars="minimal", coords='minimal', compat="override", parallel=True, 
                               concat_dim="time", combine='nested', decode_times=True)

    # keep only the point we're interested in near village
    ds_tmp = ds_tmp.isel(nj=slice(xloc,xloc+1),ni=slice(yloc, yloc+1)).compute()
    
    # take an average over the time bounds to get the right time dimension
    ds_tmp["time"] = ds_tmp.time_bounds.compute().mean(dim="d2")
    
    # keep only some variables
    keep_vars=['TLAT','TLON','tarea','time'] + [varname]
    ds_tmp = ds_tmp.drop([v for v in ds_tmp.variables if v not in keep_vars])

    # for some reason the last member time index doesn't play nice, fix that
    if m == '034':
        ds_keep = ds_tmp
    if m == '035':
        ds_tmp['time'] = ds_keep.time
    
    ds_hist = xr.concat([ds_hist,ds_tmp], dim='member_id')


loading historical data
101
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
CPU times: user 12.8 s, sys: 474 ms, total: 13.3 s
Wall time: 24 s


### Load future data

In [10]:
%%time

print('loading future data')
ds_ssp = xr.Dataset()

for m in ens_mems:
    print(m)

    case = 'b.e11.BRCP85C5CNBDRD.f09_g16.' + m

    files = sorted(glob(f'{path}/{case}.cice.h.{varname}_nh.??????-??????.nc'))       
    ds_tmp = xr.open_mfdataset(files, data_vars="minimal", coords='minimal', compat="override", parallel=True, 
                               concat_dim="time", combine='nested', decode_times=True)

    # keep only the point we're interested in near village
    ds_tmp = ds_tmp.isel(nj=slice(xloc,xloc+1),ni=slice(yloc, yloc+1))
    
    # take an average over the time bounds to get the right time dimension
    ds_tmp["time"] = ds_tmp.time_bounds.compute().mean(dim="d2")
    
    # keep only some variables
    keep_vars=['TLAT','TLONG','tarea','time'] + [varname]
    ds_tmp = ds_tmp.drop([v for v in ds_tmp.variables if v not in keep_vars])
    
    ds_ssp = xr.concat([ds_ssp,ds_tmp], dim='member_id')


loading future data
101
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
CPU times: user 2.3 s, sys: 395 ms, total: 2.7 s
Wall time: 9.15 s


## Concatenate historical and future datasets

In [11]:
# Join the historical and future periods along time.
# Note: this rebinds `ds`, which until now held the grid file opened earlier.
ds = xr.concat([ds_hist, ds_ssp], dim='time')

In [12]:
# first and last year of the period to keep (1920-2100) for all datasets
yy_st, yy_ed = 1920, 2100

In [13]:
# Keep only the time steps whose year lies in [yy_st, yy_ed]. The upper bound
# was previously defined but never applied, so only the start year was enforced.
years = ds.time.dt.year
ds_subset = ds.where((years >= yy_st) & (years <= yy_ed), drop = True)

In [14]:
# display the time-subset dataset to inspect its dimensions and chunking
ds_subset

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.45 kiB
Shape,"(2172, 35, 1, 1)","(1140, 1, 1, 1)"
Count,1020 Tasks,70 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 296.95 kiB 4.45 kiB Shape (2172, 35, 1, 1) (1140, 1, 1, 1) Count 1020 Tasks 70 Chunks Type float32 numpy.ndarray",2172  1  1  1  35,

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.45 kiB
Shape,"(2172, 35, 1, 1)","(1140, 1, 1, 1)"
Count,1020 Tasks,70 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.03 kiB
Shape,"(35, 2172, 1, 1)","(1, 1032, 1, 1)"
Count,1890 Tasks,105 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 296.95 kiB 4.03 kiB Shape (35, 2172, 1, 1) (1, 1032, 1, 1) Count 1890 Tasks 105 Chunks Type float32 numpy.ndarray",35  1  1  1  2172,

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.03 kiB
Shape,"(35, 2172, 1, 1)","(1, 1032, 1, 1)"
Count,1890 Tasks,105 Chunks
Type,float32,numpy.ndarray


In [15]:
# Remove the length-1 nj/ni grid dimensions by selecting their only element.
# Only dimensions that are still present are indexed, so running this cell a
# second time (after nj/ni are already gone) is a no-op instead of an error.
ds_subset = ds_subset.isel({d: 0 for d in ('nj', 'ni') if d in ds_subset.dims})

In [16]:
# display the dataset again to confirm the nj/ni dimensions were removed
ds_subset

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.45 kiB
Shape,"(2172, 35)","(1140, 1)"
Count,1090 Tasks,70 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 296.95 kiB 4.45 kiB Shape (2172, 35) (1140, 1) Count 1090 Tasks 70 Chunks Type float32 numpy.ndarray",35  2172,

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.45 kiB
Shape,"(2172, 35)","(1140, 1)"
Count,1090 Tasks,70 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.03 kiB
Shape,"(35, 2172)","(1, 1032)"
Count,1995 Tasks,105 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 296.95 kiB 4.03 kiB Shape (35, 2172) (1, 1032) Count 1995 Tasks 105 Chunks Type float32 numpy.ndarray",2172  35,

Unnamed: 0,Array,Chunk
Bytes,296.95 kiB,4.03 kiB
Shape,"(35, 2172)","(1, 1032)"
Count,1995 Tasks,105 Chunks
Type,float32,numpy.ndarray


In [17]:
%%time
# read the lazily evaluated (dask-chunked) values into memory; timed because it does the actual file reads
ds_subset.load()

CPU times: user 11.2 s, sys: 570 ms, total: 11.8 s
Wall time: 17.5 s


## Save the variable
- Netcdf with variable dimension: (member_id x time)
- One netcdf per variable

In [18]:
# Build the output dataset holding only the selected variable
ds_out = xr.Dataset()

ds_out[varname] = ds_subset[varname]

# change the attributes: author, processing date (today), and a short content
# description. The village name comes from name_village so the description
# stays consistent with the output file name.
ds_out.attrs['author'] = 'Alice DuVivier'
ds_out.attrs['date_processed'] = datetime.now().strftime('%Y-%m-%d')
ds_out.attrs['contents'] = f'Monthly CESM1-LE data for ocean point nearest {name_village}'

ds_out

In [19]:
# Text summary of the output dataset: dimension lengths first ...
print("Dimensions:")
for dim_name in ds_out.sizes:
    print(f"\t{dim_name}: {ds_out[dim_name].values.shape}")

# ... then the values of every coordinate ...
print("Coordinates:")
for coord_name, coord_var in ds_out.coords.items():
    print(f"\t{coord_name}:")
    print(f"\t\t{coord_var.values}")

# ... and finally the global attributes
print("Attributes:")
for attr_name, attr_value in ds_out.attrs.items():
    print(f"\t{attr_name}: {attr_value}")
    

Dimensions:
	time: (2172,)
	member_id: (35,)
Coordinates:
	TLON:
		203.34042358398438
	TLAT:
		71.5558090209961
	time:
		[cftime.DatetimeNoLeap(1920, 1, 16, 12, 0, 0, 0, has_year_zero=True)
 cftime.DatetimeNoLeap(1920, 2, 15, 0, 0, 0, 0, has_year_zero=True)
 cftime.DatetimeNoLeap(1920, 3, 16, 12, 0, 0, 0, has_year_zero=True) ...
 cftime.DatetimeNoLeap(2100, 10, 16, 12, 0, 0, 0, has_year_zero=True)
 cftime.DatetimeNoLeap(2100, 11, 16, 0, 0, 0, 0, has_year_zero=True)
 cftime.DatetimeNoLeap(2100, 12, 16, 12, 0, 0, 0, has_year_zero=True)]
Attributes:
	author: Alice DuVivier
	date_processed: 2025-10-01
	contents: Monthly CESM1-LE data for ocean point nearest Utqiagvik


## Export and Save

In [20]:
# size of the dataset in GB, with 1 GB counted as 2**30 bytes
size_gb = ds_out.nbytes / 2**30
print(f"The dataset is approximately {size_gb:.2f} GB.")


The dataset is approximately 0.00 GB.


In [21]:
# Output location. The file name is assembled from the village name, the
# selected year range and the variable name, so it follows yy_st/yy_ed instead
# of repeating the years as a hard-coded string.
path_out = '/glade/campaign/cgd/ppc/duvivier/arctic_actionable/DATA/cesm1-le_utqiagvik/'
file_out = f'{name_village}_CESM1-LE_{yy_st}-{yy_ed}_monthly-{varname}.nc'
fout = path_out + file_out

In [22]:
# Write the dataset, including its attributes and coordinates, to the NetCDF file at fout
ds_out.to_netcdf(path=fout)