# Arctic Sea ice files 

* **Description**: Creates file of Utqiagvik data from CESM2-LE
* **Input data**: CESM2-LE daily timeseries data
* **Output data**: NetCDF file with the selected variable at the ocean grid point nearest Utqiagvik (member_id x time)
* **Creator**: Alice DuVivier
* **Date**: September 2024

### Import Packages

In [None]:
#import utils
import xarray as xr
import numpy as np
from glob import glob
import importlib
from datetime import datetime
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
import cmocean

## Find point nearest Utqiagvik

In [None]:
# Target village and its coordinates.
# Longitude must be given in the 0-360 convention: 203.21 is the same meridian as -156.79.
name_village = 'Utqiagvik'
lat_village, lon_village = 71.29, 203.21

In [None]:
# Open the grid file that supplies the latitude/longitude/KMT fields used below.
# Times are left undecoded (decode_times=False).
dir_in = '/glade/u/home/duvivier/masks/'
file_in = 'ocn_grid_gx1v7.nc'
grid_path = f'{dir_in}{file_in}'
ds = xr.open_mfdataset(grid_path, decode_times=False)

In [None]:
# pull the latitude/longitude arrays and the KMT field (plotted later as `land`) from the grid file
lat2d, lon2d, land = ds['TLAT'], ds['TLONG'], ds['KMT']

In [None]:
# Locate the grid cell nearest the village.
# The distance measure is the larger of the absolute latitude and longitude
# offsets (in degrees), evaluated for every grid cell.
abslon = np.abs(lon2d - lon_village)
abslat = np.abs(lat2d - lat_village)
c = np.maximum(abslon, abslat)

# Indices of the minimum; the single-element unpacking raises if the
# minimum is not unique.
rows, cols = np.where(c == c.min())
[xloc] = rows
[yloc] = cols

# show the selected indices (xloc is used with nlat, yloc with nlon below)
print(xloc, yloc)

In [None]:
# Compare the requested location with the coordinates of the selected grid cell
print(f'{name_village}: {lat_village!s},{lon_village!s}')

nearest_cell = dict(nlat=xloc, nlon=yloc)
lat_grid = lat2d.isel(nearest_cell).values
lon_grid = lon2d.isel(nearest_cell).values
print(f'grid point: {lat_grid!s},{lon_grid!s}')

In [None]:
# Plot the grid points around the village to confirm that the selected cell
# really is the closest one.
fig = plt.figure(figsize=(20,20))

# Create the map axes once, directly with the polar stereographic projection
# (a plain, unprojected axes is not needed first).
ax = plt.subplot(1,1,1,projection = ccrs.NorthPolarStereo(central_longitude=180.0) )
# extent given as [lon_min, lon_max, lat_min, lat_max]
ax.set_extent([-179, -140, 69, 72], crs=ccrs.PlateCarree())

# shade the `land` (KMT) field on the model grid
cs0 = ax.pcolormesh(lon2d, lat2d,
                    land,
                    cmap = cmocean.cm.thermal, vmin = 0, vmax = 50,
                    transform=ccrs.PlateCarree())

# Mark every grid point with a white dot. Only x/y are passed: a third
# positional array would be read by matplotlib as a separate data series,
# not as a property of these points.
cs1 = ax.plot(lon2d, lat2d,
              c='white', marker='.', linestyle='none',
              transform=ccrs.PlateCarree())

# Plot requested lat/lon point (red)
cs2 = ax.scatter(lon_village, lat_village, marker='.', c='red', transform=ccrs.PlateCarree())
tx1 = ax.text(lon_village, lat_village, 'requested', c='red', transform=ccrs.PlateCarree())

# Plot the nearest grid point (orange), drawn on the same axes as everything else
lon_sel = lon2d.isel(nlat=xloc, nlon=yloc)
lat_sel = lat2d.isel(nlat=xloc, nlon=yloc)
cs4 = ax.scatter(lon_sel, lat_sel, marker='x', color='orange', transform=ccrs.PlateCarree())
tx2 = ax.text(lon_sel, lat_sel, 'selected', c='orange', transform=ccrs.PlateCarree())
               

- This point is an ocean point and is closest to the Utqiagvik village. Proceed!

In [None]:
# echo the grid indices selected for the village
print(xloc, yloc)

## Read in 50 CESM2-LE ensemble members (CMIP6 standard forcing)

In [None]:
# Variable to extract, and the directory holding its daily timeseries files
# varname options = 'aice_d','hi_d'
varname = 'hi_d'
path = f'/glade/campaign/cgd/cesm/CESM2-LE/ice/proc/tseries/day_1/{varname}'

In [None]:
# All 50 cmip6 standard forcing ensemble members, as '<start year>.<member number>'.
# The first ten members each have their own start year (1001, 1021, ..., 1181)
# and are numbered 001-010 in that order.
ens_mems = [f'{1001 + 20 * i}.{i + 1:03d}' for i in range(10)]
# The remaining forty are ten members (001-010) for each of four start years.
ens_mems += [f'{start}.{num:03d}'
             for start in (1231, 1251, 1281, 1301)
             for num in range(1, 11)]

### Load historical data

In [None]:
%%time

print('loading historical data')
ds_hist = xr.Dataset()

for m in ens_mems:
    print(m)

    case = 'b.e21.BHISTcmip6.f09_g17.LE2-' + m

    files = sorted(glob(f'{path}/{case}.cice.h1.{varname}.????????-????????.nc'))       
    ds_tmp = xr.open_mfdataset(files, data_vars="minimal", coords='minimal', compat="override", parallel=True, 
                               concat_dim="time", combine='nested', decode_times=True)

    # take an average over the time bounds to get the right time dimension
    ds_tmp["time"] = ds_tmp.time_bounds.compute().mean(dim="d2")
    
    # keep only some variables
    keep_vars=['TLAT','TLON','tarea','time'] + [varname]
    ds_tmp = ds_tmp.drop([v for v in ds_tmp.variables if v not in keep_vars])
    
    ds_hist = xr.concat([ds_hist,ds_tmp], dim='member_id')


### Load future data

In [None]:
%%time

print('loading future data')
ds_ssp = xr.Dataset()

for m in ens_mems:
    print(m)

    case = 'b.e21.BSSP370cmip6.f09_g17.LE2-' + m

    files = sorted(glob(f'{path}/{case}.cice.h1.{varname}.????????-????????.nc'))       
    ds_tmp = xr.open_mfdataset(files, data_vars="minimal", coords='minimal', compat="override", parallel=True, 
                               concat_dim="time", combine='nested', decode_times=True)

    # take an average over the time bounds to get the right time dimension
    ds_tmp["time"] = ds_tmp.time_bounds.compute().mean(dim="d2")
    
    # keep only some variables
    keep_vars=['TLAT','TLONG','tarea','time'] + [varname]
    ds_tmp = ds_tmp.drop([v for v in ds_tmp.variables if v not in keep_vars])
    
    ds_ssp = xr.concat([ds_ssp,ds_tmp], dim='member_id')


## Concatenate historical and future datasets

In [None]:
# join the historical and future records end to end along time
# (note: this rebinds `ds`, which held the grid file until now)
ds = xr.concat([ds_hist, ds_ssp], dim='time')

In [None]:
# Cut the full grid down to the single cell nearest the village.
# Length-1 slices are used, so the nj/ni dimensions are kept for now.
# NOTE(review): assumes the nj/ni axes here line up with nlat/nlon of the grid file - confirm.
village_cell = {'nj': slice(xloc, xloc+1), 'ni': slice(yloc, yloc+1)}
ds_village = ds.isel(village_cell)

In [None]:
# first and last year of the period to keep (1920-2100) for all datasets
yy_st, yy_ed = 1920, 2100

In [None]:
# Restrict to the requested span of years, both ends inclusive.
# The end year yy_ed is applied as well as the start year yy_st.
years = ds_village.time.dt.year
ds_subset = ds_village.where((years >= yy_st) & (years <= yy_ed), drop = True)

In [None]:
# display the time-subset dataset (nj/ni are still present at this point)
ds_subset

In [None]:
# take the first (only) entry along nj and ni so those dimensions are dropped
ds_subset = ds_subset.isel({'nj': 0, 'ni': 0})

In [None]:
# display the dataset again after selecting the single nj/ni entry
ds_subset

In [None]:
%%time
# load the selected point's data into memory (timed by the cell magic above)
ds_subset.load()

## Save the variable
- NetCDF file with variable dimensions: (member_id x time)
- One NetCDF file per variable

In [None]:
# Build the output dataset: a fresh Dataset holding only the selected variable
ds_out = xr.Dataset()

ds_out[varname] = ds_subset[varname]

# change the attributes
ds_out.attrs['author'] = 'Alice DuVivier'
ds_out.attrs['date_processed'] = datetime.now().strftime('%Y-%m-%d')
# take the village from name_village so the metadata always matches the
# output file name (identical text for 'Utqiagvik')
ds_out.attrs['contents'] = f'Daily CESM2-LE data for ocean point nearest {name_village}'

# display the result
ds_out

In [None]:
# Summarise the output dataset: dimensions, coordinate values, global attributes

# dimensions, with the shape of the values along each one
print("Dimensions:")
for dim in ds_out.dims:
    dim_shape = ds_out[dim].values.shape
    print(f"\t{dim}: {dim_shape}")

# coordinates, with their values
print("Coordinates:")
for coord, coord_da in ds_out.coords.items():
    print(f"\t{coord}:")
    print(f"\t\t{coord_da.values}")

# global attributes
print("Attributes:")
for attr, attr_value in ds_out.attrs.items():
    print(f"\t{attr}: {attr_value}")
    

## Export and Save

In [None]:
# report the in-memory size of the dataset in GB (1 GB = 1024**3 bytes here)
bytes_per_gb = 1024**3
size_gb = ds_out.nbytes / bytes_per_gb
print(f"The dataset is approximately {size_gb:.2f} GB.")


In [None]:
# assemble the output file name from the village and variable names
path_out = '/glade/campaign/cgd/ppc/duvivier/arctic_actionable/DATA/cesm2-le/'
file_out = f'{name_village}_CESM2-LE_1920-2100_daily-{varname}.nc'
fout = f'{path_out}{file_out}'
print(fout)

In [None]:
# write the dataset, with its attributes and coordinates, to the NetCDF file at fout
ds_out.to_netcdf(path=fout)