# Adding Back the Tides - CIOPS
An effort to make the daily files more accurate as they are currently lacking the tidal pumping that is so important to the flow of the Salish Sea

In [1]:
import xarray as xr
from pathlib import Path
import numpy as np
import datetime as dt
import gsw

#### Just 2018 for now

In [2]:
startday = [dt.datetime(2017,12,17)+dt.timedelta(days=i) for i in range(int(399))]
print(startday[0])

2017-12-17 00:00:00


In [3]:
folders = [dt.datetime(2017,12,17)+dt.timedelta(days=7*(i+1)) for i in range(int(57))]
print(folders[-1])
folders = np.repeat(folders,7)

2019-01-20 00:00:00


In [4]:
date = [startday[0],startday[1]]
folderday = [folders[0], folders[1]]

In [5]:
print(date)

[datetime.datetime(2017, 12, 17, 0, 0), datetime.datetime(2017, 12, 18, 0, 0)]


In [6]:
print(folderday)

[datetime.datetime(2017, 12, 24, 0, 0), datetime.datetime(2017, 12, 24, 0, 0)]


In [7]:
path = Path("/ocean/mdunphy/CIOPSW-BC12/")

drop_vars = (
    "deptht_bounds","time_counter_bounds","time_instant_bounds",
)

files = [sorted(path.glob("{:%Y%m%d}00/BC12_1d_grid_T_{:%Y%m%d}_{:%Y%m%d}.nc".format(folderday[i], date[i], date[i]))) for i in range(len(date))]

mydata = xr.open_mfdataset(files, drop_variables=drop_vars)
sal = mydata['so']
potT = mydata['thetao']

In [8]:
# replace all land values with nan so that math isn't done on them
sal = sal.where(sal != 0)
potT = potT.where(potT != 0)

In [13]:
np.count_nonzero(np.isnan(potT[0,:,:,:]))

30488477

In [14]:
np.count_nonzero(np.isnan(sal[0,:,:,:]))

30488477

In [9]:
#needs 2 step conversion, first CT_from_pt and them t_from_CT
CT = gsw.CT_from_pt(sal,potT)

In [32]:
np.count_nonzero(np.isnan(CT[0,:,:,:]))

30488477

In [10]:
T = gsw.t_from_CT(sal,CT,potT.deptht)

In [34]:
np.count_nonzero(np.isnan(T[0,:,:,:]))

30488477

In [17]:
# ok clearly not a stright up number of nan that is the issue - maybe its because some temperature values are unreasonable?
print('max:')
print(np.max(potT[0,:,:,:]).values)
print(np.max(CT[0,:,:,:]).values)
print(np.max(T[0,:,:,:]).values)
print('min:')
print(np.min(potT[0,:,:,:]).values)
print(np.min(CT[0,:,:,:]).values)
print(np.min(T[0,:,:,:]).values)

max:
13.2928095
13.52708962780418
13.303625922952929
min:
1.0356041
1.0369278205172834
1.3827944931701688


In [12]:
# T = T.where(T>1,0) #the conversions mess with all the land values, convert these back to 0.. the region doesnt go lower than 1 so this is fine

interpolate + resample to get it in an hourly format

In [11]:
sal_interp = sal.resample(time_counter="1H", loffset=dt.timedelta(hours=1)).interpolate("linear")

In [12]:
sal_new = sal_interp.isel(time_counter = np.arange(0,24,1))

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [13]:
sal_new = sal_new.rename('vosaline')

In [21]:
encoding={
          "vosaline": {"zlib": True, "complevel": 4, "_FillValue": 0}
}
encoding

{'vosaline': {'zlib': True, 'complevel': 4, '_FillValue': 0}}

In [None]:
path = '/ocean/rbeutel/data/'
sal_new.to_netcdf(str(path)+'{:%Y%m}/S_new_{:%Y%m%d}.nc'.format(date[1],date[1]), encoding=encoding)

In [14]:
T_interp = T.resample(time_counter="1H", loffset=dt.timedelta(hours=1)).interpolate("linear")

In [15]:
T_new = T_interp.isel(time_counter = np.arange(0,24,1))

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [16]:
T_new = T_new.rename('votemper')

In [None]:
encoding={
          "votemper": {"zlib": True, "complevel": 4, "_FillValue": 0}
}
encoding

In [17]:
T_new

Unnamed: 0,Array,Chunk
Bytes,10.49 GB,10.49 GB
Shape,"(24, 75, 1020, 714)","(24, 75, 1020, 714)"
Count,36 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 10.49 GB 10.49 GB Shape (24, 75, 1020, 714) (24, 75, 1020, 714) Count 36 Tasks 1 Chunks Type float64 numpy.ndarray",24  1  714  1020  75,

Unnamed: 0,Array,Chunk
Bytes,10.49 GB,10.49 GB
Shape,"(24, 75, 1020, 714)","(24, 75, 1020, 714)"
Count,36 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,7 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.91 MB 2.91 MB Shape (1020, 714) (1020, 714) Count 7 Tasks 1 Chunks Type float32 numpy.ndarray",714  1020,

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,7 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,7 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.91 MB 2.91 MB Shape (1020, 714) (1020, 714) Count 7 Tasks 1 Chunks Type float32 numpy.ndarray",714  1020,

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,7 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [None]:
T_new.to_netcdf(str(path)+'{:%Y%m}/T_new_{:%Y%m%d}.nc'.format(date[1],date[1]), encoding=encoding)