# Adding Back the Tides - CIOPS
An effort to make the daily files more accurate as they are currently lacking the tidal pumping that is so important to the flow of the Salish Sea

In [1]:
import xarray as xr
from pathlib import Path
import numpy as np
import datetime as dt
import gsw

#### Just 2018 for now

In [2]:
startday = [dt.datetime(2017,12,31)+dt.timedelta(days=i) for i in range(int(399))]
print(startday[0])

2017-12-31 00:00:00


In [3]:
folders = [dt.datetime(2017,12,31)+dt.timedelta(days=7*(i+1)) for i in range(int(57))]
print(folders[-1])
folders = np.repeat(folders,7)

2019-02-03 00:00:00


In [4]:
date = [startday[0],startday[1]]
folderday = [folders[0], folders[1]]

In [5]:
print(date)

[datetime.datetime(2017, 12, 31, 0, 0), datetime.datetime(2018, 1, 1, 0, 0)]


In [6]:
print(folderday)

[datetime.datetime(2018, 1, 7, 0, 0), datetime.datetime(2018, 1, 7, 0, 0)]


In [5]:
path = Path("/ocean/mdunphy/CIOPSW-BC12/")

drop_vars = (
    "deptht_bounds","time_counter_bounds","time_instant_bounds",
)

files = [sorted(path.glob("{:%Y%m%d}00/BC12_1d_grid_T_{:%Y%m%d}_{:%Y%m%d}.nc".format(folderday[i], date[i], date[i]))) for i in range(len(date))]

mydata = xr.open_mfdataset(files, drop_variables=drop_vars)
sal = mydata['so']
potT = mydata['thetao']

In [6]:
# replace all land values with nan so that math isn't done on them
sal = sal.where(sal != 0)
potT = potT.where(potT != 0)

In [7]:
#needs 2 step conversion, first CT_from_pt and them t_from_CT
CT = gsw.CT_from_pt(sal,potT)

In [8]:
T = gsw.t_from_CT(sal,CT,potT.deptht)

In [12]:
T

Unnamed: 0,Array,Chunk
Bytes,873.94 MB,436.97 MB
Shape,"(2, 75, 1020, 714)","(1, 75, 1020, 714)"
Count,25 Tasks,2 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 873.94 MB 436.97 MB Shape (2, 75, 1020, 714) (1, 75, 1020, 714) Count 25 Tasks 2 Chunks Type float64 numpy.ndarray",2  1  714  1020  75,

Unnamed: 0,Array,Chunk
Bytes,873.94 MB,436.97 MB
Shape,"(2, 75, 1020, 714)","(1, 75, 1020, 714)"
Count,25 Tasks,2 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.91 MB 2.91 MB Shape (1020, 714) (1020, 714) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",714  1020,

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.91 MB 2.91 MB Shape (1020, 714) (1020, 714) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",714  1020,

Unnamed: 0,Array,Chunk
Bytes,2.91 MB,2.91 MB
Shape,"(1020, 714)","(1020, 714)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16 B,8 B
Shape,"(2,)","(1,)"
Count,6 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 16 B 8 B Shape (2,) (1,) Count 6 Tasks 2 Chunks Type datetime64[ns] numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,8 B
Shape,"(2,)","(1,)"
Count,6 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray


In [12]:
# T = T.where(T>1,0) #the conversions mess with all the land values, convert these back to 0.. the region doesnt go lower than 1 so this is fine

interpolate + resample to get it in an hourly format

In [14]:
sal_interp = sal.resample(time_counter="1H", loffset=dt.timedelta(hours=1)).interpolate("linear")

In [15]:
sal_new = sal_interp.isel(time_counter = np.arange(0,24,1))

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [19]:
sal_new = sal_new.rename('vosaline')

In [21]:
encoding={
          "vosaline": {"zlib": True, "complevel": 4, "_FillValue": 0}
}
encoding

{'vosaline': {'zlib': True, 'complevel': 4, '_FillValue': 0}}

In [12]:
path = '/ocean/rbeutel/data/'
sal_new.to_netcdf(str(path)+'{:%Y%m}/S_new_{:%Y%m%d}.nc'.format(date[1],date[1]), encoding=encoding)

In [24]:
T_interp = T.resample(time_counter="1H", loffset=dt.timedelta(hours=1)).interpolate("linear")

In [25]:
T_new = T_interp.isel(time_counter = np.arange(0,24,1))

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [26]:
T_new = T_new.rename('votemper')

In [28]:
encoding={
          "votemper": {"zlib": True, "complevel": 4, "_FillValue": 0}
}
encoding

{'votemper': {'zlib': True, 'complevel': 4, '_FillValue': 0}}

In [None]:
T_new.to_netcdf(str(path)+'{:%Y%m}/T_new_{:%Y%m%d}.nc'.format(date[1],date[1]), encoding=encoding)