# Split the baroclinic/barotropic results files into daily (one-day) files

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path

In [2]:
# Boundary dates of the four consecutive runs: 15-day steps from 31 Aug 2019.
start = dt.datetime(2019, 8, 31)
numdays = np.linspace(0, 15 * 4, 5)  # offsets in days: 0, 15, 30, 45, 60

date_list = []
for offset in numdays:
    date_list.append(start + dt.timedelta(days=offset))

# because the last run of the month is a day longer
date_list[-1] += dt.timedelta(days=1)

In [3]:
# Display the run-boundary dates so they can be checked by eye.
date_list

[datetime.datetime(2019, 8, 31, 0, 0),
 datetime.datetime(2019, 9, 15, 0, 0),
 datetime.datetime(2019, 9, 30, 0, 0),
 datetime.datetime(2019, 10, 15, 0, 0),
 datetime.datetime(2019, 10, 31, 0, 0)]

In [4]:
# One input file per run; each filename carries that run's start and end date
# (formatted as day, abbreviated month, two-digit year).
files = [
    "/data/rbeutel/analysis/ssc_tidesback/u_new_{:%d%b%y}_{:%d%b%y}.nc".format(run_start, run_end)
    for run_start, run_end in zip(date_list[:-1], date_list[1:])
]
# Open all runs together as a single dataset.
mydata = xr.open_mfdataset(files)

In [5]:
# The data variable is stored under xarray's placeholder name
# '__xarray_dataarray_variable__'; give it the name 'vozocrtx'.
# rename() does this in one step and returns a new Dataset, instead of adding a
# second reference in place and then removing the old one with Dataset.drop(),
# which xarray deprecates in favour of drop_vars().
mydata = mydata.rename({'__xarray_dataarray_variable__': 'vozocrtx'})
mydata

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.36 MiB 1.36 MiB Shape (898, 398) (898, 398) Count 15 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.36 MiB 1.36 MiB Shape (898, 398) (898, 398) Count 15 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,78.18 GiB,20.50 GiB
Shape,"(1468, 898, 398, 40)","(385, 898, 398, 40)"
Count,12 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 78.18 GiB 20.50 GiB Shape (1468, 898, 398, 40) (385, 898, 398, 40) Count 12 Tasks 4 Chunks Type float32 numpy.ndarray",1468  1  40  398  898,

Unnamed: 0,Array,Chunk
Bytes,78.18 GiB,20.50 GiB
Shape,"(1468, 898, 398, 40)","(385, 898, 398, 40)"
Count,12 Tasks,4 Chunks
Type,float32,numpy.ndarray


In [6]:
# Build the positions along time_counter that are kept.
# Author's note: we don't want the extra half day on either end, so the start
# and end of the merged series are trimmed. As written, the range skips the
# first 12 positions and stops 13 positions before the end.
n_times = mydata.sizes["time_counter"]
kept = np.arange(12, n_times - 13)

# Author's note: there is a 1-hour repeat between each merged file; remove the
# first of each. The three positions removed are 348, 348+361 and 348+2*361,
# counted within `kept` (i.e. after the leading trim), not in the full series.
repeat_slots = [348 + 361 * k for k in range(3)]
inds = np.delete(kept, repeat_slots)

In [7]:
# Keep only the selected positions along the time axis, then display the result.
trimdata = mydata.isel({"time_counter": inds})
trimdata

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.36 MiB 1.36 MiB Shape (898, 398) (898, 398) Count 15 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.36 MiB 1.36 MiB Shape (898, 398) (898, 398) Count 15 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,76.69 GiB,19.81 GiB
Shape,"(1440, 898, 398, 40)","(372, 898, 398, 40)"
Count,16 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 76.69 GiB 19.81 GiB Shape (1440, 898, 398, 40) (372, 898, 398, 40) Count 16 Tasks 4 Chunks Type float32 numpy.ndarray",1440  1  40  398  898,

Unnamed: 0,Array,Chunk
Bytes,76.69 GiB,19.81 GiB
Shape,"(1440, 898, 398, 40)","(372, 898, 398, 40)"
Count,16 Tasks,4 Chunks
Type,float32,numpy.ndarray


In [8]:
path = '/data/rbeutel/analysis/ssc_tidesback/'

# Write one netCDF file per complete block of 24 consecutive records; any
# leftover records that do not fill a whole block are not written.
n_blocks = len(trimdata.time_counter) // 24
for i in range(n_blocks):
    first = 24 * i
    x = trimdata.isel(time_counter=np.arange(first, first + 24))
    # The file is named after the timestamp of the first record in the block.
    date = pd.to_datetime(x.time_counter[0].values)
    fname = 'U_new_{:%Y%m%d}.nc'.format(date)
    x.to_netcdf(path + fname)
    print(fname)

U_new_20190901.nc
U_new_20190902.nc
U_new_20190903.nc
U_new_20190904.nc
U_new_20190905.nc
U_new_20190906.nc
U_new_20190907.nc
U_new_20190908.nc
U_new_20190909.nc
U_new_20190910.nc
U_new_20190911.nc
U_new_20190912.nc
U_new_20190913.nc
U_new_20190914.nc
U_new_20190915.nc
U_new_20190916.nc
U_new_20190917.nc
U_new_20190918.nc
U_new_20190919.nc
U_new_20190920.nc
U_new_20190921.nc
U_new_20190922.nc
U_new_20190923.nc
U_new_20190924.nc
U_new_20190925.nc
U_new_20190926.nc
U_new_20190927.nc
U_new_20190928.nc
U_new_20190929.nc
U_new_20190930.nc
U_new_20191001.nc
U_new_20191002.nc
U_new_20191003.nc
U_new_20191004.nc
U_new_20191005.nc
U_new_20191006.nc
U_new_20191007.nc
U_new_20191008.nc
U_new_20191009.nc
U_new_20191010.nc
U_new_20191011.nc
U_new_20191012.nc
U_new_20191013.nc
U_new_20191014.nc
U_new_20191015.nc
U_new_20191016.nc
U_new_20191017.nc
U_new_20191018.nc
U_new_20191019.nc
U_new_20191020.nc
U_new_20191021.nc
U_new_20191022.nc
U_new_20191023.nc
U_new_20191024.nc
U_new_20191025.nc
U_new_2019

In [9]:
# One input file per run; each filename carries that run's start and end date
# (formatted as day, abbreviated month, two-digit year).
files = [
    "/data/rbeutel/analysis/ssc_tidesback/v_new_{:%d%b%y}_{:%d%b%y}.nc".format(run_start, run_end)
    for run_start, run_end in zip(date_list[:-1], date_list[1:])
]
# Open all runs together as a single dataset (replaces the previous `mydata`).
mydata = xr.open_mfdataset(files)

In [10]:
# The data variable is stored under xarray's placeholder name
# '__xarray_dataarray_variable__'; give it the name 'vomecrty'.
# rename() does this in one step and returns a new Dataset, instead of adding a
# second reference in place and then removing the old one with Dataset.drop(),
# which xarray deprecates in favour of drop_vars().
mydata = mydata.rename({'__xarray_dataarray_variable__': 'vomecrty'})
mydata

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.36 MiB 1.36 MiB Shape (898, 398) (898, 398) Count 15 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.36 MiB 1.36 MiB Shape (898, 398) (898, 398) Count 15 Tasks 1 Chunks Type float32 numpy.ndarray",398  898,

Unnamed: 0,Array,Chunk
Bytes,1.36 MiB,1.36 MiB
Shape,"(898, 398)","(898, 398)"
Count,15 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,78.18 GiB,20.50 GiB
Shape,"(1468, 898, 398, 40)","(385, 898, 398, 40)"
Count,12 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 78.18 GiB 20.50 GiB Shape (1468, 898, 398, 40) (385, 898, 398, 40) Count 12 Tasks 4 Chunks Type float32 numpy.ndarray",1468  1  40  398  898,

Unnamed: 0,Array,Chunk
Bytes,78.18 GiB,20.50 GiB
Shape,"(1468, 898, 398, 40)","(385, 898, 398, 40)"
Count,12 Tasks,4 Chunks
Type,float32,numpy.ndarray


In [11]:
# Keep only the selected positions along the time axis.
# NOTE(review): `inds` is the index array built earlier from the first
# dataset's time axis; this assumes the files opened here have the same number
# and layout of records - confirm.
trimdata = mydata.isel({"time_counter": inds})

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [12]:
path = '/data/rbeutel/analysis/ssc_tidesback/'

# Write one netCDF file per complete block of 24 consecutive records; any
# leftover records that do not fill a whole block are not written.
n_blocks = len(trimdata.time_counter) // 24
for i in range(n_blocks):
    first = 24 * i
    x = trimdata.isel(time_counter=np.arange(first, first + 24))
    # The file is named after the timestamp of the first record in the block.
    date = pd.to_datetime(x.time_counter[0].values)
    fname = 'V_new_{:%Y%m%d}.nc'.format(date)
    x.to_netcdf(path + fname)
    print(fname)

V_new_20190901.nc
V_new_20190902.nc
V_new_20190903.nc
V_new_20190904.nc
V_new_20190905.nc
V_new_20190906.nc
V_new_20190907.nc
V_new_20190908.nc
V_new_20190909.nc
V_new_20190910.nc
V_new_20190911.nc
V_new_20190912.nc
V_new_20190913.nc
V_new_20190914.nc
V_new_20190915.nc
V_new_20190916.nc
V_new_20190917.nc
V_new_20190918.nc
V_new_20190919.nc
V_new_20190920.nc
V_new_20190921.nc
V_new_20190922.nc
V_new_20190923.nc
V_new_20190924.nc
V_new_20190925.nc
V_new_20190926.nc
V_new_20190927.nc
V_new_20190928.nc
V_new_20190929.nc
V_new_20190930.nc
V_new_20191001.nc
V_new_20191002.nc
V_new_20191003.nc
V_new_20191004.nc
V_new_20191005.nc
V_new_20191006.nc
V_new_20191007.nc
V_new_20191008.nc
V_new_20191009.nc
V_new_20191010.nc
V_new_20191011.nc
V_new_20191012.nc
V_new_20191013.nc
V_new_20191014.nc
V_new_20191015.nc
V_new_20191016.nc
V_new_20191017.nc
V_new_20191018.nc
V_new_20191019.nc
V_new_20191020.nc
V_new_20191021.nc
V_new_20191022.nc
V_new_20191023.nc
V_new_20191024.nc
V_new_20191025.nc
V_new_2019