# Setup

In [1]:
# Packages -----------------------------------------------#

# Data Analysis
import xarray as xr
import numpy as np
import pandas as pd
import metpy.calc as mpcalc
import matplotlib.dates as dates
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# Timing Processes and Progress
import time
from tqdm import tqdm

# make sure the figures plot inline rather than at the end
%matplotlib inline

In [4]:
# Read the merged temperature / salinity / oxygen profile file (from tigress)
infn = 'Profiles_temp_psal_doxy.nc'
ds_AWG = xr.open_dataset(infn)

# --- make the time variable decodable -----------------------------------#
# Overwrite the units attribute with a string xarray can decode, then run
# the CF decoding again so the time values come out as datetimes.
ds_AWG.time.attrs['units'] = 'days since 1800-01-01'
ds_AWG = xr.decode_cf(ds_AWG)

# Keep the (not yet sorted) times around as a coordinate
ds_AWG = ds_AWG.assign_coords(unsorted_time=ds_AWG.time)

# Variables that carry a pressure axis, and variables that do not
vars_2d = ['temp', 'psal', 'doxy']
vars_1d = ['lat', 'lon', 'type', 'prof']

# --- step 1: rebuild every variable on the 'unsorted_time' dimension ----#
for vname in vars_2d:
    ds_AWG[vname] = xr.DataArray(
        ds_AWG[vname],
        dims=['unsorted_time', 'pres'],
        coords=[ds_AWG.time, ds_AWG.pres],
    )
for vname in vars_1d:
    ds_AWG[vname] = xr.DataArray(
        ds_AWG[vname],
        dims=['unsorted_time'],
        coords=[ds_AWG.time],
    )

# Keep only the rebuilt variables; the initial time variable is dropped here
ds_AWG = ds_AWG[vars_2d + vars_1d]

# --- step 2: sort chronologically onto a new 'time' dimension -----------#
sorted_time = ds_AWG.unsorted_time.sortby(ds_AWG.unsorted_time)

for vname in vars_2d:
    ds_AWG[vname] = xr.DataArray(
        ds_AWG[vname].sortby(ds_AWG.unsorted_time),
        dims=['time', 'pres'],
        coords=[sorted_time, ds_AWG.pres],
    )
for vname in vars_1d:
    ds_AWG[vname] = xr.DataArray(
        ds_AWG[vname].sortby(ds_AWG.unsorted_time),
        dims=['time'],
        coords=[sorted_time],
    )

# Final selection: the sorted variables plus 'time'; the 'unsorted_time'
# helper is not selected
ds_AWG = ds_AWG[vars_2d + vars_1d + ['time']]
ds_AWG

<xarray.Dataset>
Dimensions:  (pres: 201, time: 306118)
Coordinates:
    prof     (time) int32 286462 286463 286464 286465 ... 251414 252160 252508
  * time     (time) datetime64[ns] 1827-10-30 1836-05-15 ... 2020-02-07
  * pres     (pres) int32 0 5 10 15 20 25 30 35 ... 970 975 980 985 990 995 1000
Data variables:
    temp     (time, pres) float32 nan nan nan nan ... 6.482922 6.45122 6.4202943
    psal     (time, pres) float32 nan nan nan ... 34.940956 34.939495 34.937958
    doxy     (time, pres) float32 nan nan nan ... 48.04291 48.480473 48.916607
    lat      (time) float32 -18.0 -27.5 -7.9 -26.78 ... -3.16 11.251 1.808
    lon      (time) float32 119.83 41.0 85.33 98.5 ... 73.594 53.525 84.366
    type     (time) int32 2 2 2 2 2 2 2 2 2 2 2 2 2 ... 0 0 0 0 0 0 0 0 0 1 1 1

In [3]:
# Write the processed dataset next to the input file: the last three
# characters of infn (its '.nc' suffix) are replaced by '_processed.nc'.
ds_AWG.to_netcdf(f"{infn[:-3]}_processed.nc", mode="w", format="NETCDF4")