# Notebook to test edits for process_glider_data.py file

THIS IS ONLY FOR TESTING CODE.  For operational use, start with **process_glider_data.py**

Processes Slocum glider data downloaded from C2

1. Works locally on the downloaded raw netcdf files.
2. Assign profile index, separate dives and climbs
3. Calculate oxygen

FEATURES TO ADD:
- Will probably need to calculate TEOS-10 variables
- Any handling of wetlabs data?

Runs offline, using the netcdf files created in download_data.py

Next step: process_glider_tseries.py which works on the raw data and calculates 
some things (profile index, oxygen concentration)


In [2]:
import numpy as np
import xarray as xr
import os
import glob
import datetime as dt
# Own packages of code
from setdir import *
from parseglider import *
from calc_oxy import *

In [3]:
# Vertical interval of the pressure grid used when binning (dbar)
dp = 10

## CHANGE TO A CONFIG FILE WITH USER DEFINABLE PARAMETERS
# Slocum gliders: serial number (e.g. 'unit_398') -> plain-text vehicle
# name (e.g. "Churchill").
# Keys MUST match the serial number format used in the API.
glider_names = dict(
    unit_398='Churchill',
    unit_409='Grease',
)

# Sensor serial numbers per glider, keyed the same way as glider_names.
# Each inner dictionary is attached to the glider's dataset as attributes
# before the oxygen calculation.
sensor_sn = dict(
    unit_398={"optode SN": "232"},
    unit_409={"optode SN": "268"},
)

# Name of the new DataArray that indexes the profiles
idxname = 'profile_index'
# Name of the new DataArray that holds pressure in dbar
presname = 'pressure_dbar'

# Glider serial numbers (for the API), in the order declared above
unit_list = list(glider_names)

In [4]:
#--------------------------------------------------------------
# DATA PROCESSING (one pass per glider in unit_list):
# - Pick the raw file matching UNIT*_data.nc that sorts last
# - Assign a profile index to separate dives and climbs
# - Calculate oxygen concentration
#
# The result is written through cat_interim_path() under the raw file's
# name with '_o2' inserted before the extension (UNIT*_data_o2.nc).
#--------------------------------------------------------------
for uname in unit_list:
    # All raw data files for this glider, in sorted (lexicographic) order
    fname = uname+'*_data.nc'
    existing_files = sorted(glob.glob(cat_raw_path(fname)))

    # Nothing on disk for this glider: move on to the next one
    if not existing_files:
        continue

    # The last name in sorted order is used
    # (presumably the most recent end date -- verify the naming scheme)
    latest_file = existing_files[-1]
    data_ds = xr.open_dataset(latest_file)

    #--------------------------------------------------------------
    # Profile index (separates dives and climbs)
    # This should actually be moved to process_data.py
    #--------------------------------------------------------------
    # e.g. 20.0 is the twentieth dive (downward profile)
    #      20.5 is the twentieth climb (upward profile)
    # Only the updated dataset is kept; the two other return values of
    # dive_index are discarded.
    data_ds, _, _ = dive_index(data_ds, presname, idxname)

    #--------------------------------------------------------------
    # Oxygen
    #--------------------------------------------------------------
    # Attach this glider's sensor serial numbers as dataset attributes
    # before running the oxygen calculation.
    data_ds = data_ds.assign_attrs(sensor_sn[uname])
    data_ds = calc_o2conc_cal(data_ds)

    # Output name: raw file name with its '.nc' replaced by '_o2.nc'
    fname2 = os.path.basename(latest_file)[:-3]+'_o2.nc'
    outpath = cat_interim_path(fname2)

    print('Saving to '+outpath)
    data_ds.to_netcdf(outpath, mode='w')
    data_ds.close()

1. Changing idive, idx was 330 is now  329
51. Changing iclimb, idx was 2932, is now 2933
56. Changing idive, idx was 3355 is now  3354
140. Changing idive, idx was 28836 is now  28835
157. Changing iclimb, idx was 33897, is now 33898
161. Changing iclimb, idx was 35153, is now 35154
162. Changing iclimb, idx was 35516, is now 35517
202. Changing idive, idx was 47886 is now  47885
275. Changing iclimb, idx was 70150, is now 70151
285. Changing iclimb, idx was 73200, is now 73201
302. Changing iclimb, idx was 78491, is now 78492
353. Changing iclimb, idx was 94536, is now 94537
365. Changing iclimb, idx was 98577, is now 98578
380. Changing iclimb, idx was 102790, is now 102791
432. Changing iclimb, idx was 118860, is now 118861
438. Changing iclimb, idx was 120881, is now 120882
7. Changing iclimb, idx was 366, is now 367
54. Changing idive, idx was 3298 is now  3297
68. Changing iclimb, idx was 5878, is now 5879
222. Changing iclimb, idx was 40325, is now 40326
229. Changing iclimb, i

In [7]:
# At some point, will likely need to calculate TEOS10

In [8]:
# Troubleshooting override: restrict the gridding below to one vehicle.
# The gridding process is time consuming, so if something goes wrong it's
# best to handle the two vehicles separately.
#
# Both candidates are assigned in turn and the LAST assignment wins;
# swap the two lines to grid the other glider instead.
glider_names = {'unit_398': 'Churchill'}
glider_names = {'unit_409': 'Grease'}

# Serial numbers of the glider(s) to grid
unit_list = list(glider_names)



In [9]:
# Grid the data and make some calculations on the gridded data (MLD)
#
# NOTE(review): interp1d, gsw, bin_dp and calc_MLD are not imported
# explicitly in this notebook; presumably they arrive through the star
# imports in the first cell -- confirm.


def _fill_gaps_along_dives(divenum, values):
    """Linearly interpolate/extrapolate `values` across its NaN entries.

    Parameters
    ----------
    divenum : 1-D array
        Dive numbers, used as the interpolation axis.
    values : 1-D array
        Same length as `divenum`; may contain NaN.

    Returns
    -------
    1-D array with `values` evaluated at every entry of `divenum`.
    """
    valid = ~np.isnan(values)
    interp = interp1d(divenum[valid], values[valid],
                      kind='linear', fill_value="extrapolate")
    return interp(divenum)


for uname in unit_list:
    fname = uname+'*_data_o2.nc'

    # Names of the existing interim data files for this glider, sorted
    existing_files = sorted(glob.glob(cat_interim_path(fname)))

    # Nothing to grid for this glider
    if not existing_files:
        continue

    # The last name in sorted order is used
    latest_file = existing_files[-1]

    # TODO: check whether a gridded file has already been created for
    # this glider (not yet implemented).

    #--------------------------------------------------------------
    # Grid data onto a regular pressure grid (intervals given by dp)
    # - Grid data into a 2d matrix against profile index & pressure grid
    #    NOTE: Gridding is rough and *not* science quality
    #--------------------------------------------------------------
    # The attributes needed for the output file name are read while the
    # dataset is open.  The context manager closes the file even if the
    # gridding fails (EFW: closing seems to help with file management &
    # permission denied problems).
    with xr.open_dataset(latest_file) as data_ds:
        serial_number = data_ds.attrs['Serial number']
        maxtimestr = data_ds.attrs['End Time']
        grid_ds = bin_dp(data_ds, serial_number, dp)

    #------------------------------------------
    # ADD EXTRA COORDINATES (length divenum)
    #------------------------------------------
    # Simplifies plotting later to plot against time or distance
    mtime = grid_ds.time.mean(dim='pressure').values
    mlon = grid_ds.m_lon.mean(dim='pressure').values
    mlat = grid_ds.m_lat.mean(dim='pressure').values

    # Fill gaps in the per-dive mean positions by interpolating against
    # the dive number
    divenum = grid_ds.divenum.values
    mlon_full = _fill_gaps_along_dives(divenum, mlon)
    mlat_full = _fill_gaps_along_dives(divenum, mlat)

    # Distances between successive interpolated positions.
    # gsw.distance expects LONGITUDE first, then latitude; it returns
    # metres, one value fewer than the number of positions.
    dist_km = gsw.distance(mlon_full, mlat_full, 0, axis=0)/1000
    # Pad with a leading 0 so the first dive sits at distance zero
    dist_km_pad = np.append(0, dist_km)
    # Any NaN left in the positions would propagate through the cumsum
    dist_along_track = np.cumsum(dist_km_pad)

    # Wrap the per-dive vectors as DataArrays along divenum
    dive_coord = {"divenum": grid_ds.divenum}
    DAT_2 = xr.DataArray(dist_along_track, dims="divenum",
                         coords=dive_coord,
                         attrs=dict(long_name="Distance", units="km"))
    TIME_2 = xr.DataArray(mtime, dims="divenum",
                          coords=dive_coord,
                          attrs=dict(long_name="Date"))
    LAT_2 = xr.DataArray(mlat_full, dims="divenum",
                         coords=dive_coord,
                         attrs=dict(long_name="Latitude"))
    LON_2 = xr.DataArray(mlon_full, dims="divenum",
                         coords=dive_coord,
                         attrs=dict(long_name="Longitude"))

    grid_ds["dist_along_track"] = DAT_2
    grid_ds["timevec"] = TIME_2
    grid_ds["lonvec"] = LON_2
    grid_ds["latvec"] = LAT_2

    # Change the variables to coordinates
    grid_ds = grid_ds.set_coords(['dist_along_track', 'timevec',
                                  'lonvec', 'latvec'])

    #-------------------------------------------------
    # Calculate mixed layer depth
    #-------------------------------------------------
    grid_ds = calc_MLD(grid_ds)

    #-------------------------------------------------
    # Save gridded to 01-data/03-processed/*_bin10m.nc
    #-------------------------------------------------
    # Filename as 'unit_409_YYYYMMDD_bin10m.nc'
    # NOTE(review): the 'bin10m' suffix is fixed text and does not follow
    # dp; revisit if dp is ever changed from 10.
    outfile = serial_number+'_'+maxtimestr+'_bin10m.nc'
    outpath = cat_proc_path(outfile)
    print('Saving processed to '+outpath)
    grid_ds.to_netcdf(outpath, mode='w')

    # EFW: I think closing these helps with file management & permission
    # denied problems?
    grid_ds.close()
        

Saving processed to ../01-data/03-processed/unit_409_20220311_bin10m.nc
