In [51]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime 
import glob
import xarray as xr
from setdir import *
from plotglider import *
from parseglider import *


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Updated purpose of process_data.ipynb

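Now loads the latest `*.nc` file for each glider (e.g. `unit_409_20220124.nc`), assigns the up/down index, and is also meant to create a gridded file (the gridding step is not yet implemented in the cells below).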


In [55]:
# Slocum gliders used here: serial number (e.g. 'unit_398') mapped to the
# plain-text glider name (e.g. "Churchill").
glider_names = dict(
    unit_409='Grease',
    unit_398='Churchill',
)
# Serial numbers only, in the same order as the dictionary above.
unit_list = list(glider_names)

# Time format string (should be the same as in 01-load_raw_data.ipynb)
time_strf = '%Y%m%d'




In [79]:
# Load the latest file for each glider in unit_list.
#
# For every unit this cell:
#   1. globs the raw-data path for '<unit>_2022*nc',
#   2. takes the last file name in sorted order (assumes the date suffix is a
#      zero-padded YYYYMMDD string, so the last name is the most recent file
#      -- TODO confirm against 01-load_raw_data.ipynb),
#   3. opens it with xarray and passes it through dive_index(), keeping the
#      two per-glider index outputs in idx_d / idx_c,
#   4. binds the resulting dataset to a notebook-level variable named after
#      the unit (e.g. `unit_409`).
idx_d = {}
idx_c = {}

for unitname in unit_list:
    fname = unitname+'_2022*nc'

    # Extract a list with the names of existing raw data files
    existing_files = glob.glob(cat_raw_path(fname))

    # Report a glider without files instead of skipping it silently, so a
    # missing `unit_XXX` variable further down is easy to trace back.
    if not existing_files:
        print('No raw files found for '+unitname+' (pattern '+fname+')')
        continue

    # The last name in sorted order is taken as the latest file
    existing_files = sorted(existing_files)
    latest_file = existing_files[-1]

    # Open the dataset
    data_ds = xr.open_dataset(latest_file)

    # Assign indices
    data_ds, idx_d[unitname], idx_c[unitname] = dive_index(data_ds)

    # Bind the dataset to a variable named after the unit. Writing to
    # globals() has the same effect as exec(unitname+'=data_ds') in a
    # notebook cell, without evaluating a constructed string.
    # A dictionary keyed by unitname would be the cleaner long-term option.
    globals()[unitname] = data_ds
    print('Running '+unitname+'=data_ds')

    # TODO: check whether a gridded file has already been created for this
    # glider, e.g. glob.glob(cat_interim_path(fname)). Not yet implemented.
        

        
        

Running unit_409=data_ds
Running unit_398=data_ds


In [81]:
# Display whatever dataset is currently bound to data_ds. If the load loop
# above was the last cell to assign it, this is the dataset of the last
# glider in unit_list for which a file was found.
data_ds

In [76]:
# NOTE(review): looks like a leftover debugging probe. `pres` is not defined
# in any cell visible in this notebook and `i` is only assigned by loops
# further down, so this presumably fails with a NameError on
# Restart Kernel -> Run All. Confirm and remove if no longer needed.
pres[i+1]

nan

In [2]:
# Initialise the figure directory via create_figdir() and keep its return value
figdir = create_figdir()

# Font settings handed to matplotlib's rc configuration for the 'font' group
font = dict(family='sans-serif', weight='normal', size=16)
plt.rc('font', **font)

In [3]:
# Read the raw pickle '<unit>_<yyyymmdd>.pkl' for each glider into data_df.
# Intended follow-up steps that this cell does not perform yet: add a time
# column, add a pressure_dbar column, trim the start (pre-deployment), then
# convert to xarray and save as a netcdf file in 02-interim.
# NOTE(review): `yyyymmdd` is not assigned in any visible cell -- presumably
# it comes from one of the star imports; confirm.
count = 0
for i in unit_list:
    count = count + 1
    # Build the pickle file name and resolve it against the raw data path
    infile = ''.join([i, '_', yyyymmdd, '.pkl'])
    infile_with_path = cat_raw_path(infile)
    # data_df is overwritten on every pass, so only the last glider's frame
    # is left once the loop finishes.
    data_df = pd.read_pickle(infile_with_path)


    


In [8]:
# Load the position pickle for each glider, convert it to an xarray dataset,
# attach global attributes and save it as a netCDF file at the path returned
# by cat_interim_path().
# NOTE(review): `yyyymmdd` is not assigned in any visible cell -- presumably
# it comes from one of the star imports; confirm.
count = 0
for i in unit_list:
    count += 1
    infile = i+'_positions_'+yyyymmdd+'.pkl'
    infile_with_path = cat_raw_path(infile)
    # Unpickling can execute arbitrary code: only read pickles produced by
    # this project's own notebooks.
    data_df = pd.read_pickle(infile_with_path)

    # Prepare to convert to xarray
    # data_df2 is the same object as data_df (no copy), so the two column
    # assignments below also modify data_df.
    data_df2 = data_df
    # An explicit unit is required: pandas 2.x rejects unit-less 'datetime64'.
    data_df2["time"] = data_df["time"].astype('datetime64[ns]').dt.round('1s')
    # NOTE(review): time_received is derived from the "time" column, not from
    # a separate received-time column -- confirm this is intended.
    data_df2["time_received"] = data_df["time"].astype('datetime64[ns]').dt.round('1s')
    data_df2 = data_df2.set_index("time")
    data_df2 = data_df2.drop(columns="source")
    ds_2021 = data_df2.to_xarray()

    # Set some attributes
    project_name = 'TERIFIC'
    institution_name = 'National Oceanography Centre, UK'
    maxtimestr = pd.to_datetime(ds_2021.time.values.max()).strftime(time_strf)

    # Create a dictionary of attributes
    attr_dict = {
        "Platform": "Slocum glider",
        "End Time": maxtimestr,
        "Project": project_name,
        "Institution": institution_name,
        "Date created": yyyymmdd,
        "Serial number": i,
    }

    # assign_attrs returns a new dataset rather than modifying in place, so
    # the result must be rebound or the attributes never reach the file.
    ds_2021 = ds_2021.assign_attrs(attr_dict)

    # Save a netcdf file (the input name uses '_positions_', the output name
    # uses '_position_')
    outfile = i+'_position_'+maxtimestr+'.nc'
    outfile_with_path = cat_interim_path(outfile)

    ds_2021.to_netcdf(outfile_with_path, mode='w')
