In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime 
from setdir import *

In [2]:
# Slocum gliders: serial numbers of the units to process
unit_list = ['unit_398', 'unit_409']

# strftime pattern for the date stamps embedded in file names
time_strf = '%Y%m%d'

# Date stamp used when naming files.
# Currently implemented: the date on which this notebook is run
# (i.e. the date the file was created).
# Alternate option: end date of the data record.
yyyymmdd = datetime.datetime.now().strftime(time_strf)

# Time limits: start of the period of interest.
# tstart is derived from datestart so the two can never drift apart.
datestart = '2021-12-12'
tstart = pd.Timestamp(datestart)

# Initialise the figure directory
figdir = create_figdir()

# Default font applied to every matplotlib figure in this notebook
font = {'family': 'sans-serif',
        'weight': 'normal',
        'size': 16}

plt.rc('font', **font)

In [3]:
# Add a time column, add a pressure_dbar column, and trim the start (pre-deployment).
# Converts to xarray before saving as a netcdf file in 02-interim.
#
# NOTE: the input file name embeds yyyymmdd (the date this notebook is run),
# so the raw pickle must carry today's date stamp; otherwise pd.read_pickle
# raises FileNotFoundError.

# Global attributes shared by every glider (loop-invariant)
project_name = 'TERIFIC'
institution_name = 'National Oceanography Centre, UK'

# Process one pickle per glider
for i in unit_list:
    infile = i + '_' + yyyymmdd + '.pkl'
    infile_with_path = cat_raw_path(infile)
    data_df = pd.read_pickle(infile_with_path)

    # 'timestamp' holds epoch milliseconds. Convert it in one vectorised call;
    # the result is timezone-naive UTC, so it does not depend on the timezone
    # of the machine running the notebook (datetime.fromtimestamp would).
    data_df['time'] = pd.to_datetime(data_df['timestamp'], unit='ms')
    data_df = data_df.drop(columns='timestamp')

    # Cut data to post deployment
    data_df_2021 = data_df[data_df.time >= tstart].copy()

    # Change pressure from bars to dbars
    data_df_2021['pressure_dbar'] = data_df_2021.sci_water_pressure * 10

    # Mask non-positive salinities (they become NaN)
    salinity = data_df_2021['derived_salinity']
    data_df_2021['derived_salinity'] = salinity.where(salinity > 0)

    # Prepare to convert to xarray: index by time and drop the pressure
    # column that has been superseded by pressure_dbar
    data_df2 = (
        data_df_2021
        .set_index("time")
        .drop(columns="sci_water_pressure")
    )

    # Convert to xarray
    ds_2021 = data_df2.to_xarray()

    # Date stamp of the last record; used in the attributes and the file name
    maxtimestr = pd.to_datetime(ds_2021.time.values.max()).strftime(time_strf)

    # Create a dictionary of attributes
    attr_dict = {
        "Platform": "Slocum glider",
        "End Time": maxtimestr,
        "Project": project_name,
        "Institution": institution_name,
        "Date created": yyyymmdd,
        "Serial number": i,
    }

    # assign_attrs returns a NEW dataset, so the result must be kept;
    # otherwise the attributes never reach the netcdf file.
    ds_2021 = ds_2021.assign_attrs(attr_dict)

    # Save a netcdf file
    outfile = i + '_' + maxtimestr + '_edit.nc'
    outfile_with_path = cat_interim_path(outfile)

    ds_2021.to_netcdf(outfile_with_path)

    


FileNotFoundError: [Errno 2] No such file or directory: '../01-data/01-raw/unit_398_20220124.pkl'

In [8]:
# Load pickle for position and convert to xarray dataset.
#
# NOTE: the input file name embeds yyyymmdd (the date this notebook is run),
# so the raw pickle must carry today's date stamp.

# Global attributes shared by every glider (loop-invariant)
project_name = 'TERIFIC'
institution_name = 'National Oceanography Centre, UK'

# Process one pickle per glider
for i in unit_list:
    infile = i + '_positions_' + yyyymmdd + '.pkl'
    infile_with_path = cat_raw_path(infile)
    data_df = pd.read_pickle(infile_with_path)

    # Times rounded to whole seconds. An explicit unit is required:
    # unit-less 'datetime64' is rejected by pandas >= 2.0.
    time_1s = data_df["time"].astype('datetime64[ns]').dt.round('1s')

    # Prepare to convert to xarray. Built with assign() so the raw frame
    # loaded above is not modified in place.
    # NOTE(review): time_received is a copy of the rounded "time" column, as
    # in the original code - confirm it was not meant to come from another
    # column.
    data_df2 = (
        data_df
        .assign(time=time_1s, time_received=time_1s)
        .set_index("time")
        .drop(columns="source")
    )
    ds_2021 = data_df2.to_xarray()

    # Date stamp of the last record; used in the attributes and the file name
    maxtimestr = pd.to_datetime(ds_2021.time.values.max()).strftime(time_strf)

    # Create a dictionary of attributes
    attr_dict = {
        "Platform": "Slocum glider",
        "End Time": maxtimestr,
        "Project": project_name,
        "Institution": institution_name,
        "Date created": yyyymmdd,
        "Serial number": i,
    }

    # assign_attrs returns a NEW dataset, so the result must be kept;
    # otherwise the attributes never reach the netcdf file.
    ds_2021 = ds_2021.assign_attrs(attr_dict)

    # Save a netcdf file (mode 'w' overwrites any existing file)
    outfile = i + '_position_' + maxtimestr + '.nc'
    outfile_with_path = cat_interim_path(outfile)

    ds_2021.to_netcdf(outfile_with_path, mode='w')
