In [1]:
import xarray as xr
import pandas as pd
import matplotlib.dates as mdates
import numpy as np
import ctd
import glob

# **1. Read in the files**

Read each `.cnv` cast file, convert it to an xarray dataset, set the variable attributes, and save it as a netCDF file.

**Upcasts**

In [2]:
# Convert every upcast .cnv file into a per-station netCDF file, written next
# to its source file (same path with the extension replaced by 'nc').
ctd_files = sorted(glob.glob('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/stations1-14/uQUICCHE*.cnv'))

# One row per variable kept from the cast:
#   source column -> (output name, units, long_name, standard_name)
# Attributes are looked up by name, so reordering or adding rows can never
# attach the wrong metadata to a variable.
upcast_variables = {
    'time':       ('time', '', 'time', 'time'),
    'CStarAt0':   ('beam_attenuation', 'm$^{-1}$', 'Beam Attenuation, WET Labs C-Star',
                   'volume_beam_attenuation_coefficient_of_radiative_flux_in_sea_water'),
    'CStarTr0':   ('beam_transmission', '%', 'Beam Transmission, WET Labs C-Star',
                   'volume_beam_transmission_coefficient_of_radiative_flux_in_sea_water'),
    # The upcast files carry conductivity in the mS/cm columns, so the units
    # attribute must say mS/cm (it previously claimed S/m, a factor of 10 off).
    'c0mS/cm':    ('conductivity1', 'mS cm$^{-1}$', 'Conductivity 1', 'sea_water_electrical_conductivity'),
    'c1mS/cm':    ('conductivity2', 'mS cm$^{-1}$', 'Conductivity 2', 'sea_water_electrical_conductivity'),
    # NOTE(review): confirm the fluorometer units against the .cnv header.
    'flECO-AFL':  ('chlorophyll', 'kg m$^{-3}$', 'Fluorescence, WET Labs ECO-AFL/FL',
                   'volume_concentration_of_chlorophyll_a_in_sea_water'),
    'sbeox0ML/L': ('oxygen_concentration', 'ml l$^{-1}$', 'Oxygen concentration',
                   'mass_concentration_of_oxygen_in_sea_water'),
    'sbeox0PS':   ('oxygen_saturation', '%', 'Oxygen saturation',
                   'fractional_saturation_of_oxygen_in_sea_water'),
    'sbeox0V':    ('oxygen_raw', 'V', 'Oxygen raw', 'oxygen_raw_voltage'),
    'par':        ('PAR', 'micromol photons m$^{-2}$ s$^{-1}$', 'Photosynthetically Active Radiation',
                   'photosynthetically_active_radiation'),
    'potemp090C': ('potential_temperature1', '°C', 'Potential temperature 1', 'sea_water_potential_temperature'),
    'potemp190C': ('potential_temperature2', '°C', 'Potential temperature 2', 'sea_water_potential_temperature'),
    't090C':      ('temperature1', '°C', 'Temperature 1', 'sea_water_temperature'),
    't190C':      ('temperature2', '°C', 'Temperature 2', 'sea_water_temperature'),
    'sal00':      ('salinity1', 'PSU', 'Practical salinity 1', 'sea_water_practical_salinity'),
    'sal11':      ('salinity2', 'PSU', 'Practical salinity 2', 'sea_water_practical_salinity'),
    'svCM':       ('sound_velocity', 'm s$^{-1}$', 'Sound velocity, Chen-Millero', 'speed_of_sound_in_sea_water'),
    'density00':  ('density1', 'kg m$^{-3}$', 'Density 1', 'sea_water_density'),
    'density11':  ('density2', 'kg m$^{-3}$', 'Density 2', 'sea_water_density'),
}

# The DataFrame index becomes the 'pressure' coordinate; data variables are
# renamed from their source column names (identity renames are skipped).
upcast_rename = {'Pressure [dbar]': 'pressure'}
upcast_rename.update(
    {source: meta[0] for source, meta in upcast_variables.items() if source != meta[0]}
)

for file in ctd_files:
    df = ctd.from_cnv(file)

    # Station position: mean of the cast's latitude/longitude, 4 decimals.
    lat = df['latitude'].mean().round(4)
    lon = df['longitude'].mean().round(4)

    # timeJ is offset so that timeJ == 1 falls on 2023-01-01; the result is a
    # matplotlib date number (float days since matplotlib's epoch). It is
    # stored as a plain float with empty units.
    df['time'] = mdates.date2num(np.datetime64('2023-01-01')) + df['timeJ'] - 1

    ds = df.to_xarray()[list(upcast_variables)]
    ds = ds.rename(upcast_rename)

    for name, var_units, var_long_name, var_std_name in upcast_variables.values():
        ds[name].attrs.update(
            units=var_units,
            long_name=var_long_name,
            standard_name=var_std_name,
        )

    # The two characters before the '.cnv' extension are the station number.
    ds = ds.expand_dims({'station': [int(file[-6:-4])]})
    ds = ds.expand_dims({'cast': ['up']})
    ds = ds.assign_coords(latitude=('station', [lat]))
    ds = ds.assign_coords(longitude=('station', [lon]))
    # Record the deepest measured pressure before regridding.
    ds = ds.assign_coords(max_pressure=('station', [ds.pressure.max().values]))
    # Regrid onto a common 1 dbar pressure axis (0..4999 dbar).
    # NOTE(review): the downcast cell uses np.arange(0,5100,1); confirm that
    # no upcast reaches deeper than this grid.
    ds = ds.interp(pressure=np.arange(0, 5000, 1))

    ds.to_netcdf(file[:-3] + 'nc')

**Downcasts**

In [2]:
# Convert every downcast .cnv file into a per-station netCDF file, written
# next to its source file (same path with the extension replaced by 'nc').
ctd_files = sorted(glob.glob('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/new_stab/step_09_split/dQUICCHE*.cnv'))

# One row per variable kept from the cast:
#   source column -> (output name, units, long_name, standard_name)
# Attributes are looked up by name, so reordering or adding rows can never
# attach the wrong metadata to a variable.
downcast_variables = {
    'time':       ('time', '', 'time', 'time'),
    'CStarAt0':   ('beam_attenuation', 'm$^{-1}$', 'Beam Attenuation, WET Labs C-Star',
                   'volume_beam_attenuation_coefficient_of_radiative_flux_in_sea_water'),
    'CStarTr0':   ('beam_transmission', '%', 'Beam Transmission, WET Labs C-Star',
                   'volume_beam_transmission_coefficient_of_radiative_flux_in_sea_water'),
    'c0S/m':      ('conductivity1', 'S m$^{-1}$', 'Conductivity 1', 'sea_water_electrical_conductivity'),
    'c1S/m':      ('conductivity2', 'S m$^{-1}$', 'Conductivity 2', 'sea_water_electrical_conductivity'),
    # NOTE(review): confirm the fluorometer units against the .cnv header.
    'flECO-AFL':  ('chlorophyll', 'kg m$^{-3}$', 'Fluorescence, WET Labs ECO-AFL/FL',
                   'volume_concentration_of_chlorophyll_a_in_sea_water'),
    'sbeox0ML/L': ('oxygen_concentration', 'ml l$^{-1}$', 'Oxygen concentration',
                   'mass_concentration_of_oxygen_in_sea_water'),
    'oxsolML/L':  ('oxygen_solubility', 'ml l$^{-1}$', 'Oxygen solubility',
                   'mass_solubility_of_oxygen_in_sea_water'),
    'sbeox0V':    ('oxygen_raw', 'V', 'Oxygen raw', 'oxygen_raw_voltage'),
    'par':        ('PAR', 'micromol photons m$^{-2}$ s$^{-1}$', 'Photosynthetically Active Radiation',
                   'photosynthetically_active_radiation'),
    't090C':      ('temperature1', '°C', 'Temperature 1', 'sea_water_temperature'),
    't190C':      ('temperature2', '°C', 'Temperature 2', 'sea_water_temperature'),
    'sal00':      ('salinity1', 'PSU', 'Practical salinity 1', 'sea_water_practical_salinity'),
    'sal11':      ('salinity2', 'PSU', 'Practical salinity 2', 'sea_water_practical_salinity'),
    'svCM':       ('sound_velocity', 'm s$^{-1}$', 'Sound velocity, Chen-Millero', 'speed_of_sound_in_sea_water'),
    'density00':  ('density1', 'kg m$^{-3}$', 'Density 1', 'sea_water_density'),
    'density11':  ('density2', 'kg m$^{-3}$', 'Density 2', 'sea_water_density'),
}

# The DataFrame index becomes the 'pressure' coordinate; data variables are
# renamed from their source column names (identity renames are skipped).
downcast_rename = {'Pressure [dbar]': 'pressure'}
downcast_rename.update(
    {source: meta[0] for source, meta in downcast_variables.items() if source != meta[0]}
)

for file in ctd_files:
    df = ctd.from_cnv(file)

    # Station position: mean of the cast's latitude/longitude, 4 decimals.
    lat = df['latitude'].mean().round(4)
    lon = df['longitude'].mean().round(4)

    # timeJ is offset so that timeJ == 1 falls on 2023-01-01; the result is a
    # matplotlib date number (float days since matplotlib's epoch). It is
    # stored as a plain float with empty units.
    df['time'] = mdates.date2num(np.datetime64('2023-01-01')) + df['timeJ'] - 1

    ds = df.to_xarray()[list(downcast_variables)]
    ds = ds.rename(downcast_rename)

    for name, var_units, var_long_name, var_std_name in downcast_variables.values():
        ds[name].attrs.update(
            units=var_units,
            long_name=var_long_name,
            standard_name=var_std_name,
        )

    # The two characters before the '.cnv' extension are the station number.
    ds = ds.expand_dims({'station': [int(file[-6:-4])]})
    ds = ds.expand_dims({'cast': ['down']})
    ds = ds.assign_coords(latitude=('station', [lat]))
    ds = ds.assign_coords(longitude=('station', [lon]))
    # Record the deepest measured pressure before regridding.
    ds = ds.assign_coords(max_pressure=('station', [ds.pressure.max().values]))
    # Regrid onto a common 1 dbar pressure axis (0..5099 dbar).
    ds = ds.interp(pressure=np.arange(0, 5100, 1))

    ds.to_netcdf(file[:-3] + 'nc')

# **2. Read the full dataset and save to a netCDF**

In [4]:
# Open every per-station downcast netCDF file matching the pattern as a
# single combined dataset.
downcast_nc_pattern = '/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/new_stab/step_09_split/dQUICCHE*.nc'
ds = xr.open_mfdataset(downcast_nc_pattern)

Add cruise and processing metadata to the combined dataset.

In [6]:
# Attach global metadata to the combined downcast dataset, restrict it to
# stations 1-22 and write the result to a single netCDF file.

# Cruise parameters
ds.attrs.update({
    'name': 'QUICCHE CTD',
    'description': 'CTD casts taken during the 2023 QUICCHE cruise in the Cape Cauldron',
    'project': 'QUICCHE',
    'project_url': 'https://beal-agulhas.earth.miami.edu/research/projects/quicche/index.html',
    'creator_email': 'johan.edholm@gu.se',
    'creator_name': 'Johan Edholm',
})

# Pressure coordinate attributes
ds['pressure'].attrs.update({
    'units': 'dbar',
    'long_name': 'Pressure',
    'standard_name': 'sea_water_pressure',
})

# Processing parameters, stored as strings. Values are written without
# trailing padding.
ds.attrs.update({
    'datcnv_date': 'Mar 23 2023 18:25:12',
    'datcnv_vars': '19',
    'datcnv_skipover': '0',
    'datcnv_ox_hysteresis_correction': 'yes',
    'datcnv_ox_tau_correction': 'yes',
    'wildedit_pass1_nstd': '2.0',
    'wildedit_pass2_nstd': '20.0',
    'wildedit_pass2_mindelta': '0.000e+000',
    'wildedit_npoint': '100',
    'wildedit_vars': 'prDM t090C t190C t068C t168C c0S/m c1S/m flECO-AFL',
    'wildedit_excl_bad_scans': 'yes',
    'celltm_alpha': '0.0300, 0.0300',
    'celltm_tau': '7.0000, 7.0000',
    'celltm_temp_sensor_use_for_cond': 'primary, primary',
    'filter_low_pass_tc_A': '0.030',
    'filter_low_pass_tc_B': '0.150',
    'filter_low_pass_A_vars': 'prDM sbeox0V flECO-AFL',
    'filter_low_pass_B_vars': 'None',
    'alignctd_adv': 'sbeox0V 3.000, sbeox0ML/L 3.000, oxsolML/L 3.000',
    'loopedit_minVelocity': '0.250',
    # This entry was previously written with ':' instead of '=', which Python
    # parses as a bare annotation, so the attribute was never stored.
    'loopedit_surfaceSoak': 'do not remove',
    'loopedit_excl_bad_scans': 'yes',
    'binavg_bintype': 'decibars',
    'binavg_binsize': '1',
    'binavg_excl_bad_scans': 'yes',
    'binavg_skipover': '0',
    'binavg_omit': '0',
    'binavg_min_scans_bin': '1',
    'binavg_max_scans_bin': '2147483647',
    'binavg_surface_bin': 'no, min = 0.000, max = 0.000, value = 0.000',
    'processing_date': 'Mar 24 2023',
    'split_excl_bad_scans': 'yes',
    'file_type': 'ascii',
})

# Keep stations 1 through 22 (label-based slice, both ends inclusive).
ds = ds.sel(station=slice(1, 22))
ds.to_netcdf('data/dQUICCHE.nc')