In [1]:
import xarray as xr
import pandas as pd
import matplotlib.dates as mdates
import numpy as np
import ctd
import glob
import os

# **1. Read in the files**

Read each file, convert it to a dataset, set some attributes, and save it as a netCDF file.

**Upcasts**

In [2]:
# Convert every upcast .cnv file into a per-station netCDF file.
#
# Metadata for the Sea-Bird .cnv columns that are kept, one row per variable:
# (cnv column, output name, units, long_name, standard_name).
# Attributes are looked up by variable name instead of by list position, so
# adding or reordering variables cannot silently mislabel them.
# NOTE(review): 'time' is stored as a matplotlib date number with an empty
# units attribute, and flECO-AFL is labelled kg m^-3 -- confirm both against
# the .cnv headers.
CTD_VARIABLES = [
    ('time', 'time', '', 'time', 'time'),
    ('CStarAt0', 'beam_attenuation', 'm$^{-1}$',
     'Beam Attenuation, WET Labs C-Star',
     'volume_beam_attenuation_coefficient_of_radiative_flux_in_sea_water'),
    ('CStarTr0', 'beam_transmission', '%',
     'Beam Transmission, WET Labs C-Star',
     'volume_beam_transmission_coefficient_of_radiative_flux_in_sea_water'),
    ('c0S/m', 'conductivity1', 'S m$^{-1}$',
     'Conductivity 1', 'sea_water_electrical_conductivity'),
    ('c1S/m', 'conductivity2', 'S m$^{-1}$',
     'Conductivity 2', 'sea_water_electrical_conductivity'),
    ('flECO-AFL', 'chlorophyll', 'kg m$^{-3}$',
     'Fluorescence, WET Labs ECO-AFL/FL',
     'volume_concentration_of_chlorophyll_a_in_sea_water'),
    ('sbeox0ML/L', 'oxygen_concentration_mll', 'ml l$^{-1}$',
     'Oxygen concentration', 'mass_concentration_of_oxygen_in_sea_water'),
    ('sbeox0Mm/L', 'oxygen_concentration_mmm3', 'mmol m$^{-3}$',
     'Oxygen concentration', 'mole_concentration_of_oxygen_in_sea_water'),
    ('oxsolML/L', 'oxygen_saturation_GG', 'ml l$^{-1}$',
     'Oxygen saturation, Garcia & Gordon',
     'mass_solubility_of_oxygen_in_sea_water'),
    ('oxsatML/L', 'oxygen_saturation_W', 'ml l$^{-1}$',
     'Oxygen saturation, Weiss', 'mass_solubility_of_oxygen_in_sea_water'),
    ('sbeox0V', 'oxygen_raw', 'V', 'Oxygen raw', 'oxygen_raw_voltage'),
    ('par', 'PAR', 'micromol photons m$^{-2}$ s$^{-1}$',
     'Photosynthetically Active Radiation',
     'photosynthetically_active_radiation'),
    ('t090C', 'temperature1', '°C', 'Temperature 1', 'sea_water_temperature'),
    ('t190C', 'temperature2', '°C', 'Temperature 2', 'sea_water_temperature'),
    ('sal00', 'salinity1', 'PSU',
     'Practical salinity 1', 'sea_water_practical_salinity'),
    ('sal11', 'salinity2', 'PSU',
     'Practical salinity 2', 'sea_water_practical_salinity'),
    ('svCM', 'sound_velocity', 'm s$^{-1}$',
     'Sound velocity, Chen-Millero', 'speed_of_sound_in_sea_water'),
    ('density00', 'density1', 'kg m$^{-3}$', 'Density 1', 'sea_water_density'),
    ('density11', 'density2', 'kg m$^{-3}$', 'Density 2', 'sea_water_density'),
    ('altM', 'altimeter', 'm', 'Altimeter', 'altimeter'),
]

# Columns to pull out of the .cnv table, in output order.
vars_to_keep = [cnv_name for cnv_name, *_ in CTD_VARIABLES]

# Old -> new names; the pressure index is renamed too. Identity entries
# (e.g. 'time') are left out because nothing needs renaming there.
rename_map = {'Pressure [dbar]': 'pressure'}
rename_map.update(
    {cnv_name: new_name
     for cnv_name, new_name, *_ in CTD_VARIABLES
     if cnv_name != new_name}
)

# Common pressure grid every cast is interpolated onto (0..5099 in steps of 1).
pressure_grid = np.arange(0, 5100, 1)

ctd_files = sorted(glob.glob('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/uQUICCHE*.cnv'))
for file in ctd_files:

    # Path without the '.cnv' extension; its last two characters are the
    # station number, or '-2' for the restarted fragment of an upcast.
    stem = os.path.splitext(file)[0]
    if stem.endswith('-2'):
        # The '-2' fragment is processed in the dedicated cast-16 cell.
        continue

    df = ctd.from_cnv(file)

    # One position per cast: the mean of the logged positions, 4 decimals.
    lat = df['latitude'].mean().round(4)
    lon = df['longitude'].mean().round(4)

    # timeJ is offset by -1 so that a value of 1.0 maps onto 2023-01-01.
    df['time'] = mdates.date2num(np.datetime64('2023-01-01')) + df['timeJ'] - 1

    ds = df.to_xarray()[vars_to_keep]
    ds = ds.rename(rename_map)

    # Attach the attributes by name.
    for _, new_name, unit, long_label, standard_label in CTD_VARIABLES:
        ds[new_name].attrs['units'] = unit
        ds[new_name].attrs['long_name'] = long_label
        ds[new_name].attrs['standard_name'] = standard_label

    # Add the station and cast dimensions plus per-station coordinates.
    ds = ds.expand_dims({'station': [int(stem[-2:])]})
    ds = ds.expand_dims({'cast': ['up']})
    ds = ds.assign_coords(latitude=('station', [lat]))
    ds = ds.assign_coords(longitude=('station', [lon]))
    # Deepest measured pressure, recorded before regridding.
    ds = ds.assign_coords(max_pressure=('station', [ds.pressure.max().values]))
    ds = ds.interp(pressure=pressure_grid)

    # Written next to the source file, with the extension swapped to .nc.
    ds.to_netcdf(stem + '.nc')

**Downcasts**

In [3]:
# Convert every downcast .cnv file into a per-station netCDF file.
#
# Metadata for the Sea-Bird .cnv columns that are kept, one row per variable:
# (cnv column, output name, units, long_name, standard_name).
# Attributes are looked up by variable name instead of by list position, so
# adding or reordering variables cannot silently mislabel them.
# NOTE(review): 'time' is stored as a matplotlib date number with an empty
# units attribute, and flECO-AFL is labelled kg m^-3 -- confirm both against
# the .cnv headers.
CTD_VARIABLES = [
    ('time', 'time', '', 'time', 'time'),
    ('CStarAt0', 'beam_attenuation', 'm$^{-1}$',
     'Beam Attenuation, WET Labs C-Star',
     'volume_beam_attenuation_coefficient_of_radiative_flux_in_sea_water'),
    ('CStarTr0', 'beam_transmission', '%',
     'Beam Transmission, WET Labs C-Star',
     'volume_beam_transmission_coefficient_of_radiative_flux_in_sea_water'),
    ('c0S/m', 'conductivity1', 'S m$^{-1}$',
     'Conductivity 1', 'sea_water_electrical_conductivity'),
    ('c1S/m', 'conductivity2', 'S m$^{-1}$',
     'Conductivity 2', 'sea_water_electrical_conductivity'),
    ('flECO-AFL', 'chlorophyll', 'kg m$^{-3}$',
     'Fluorescence, WET Labs ECO-AFL/FL',
     'volume_concentration_of_chlorophyll_a_in_sea_water'),
    ('sbeox0ML/L', 'oxygen_concentration_mll', 'ml l$^{-1}$',
     'Oxygen concentration', 'mass_concentration_of_oxygen_in_sea_water'),
    ('sbeox0Mm/L', 'oxygen_concentration_mmm3', 'mmol m$^{-3}$',
     'Oxygen concentration', 'mole_concentration_of_oxygen_in_sea_water'),
    ('oxsolML/L', 'oxygen_saturation_GG', 'ml l$^{-1}$',
     'Oxygen saturation, Garcia & Gordon',
     'mass_solubility_of_oxygen_in_sea_water'),
    ('oxsatML/L', 'oxygen_saturation_W', 'ml l$^{-1}$',
     'Oxygen saturation, Weiss', 'mass_solubility_of_oxygen_in_sea_water'),
    ('sbeox0V', 'oxygen_raw', 'V', 'Oxygen raw', 'oxygen_raw_voltage'),
    ('par', 'PAR', 'micromol photons m$^{-2}$ s$^{-1}$',
     'Photosynthetically Active Radiation',
     'photosynthetically_active_radiation'),
    ('t090C', 'temperature1', '°C', 'Temperature 1', 'sea_water_temperature'),
    ('t190C', 'temperature2', '°C', 'Temperature 2', 'sea_water_temperature'),
    ('sal00', 'salinity1', 'PSU',
     'Practical salinity 1', 'sea_water_practical_salinity'),
    ('sal11', 'salinity2', 'PSU',
     'Practical salinity 2', 'sea_water_practical_salinity'),
    ('svCM', 'sound_velocity', 'm s$^{-1}$',
     'Sound velocity, Chen-Millero', 'speed_of_sound_in_sea_water'),
    ('density00', 'density1', 'kg m$^{-3}$', 'Density 1', 'sea_water_density'),
    ('density11', 'density2', 'kg m$^{-3}$', 'Density 2', 'sea_water_density'),
    ('altM', 'altimeter', 'm', 'Altimeter', 'altimeter'),
]

# Columns to pull out of the .cnv table, in output order.
vars_to_keep = [cnv_name for cnv_name, *_ in CTD_VARIABLES]

# Old -> new names; the pressure index is renamed too. Identity entries
# (e.g. 'time') are left out because nothing needs renaming there.
rename_map = {'Pressure [dbar]': 'pressure'}
rename_map.update(
    {cnv_name: new_name
     for cnv_name, new_name, *_ in CTD_VARIABLES
     if cnv_name != new_name}
)

# Common pressure grid every cast is interpolated onto (0..5099 in steps of 1).
pressure_grid = np.arange(0, 5100, 1)

ctd_files = sorted(glob.glob('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/dQUICCHE*.cnv'))
for file in ctd_files:

    # Path without the '.cnv' extension; its last two characters are the
    # station number. Files whose name ends in '-2' are skipped here.
    stem = os.path.splitext(file)[0]
    if stem.endswith('-2'):
        continue

    df = ctd.from_cnv(file)

    # One position per cast: the mean of the logged positions, 4 decimals.
    lat = df['latitude'].mean().round(4)
    lon = df['longitude'].mean().round(4)

    # timeJ is offset by -1 so that a value of 1.0 maps onto 2023-01-01.
    df['time'] = mdates.date2num(np.datetime64('2023-01-01')) + df['timeJ'] - 1

    ds = df.to_xarray()[vars_to_keep]
    ds = ds.rename(rename_map)

    # Attach the attributes by name.
    for _, new_name, unit, long_label, standard_label in CTD_VARIABLES:
        ds[new_name].attrs['units'] = unit
        ds[new_name].attrs['long_name'] = long_label
        ds[new_name].attrs['standard_name'] = standard_label

    # Add the station and cast dimensions plus per-station coordinates.
    ds = ds.expand_dims({'station': [int(stem[-2:])]})
    ds = ds.expand_dims({'cast': ['down']})
    ds = ds.assign_coords(latitude=('station', [lat]))
    ds = ds.assign_coords(longitude=('station', [lon]))
    # Deepest measured pressure, recorded before regridding.
    ds = ds.assign_coords(max_pressure=('station', [ds.pressure.max().values]))
    ds = ds.interp(pressure=pressure_grid)

    # Written next to the source file, with the extension swapped to .nc.
    ds.to_netcdf(stem + '.nc')

**Cast 16**

The upcast for cast 16 was interrupted because of problems with firing the bottles. It was stopped at around 2000 m depth and then restarted, and the restarted part was saved as 16-2.cnv, so the two parts need to be merged. We therefore process the 16-2 file in the same way as before, but set the station number explicitly.

In [4]:
# Convert the restarted fragment of the cast-16 upcast (the '-2' file) into
# its own netCDF file, using the same processing as the regular upcasts.
#
# Metadata for the Sea-Bird .cnv columns that are kept, one row per variable:
# (cnv column, output name, units, long_name, standard_name).
# Attributes are looked up by variable name instead of by list position, so
# adding or reordering variables cannot silently mislabel them.
# NOTE(review): 'time' is stored as a matplotlib date number with an empty
# units attribute, and flECO-AFL is labelled kg m^-3 -- confirm both against
# the .cnv headers.
CTD_VARIABLES = [
    ('time', 'time', '', 'time', 'time'),
    ('CStarAt0', 'beam_attenuation', 'm$^{-1}$',
     'Beam Attenuation, WET Labs C-Star',
     'volume_beam_attenuation_coefficient_of_radiative_flux_in_sea_water'),
    ('CStarTr0', 'beam_transmission', '%',
     'Beam Transmission, WET Labs C-Star',
     'volume_beam_transmission_coefficient_of_radiative_flux_in_sea_water'),
    ('c0S/m', 'conductivity1', 'S m$^{-1}$',
     'Conductivity 1', 'sea_water_electrical_conductivity'),
    ('c1S/m', 'conductivity2', 'S m$^{-1}$',
     'Conductivity 2', 'sea_water_electrical_conductivity'),
    ('flECO-AFL', 'chlorophyll', 'kg m$^{-3}$',
     'Fluorescence, WET Labs ECO-AFL/FL',
     'volume_concentration_of_chlorophyll_a_in_sea_water'),
    ('sbeox0ML/L', 'oxygen_concentration_mll', 'ml l$^{-1}$',
     'Oxygen concentration', 'mass_concentration_of_oxygen_in_sea_water'),
    ('sbeox0Mm/L', 'oxygen_concentration_mmm3', 'mmol m$^{-3}$',
     'Oxygen concentration', 'mole_concentration_of_oxygen_in_sea_water'),
    ('oxsolML/L', 'oxygen_saturation_GG', 'ml l$^{-1}$',
     'Oxygen saturation, Garcia & Gordon',
     'mass_solubility_of_oxygen_in_sea_water'),
    ('oxsatML/L', 'oxygen_saturation_W', 'ml l$^{-1}$',
     'Oxygen saturation, Weiss', 'mass_solubility_of_oxygen_in_sea_water'),
    ('sbeox0V', 'oxygen_raw', 'V', 'Oxygen raw', 'oxygen_raw_voltage'),
    ('par', 'PAR', 'micromol photons m$^{-2}$ s$^{-1}$',
     'Photosynthetically Active Radiation',
     'photosynthetically_active_radiation'),
    ('t090C', 'temperature1', '°C', 'Temperature 1', 'sea_water_temperature'),
    ('t190C', 'temperature2', '°C', 'Temperature 2', 'sea_water_temperature'),
    ('sal00', 'salinity1', 'PSU',
     'Practical salinity 1', 'sea_water_practical_salinity'),
    ('sal11', 'salinity2', 'PSU',
     'Practical salinity 2', 'sea_water_practical_salinity'),
    ('svCM', 'sound_velocity', 'm s$^{-1}$',
     'Sound velocity, Chen-Millero', 'speed_of_sound_in_sea_water'),
    ('density00', 'density1', 'kg m$^{-3}$', 'Density 1', 'sea_water_density'),
    ('density11', 'density2', 'kg m$^{-3}$', 'Density 2', 'sea_water_density'),
    ('altM', 'altimeter', 'm', 'Altimeter', 'altimeter'),
]

# Columns to pull out of the .cnv table, in output order.
vars_to_keep = [cnv_name for cnv_name, *_ in CTD_VARIABLES]

# Old -> new names; the pressure index is renamed too. Identity entries
# (e.g. 'time') are left out because nothing needs renaming there.
rename_map = {'Pressure [dbar]': 'pressure'}
rename_map.update(
    {cnv_name: new_name
     for cnv_name, new_name, *_ in CTD_VARIABLES
     if cnv_name != new_name}
)

# Common pressure grid every cast is interpolated onto (0..5099 in steps of 1).
pressure_grid = np.arange(0, 5100, 1)

# The station number cannot be read from the '-2' filename, so it is fixed.
restart_station = 16

ctd_files = sorted(glob.glob('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/uQUICCHE*.cnv'))
for file in ctd_files:

    # Only the restarted fragment (name ending in '-2') is handled here.
    stem = os.path.splitext(file)[0]
    if not stem.endswith('-2'):
        continue

    df = ctd.from_cnv(file)

    # One position per cast: the mean of the logged positions, 4 decimals.
    lat = df['latitude'].mean().round(4)
    lon = df['longitude'].mean().round(4)

    # timeJ is offset by -1 so that a value of 1.0 maps onto 2023-01-01.
    df['time'] = mdates.date2num(np.datetime64('2023-01-01')) + df['timeJ'] - 1

    ds = df.to_xarray()[vars_to_keep]
    ds = ds.rename(rename_map)

    # Attach the attributes by name.
    for _, new_name, unit, long_label, standard_label in CTD_VARIABLES:
        ds[new_name].attrs['units'] = unit
        ds[new_name].attrs['long_name'] = long_label
        ds[new_name].attrs['standard_name'] = standard_label

    # Add the station and cast dimensions plus per-station coordinates.
    ds = ds.expand_dims({'station': [restart_station]})
    ds = ds.expand_dims({'cast': ['up']})
    ds = ds.assign_coords(latitude=('station', [lat]))
    ds = ds.assign_coords(longitude=('station', [lon]))
    # Deepest measured pressure of this fragment, recorded before regridding.
    ds = ds.assign_coords(max_pressure=('station', [ds.pressure.max().values]))
    ds = ds.interp(pressure=pressure_grid)

    # Written next to the source file, with the extension swapped to .nc.
    ds.to_netcdf(stem + '.nc')

Now we need to open the two datasets for cast 16 so that we can concatenate them.

In [5]:
# Stitch the two pieces of the cast-16 upcast together along pressure.
# ds1 is the restarted fragment and supplies everything down to the split
# pressure; ds2 is the original file and supplies everything below it.
split_pressure = 1.828e+03  # last pressure level taken from the '-2' fragment

ds1 = xr.open_dataset('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/uQUICCHE16-2.nc')
ds2 = xr.open_dataset('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/uQUICCHE16.nc')

# The per-station coordinates are dropped from the fragment so that only the
# ones stored in ds2 remain in the merged dataset. `drop_vars` replaces the
# deprecated `Dataset.drop`.
shallow_part = ds1.sel(pressure=slice(0, split_pressure)).drop_vars(
    ['max_pressure', 'latitude', 'longitude']
)
deep_part = ds2.sel(pressure=slice(split_pressure + 1, 5100))

ds = xr.concat([shallow_part, deep_part], dim='pressure')

At this point, we need to remove the old uQUICCHE16.nc file (and the uQUICCHE16-2.nc fragment) so that the merged upcast can be saved as uQUICCHE16.nc.

In [6]:
# Replace the two partial cast-16 files with the merged upcast.
merged_path = '/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/uQUICCHE16.nc'
fragment_path = "/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/uQUICCHE16-2.nc"
temporary_path = merged_path + '.tmp'

# ds1 and ds2 were opened lazily from the files that are about to be replaced,
# so pull the merged data into memory and release the file handles first.
ds.load()
ds1.close()
ds2.close()

# Write to a temporary file and swap it into place only after the write has
# succeeded, so a failed write cannot leave the cast without any file.
ds.to_netcdf(temporary_path)
os.replace(temporary_path, merged_path)
os.remove(fragment_path)

We have now saved the full upcast for 16 as one file, and we can continue.

# **2. Read the full dataset and save to a netCDF**

In [7]:
# Open every per-cast netCDF file matching the pattern as a single dataset
# and read it fully into memory.
cast_file_pattern = '/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/step_09_split/*QUICCHE*.nc'
ds = xr.open_mfdataset(cast_file_pattern).load()

Adding some metadata

In [8]:
# Attach cruise, pressure and processing metadata to the combined dataset and
# write it to a single netCDF file.

# Add cruise parameters
ds.attrs.update({
    'name': 'QUICCHE CTD',
    'description': 'CTD casts taken during the 2023 QUICCHE cruise in the Cape Cauldron',
    'project': 'QUICCHE',
    'project_url': 'https://beal-agulhas.earth.miami.edu/research/projects/quicche/index.html',
    'creator_email': 'johan.edholm@gu.se',
    'creator_name': 'Johan Edholm',
})

# Add pressure attributes
ds['pressure'].attrs['units'] = 'dbar'
ds['pressure'].attrs['long_name'] = 'Pressure'
ds['pressure'].attrs['standard_name'] = 'sea_water_pressure'

# Add processing parameters, grouped by processing step (key prefix).
ds.attrs.update({
    'datcnv_date': 'Apr 10 2023',
    'datcnv_vars': '22',
    'datcnv_skipover': '0',
    'datcnv_ox_hysteresis_correction': 'yes',
    'datcnv_ox_tau_correction': 'yes',
    'wildedit_pass1_nstd': '2.0',
    'wildedit_pass2_nstd': '10.0',
    'wildedit_pass2_mindelta': '0.000e+000',
    'wildedit_npoint': '100',
    'wildedit_vars': 'prDM t090C t190C t068C t168C c0S/m c1S/m flECO-AFL',
    'wildedit_excl_bad_scans': 'yes',
    'celltm_alpha': '0.0300, 0.0300',
    'celltm_tau': '7.0000, 7.0000',
    'celltm_temp_sensor_use_for_cond': 'primary, secondary',
    'filter_low_pass_tc_A': '0.030',
    'filter_low_pass_tc_B': '0.150',
    'filter_low_pass_A_vars': 'prDM sbeox0V sbeox0Mm/L sbeox0ML/L oxsolML/L oxsatML/L flECO-AFL',
    'filter_low_pass_B_vars': 'None',
    'alignctd_adv': 'sbeox0V 3.000, sbeox0Mm/L 3.000, sbeox0ML/L 3.000, oxsolML/L 3.000, oxsatML/L 3.000',
    'loopedit_minVelocity': '0.250',
    # This entry was previously written with ':' instead of '=', which made it
    # a bare annotation, so the attribute was never stored.
    'loopedit_surfaceSoak': 'minDepth = 5.0, maxDepth = 20, useDeckPress = 1',
    'loopedit_excl_bad_scans': 'yes',
    'binavg_bintype': 'decibars',
    'binavg_binsize': '1',
    'binavg_excl_bad_scans': 'yes',
    'binavg_skipover': '0',
    'binavg_omit': '0',
    'binavg_min_scans_bin': '1',
    'binavg_max_scans_bin': '2147483647',
    'binavg_surface_bin': 'no, min = 0.000, max = 0.000, value = 0.000',
    'processing_date': 'Apr 11 2023',
    'split_excl_bad_scans': 'yes',
    'file_type': 'ascii',
})

ds.to_netcdf('/Users/xedhjo/Documents/Projects/QUICCHE/data/CTD/QUICCHE_CTDs.nc')