# Load RSK data

**Aim:** To see a different platform in action, collecting CTD (and other) data. The example dataset chosen here also has some "issues", so you can try to find a way of plotting the data that makes the issue visible.

<!--**Learning outcomes:** Using what you've learned so far, you will apply this knowledge to a new dataset that you haven't previously encountered.  You will be able to apply your knowledge of sensor response, ocean-knowing-measurements, and python to articulate (and quantify) a possible issue with a dataset.  You will also see an example of how data can be loaded from the internet directly into python.-->

**Data:** You will work with data from [Voice of the Ocean (VOTO)](https://voiceoftheocean.org) collected in the Baltic Sea.  

**Directions:** Run the python code step by step, and use it to download some data, make some starter plots, and then add some additional plots at the end yourself.

<!--**Measure of success:** You will have created a python notebook, a netCDF file (*not* added to the git repository) and the 5 figures exported as `*.png` format into your folder; these will be "committed" and "pushed" to the shared repository, and viewable by everyone.  Additionally, copy your 2 best figures to the `shared_figures/` folder for discussion.  -->

<hr>

In [3]:
import matplotlib.dates as mdates
from matplotlib import style
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import datetime
import xarray as xr
from cmocean import cm as cmo
from pyrsktools import RSK
xr.show_versions()


INSTALLED VERSIONS
------------------
commit: None
python: 3.8.18 | packaged by conda-forge | (default, Oct 10 2023, 15:46:56) 
[Clang 16.0.6 ]
python-bits: 64
OS: Darwin
OS-release: 23.5.0
machine: arm64
processor: arm
byteorder: little
LC_ALL: None
LANG: en_GB.UTF-8
LOCALE: ('en_GB', 'UTF-8')
libhdf5: 1.14.0
libnetcdf: 4.9.2

xarray: 2023.1.0
pandas: 1.5.3
numpy: 1.23.5
scipy: 1.10.1
netCDF4: 1.6.4
pydap: None
h5netcdf: None
h5py: None
Nio: None
zarr: None
cftime: 1.6.3
nc_time_axis: None
PseudoNetCDF: None
rasterio: 1.3.7
cfgrib: None
iris: None
bottleneck: 1.3.8
dask: 2023.5.0
distributed: 2023.5.0
matplotlib: 3.7.3
cartopy: 0.21.1
seaborn: None
numbagg: None
fsspec: 2023.10.0
cupy: None
pint: None
sparse: None
flox: None
numpy_groupies: None
setuptools: 69.2.0
pip: 24.0
conda: 24.3.0
pytest: None
mypy: None
IPython: 8.12.2
sphinx: 5.0.2




In [5]:
# Input locations for this exercise. Everything hangs off `datapath`, a
# machine-specific absolute folder that has to be edited when the notebook is
# run elsewhere.
datapath = '/Users/eddifying/Dropbox/Public/uhh-teaching/SeagoingOceanography/rbr-mc-wb2/newset/'

# Moored record: the RBR .rsk file and the .use text file.
# (alternative .rsk file name noted by the author: 203219_20230304_2318.rsk)
# `datapath` already ends in "/", so the joined paths below contain "//".
rbr_file = f"{datapath}/moored_record/203219_20230226_2004.rsk"
use_file = f"{datapath}/moored_record/wb2_16_2020_5768.use"

# Folder with the shipboard CTD cast files
ctd_path = f"{datapath}ctd_casts/"

# Cal-dip cast of the same RBR logger: native .rsk file and its text export
rbr_cal_file = f"{datapath}/caldip_cast/203219_20230304_2318.rsk"
rbr_cal_txtfile = f"{datapath}/caldip_cast/cast41_203219.txt"

# Unit label per variable key (the "SA" entry is an empty string)
units = {
    "temperature": "[°C]",
    "conductivity": "[mS/cm]",
    "pressure": "[dbar]",
    "CT": "[°C]",
    "SA": "",
}

# Display name per variable key
names = {
    "temperature": "Temperature",
    "conductivity": "Conductivity",
    "pressure": "Pressure",
    "CT": "Conservative Temperature",
    "SA": "Absolute Salinity",
}

In [6]:
def create_ctd_cast_time(ds):
    """
    Attach an absolute 'datetime' variable to a CTD cast dataset.

    The cast start is taken from the 'data_time_origin' attribute, whose first
    six entries are used as year, month, day, hour, minute and second. The
    first column of the 'time' variable is then added to that start, one
    offset per row. The start time is also printed.

    Args:
        ds (xarray.Dataset): The CTD cast dataset. Must carry a
            'data_time_origin' attribute and a 'time' variable that is indexed
            as [row, column]. The dataset is modified in place.

    Returns:
        xarray.Dataset: The same dataset object, with a new 'datetime'
            variable on the 'nrows1' dimension.
    """
    origin = ds.attrs["data_time_origin"].astype('int32')
    origin_str = "{0}-{1}-{2} {3}:{4}:{5}".format(*origin)
    print("data_time_origin:", origin_str)
    cast_start = pd.to_datetime(origin_str, format='%Y-%m-%d %H:%M:%S')

    # NOTE(review): pd.Timedelta is called without a unit. timedelta64 values
    # carry their own unit, but plain numbers would be read as nanoseconds --
    # confirm what dtype 'time' has after loading.
    stamps = [cast_start + pd.Timedelta(row[0]) for row in ds.time.values]

    ds["datetime"] = ("nrows1", stamps)
    return ds

In [20]:
# Shipboard CTD cast to load
cruise = "en697"  # alternative noted by the author: "dy129"
cast = "041"

# The same file name is used for reading (inside ctd_path) and for the copy
# written out below (relative path, i.e. the current working directory).
cast_file = f"ctd_{cruise}_{cast}_24hz.nc"

# Open the cast, build absolute timestamps on a copy of the dataset, and
# promote the new 'datetime' variable to a coordinate.
ds = create_ctd_cast_time(xr.open_dataset(ctd_path + cast_file).copy())
ds = ds.set_coords('datetime')

# Save to netcdf
ds.to_netcdf(cast_file)

data_time_origin: 2023-3-4 20:53:15


In [31]:
# Load the RBR cal-dip text export (path in rbr_cal_txtfile) into a DataFrame.
df_rbr_cal = pd.read_csv(
    rbr_cal_txtfile,
    header=None,
    names=["year", "month", "day", "hour", "temperature", "conductivity", "pressure"],
    skiprows=10,  # first 10 lines are skipped (presumably a text header -- confirm)
    sep=r"\s+",  # whitespace-delimited; replaces the deprecated delim_whitespace=True
)

# Assemble timestamps from the date/hour columns, index by them, and drop the
# now-redundant component columns.
df_rbr_cal['time'] = pd.to_datetime(df_rbr_cal[['year', 'month', 'day', 'hour']])
df_rbr_cal = df_rbr_cal.set_index('time').drop(columns=['year', 'month', 'day', 'hour'])

# Convert to xarray
ds_rbr_cal = df_rbr_cal.to_xarray()

# Save to netcdf
ds_rbr_cal.to_netcdf('cast41_203219_RBR.nc')

In [36]:
# Load deployed RBR
from pathlib import Path

# Open the moored-record .rsk file and read its samples. To process the
# cal-dip logger file instead, pass rbr_cal_file here; the file names used
# further down follow whichever file is opened.
rsk = RSK(rbr_file)
rsk.open()
# NOTE(review): the RSK handle is left open, as in the original cell; call
# rsk.close() once no later cell needs it.
rsk.readdata()

# Add derived channels (call order kept exactly as in the original cell)
rsk.deriveseapressure()
rsk.derivesalinity()
rsk.deriveSA()
rsk.derivetheta()

rsk.computeprofiles()
rsk.printchannels()

# Export profile 0 to CSV. The same channel list defines the CSV column names
# below, so the two cannot drift apart.
channels = [
    "pressure",
    "sea_pressure",
    "temperature",
    "potential_temperature",
    "conductivity",
    "salinity",
    "absolute_salinity",
]
rsk.RSK2CSV(channels=channels, profiles=range(0, 1))

# Convert to pandas
# NOTE(review): assumes RSK2CSV writes "<rsk file stem>_profile0.csv" into the
# current directory, as the "Wrote:" message shows for this file -- confirm
# against the pyrsktools documentation.
rsk_stem = Path(rbr_file).stem
col_names = ["time", *channels, "cast_direction"]

# skiprows=12 skips the first 12 lines of the CSV (presumably its metadata
# header and column-name row -- confirm against the exported file).
rbr_raw = pd.read_csv(f"{rsk_stem}_profile0.csv", names=col_names, skiprows=12)

rbr_raw["time"] = pd.to_datetime(rbr_raw["time"])
rbr_raw = rbr_raw.set_index("time")
# Keep only pressure, temperature and conductivity
rbr_raw = rbr_raw.drop(columns=["cast_direction", "potential_temperature", "salinity", "absolute_salinity", "sea_pressure"])

# Convert to xarray
ds_rbr_raw = rbr_raw.to_xarray()
# Save to netcdf
ds_rbr_raw.to_netcdf(f"rbr_{rsk_stem}.nc")

Ruskin profile and cast annotations will be deleted as they might conflict with the new profiles detected


Model:           RBRconcerto³
Serial ID:       203219
Sampling period: 60.0 second
Channels:        index                 name                  unit
                 _____     ____________________________    ________
                 0         conductivity                    mS/cm
                 1         temperature                     °C
                 2         pressure                        dbar
                 3         sea_pressure                    dbar
                 4         salinity                        PSU
                 5         absolute_salinity               g/kg
                 6         potential_temperature           °C
Wrote: ./203219_20230226_2004_profile0.csv


In [49]:
# Load microCAT data
# Keep the first 11 lines of the .use file for reference.
# NOTE(review): 11 lines are kept here but 12 lines are skipped by loadtxt
# below -- confirm which header length is intended.
with open(use_file, "r") as f:
    data_string = f.read().splitlines()[:11]

# Numeric table that follows the skipped lines
use_data = np.loadtxt(use_file, skiprows=12)

# Column names for the first seven columns of the table, in file order.
# (Named mcat_columns rather than `vars` so the builtin vars() is not shadowed.)
mcat_columns = ["year", "month", "day", "hour", "temperature", "conductivity", "pressure"]
cat = pd.DataFrame({name: use_data[:, idx] for idx, name in enumerate(mcat_columns)})

# Assemble timestamps from the date/hour columns, index by them, and drop the
# now-redundant component columns.
cat["time"] = pd.to_datetime(cat[['year', 'month', 'day', "hour"]])
cat = cat.set_index('time').drop(columns=["year", "month", "day", "hour"])

# Convert to xarray
ds_cat = cat.to_xarray()

# Save to netcdf
ds_cat.to_netcdf('mcat_wb2_16_2020_5768.nc')