# Characterize CTD Rosette-Mounted LISST Data

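Load data from the Raw Data Archive cruise data for the Pioneer MAB cruises:
 * <a href="https://rawdata.oceanobservatories.org/files/cruise_data/Pioneer-MAB/Pioneer-21_AR87_2025-03-28/LISST/">https://rawdata.oceanobservatories.org/files/cruise_data/Pioneer-MAB/Pioneer-21_AR87_2025-03-28/LISST/</a>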

Separate upcast from downcast data. Compare optical transmission. Also include dashed lines around LISST suspect span (per Sequoia manual). Compare profiles of optical transmission and total volume concentration between casts across distance and time.

## Import libraries

In [5]:
# Import libraries used in this notebook
import os
import re
import glob
import urllib.request as request

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import cmocean as cmo

## Load data

This section loads the data files and includes the parsing steps.

In [2]:
# Load LISST CSV column names from json containing column headers.
# The JSON file is read into a pandas Series (typ='series'); each entry is
# presumably one list of column names for one instrument file format.
headers = pd.read_json("./inst_headers/lisst_hdr.json", typ='series', orient='records')
# First entry: the column names passed as `names=` when the LISST CSV files
# are read by the loader functions in this notebook.
csvhdr = headers.iloc[0]
# aschdr = headers.iloc[1] # CTD cast data headers for .asc files

In [None]:
# Define functions
def load_live_data(testname, csvhdr, testnum=1,
                   lisst_path="C:/Users/kylene.cooley/Documents/prtsz_bench_test"):
    """ Load one bench-test LISST "live data" CSV file into a
    Pandas DataFrame and print its first row.
    The file name is built as "{testname}_livedata_{testnum}.csv"
    inside the directory lisst_path.
    Inputs:
    -------
    testname, str used as the leading part of the file name
    csvhdr, list-like of column names passed to pandas.read_csv
        as `names`
    testnum, value formatted into the file name after "_livedata_"
        (default 1)
    lisst_path, str path of the directory that holds the live data
        files (defaults to the original hard-coded bench-test folder)
    Returns:
    --------
    lisst_live, DataFrame with the file contents, or None when the
        file does not exist (a message is printed in that case)
    """
    # Define paths to data
    live_data_path = f"{testname}_livedata_{testnum}.csv"
    f = os.path.join(lisst_path, live_data_path)
    # Load LISST data processed with onboard clean water background.
    # Only the read itself is guarded: a missing file is the one failure
    # the "No ... LISST data" message is meant to report.
    try:
        lisst_live = pd.read_csv(f, names=csvhdr)
    except FileNotFoundError:
        print(f"No {testname} LISST data")
        return None
    print(lisst_live.head(1))
    return lisst_live

In [None]:
# Write function to load PRTSZ total volume concentration from file name
# [WE WILL WANT TO CHANGE THIS TO PROVIDE FULL DATASET]
def load_prtsz_volume_concentration(prtsz_file):
    """ Read a headerless PRTSZ CSV file, build a UTC datetime
    series from its date and time columns, and return the total
    volume concentration column as an Xarray Data Array with a
    "time" coordinate.
    Author: Kylene Cooley, 19 Apr 2024
    Inputs:
    -------
    prtsz_file, str of file name in local PRTSZ along shelf CTD
        survey data directory
    Returns:
    --------
    vol_con, Data Array named "total_volume_concentration" holding
        column 50 of the file as float, with coordinate "time"
    """
    # Locally saved PRTSZ data from CN site, under the user's home directory
    survey_dir = os.path.expanduser(
        "~/Desktop/pioneer-20_data/PRTSZ_on_rosette/AR82b_CTDsurvey_processed_data/")
    table = pd.read_csv(os.path.join(survey_dir, prtsz_file), header=None)
    # Column 50 is read as the total volume concentration
    raw_concentration = table[50]
    # Columns 42-47 are read as the date/time parts, in this order
    time_columns = {"year": 42, "month": 43, "day": 44,
                    "hour": 45, "minute": 46, "second": 47}
    dt_df = pd.DataFrame({label: table[column]
                          for label, column in time_columns.items()})
    timestamps = pd.to_datetime(dt_df, yearfirst=True, utc=True)
    # Wrap the concentration series in a named data array on the time axis
    vol_con = xr.DataArray(data=raw_concentration.astype(float),
                           coords={"time": timestamps},
                           name="total_volume_concentration")
    return vol_con

In [None]:
# Define functions
def load_lisst(castnum, csvhdr,
               lisst_path="D:/LISST_PRTSZ/Pioneer21_AR87_PRTSZ_data"):
    """ Load the onboard-processed and the reprocessed LISST CSV
    files for one CTD cast and print the first row of each.
    Onboard files are found with the glob pattern
    "AR87a_CAST{castnum}_*/*.csv" under lisst_path. For each one,
    the "I" + 7 digit string in its file name is rewritten with a
    leading "L" and used to find the reprocessed file under
    "Processed_Data_for_QC/20250404_Reprocessed/".
    If several files match, the last one read of each kind is
    returned.
    Inputs:
    -------
    castnum, value formatted as-is into the cast directory pattern
        (e.g. "001" to match a directory named AR87a_CAST001_*)
    csvhdr, list-like of column names passed to pandas.read_csv
        as `names`
    lisst_path, str path of the directory holding the cast folders
        and the reprocessed data folder (defaults to the original
        hard-coded location)
    Returns:
    --------
    lisst_onboard, DataFrame of onboard-processed data, or None if
        no onboard file was found
    lisst_reproc, DataFrame of reprocessed data, or None if no
        reprocessed file was found
    """
    # Define paths to data
    onboard_path = f"AR87a_CAST{castnum}_*/*.csv"
    # Start from None so a cast without files returns (None, None)
    # instead of failing on unbound names at the return statement
    lisst_onboard = None
    lisst_reproc = None
    # Load LISST data processed with onboard clean water background
    for onboard_file in glob.glob(os.path.join(lisst_path, onboard_path)):
        lisst_onboard = pd.read_csv(onboard_file, names=csvhdr)
        print(lisst_onboard.head(1))
        # Find the Julian date-time string from the file name with regex;
        # only the base name is searched so directory names cannot match
        filename_base = re.findall(r"I[0-9]{7}", os.path.basename(onboard_file))
        if filename_base:
            # Load LISST data processed with updated clean water background.
            # The name is built outside the f-string because reusing the
            # same quote type inside an f-string needs Python 3.12+.
            reproc_name = re.sub("I", "L", filename_base[0])
            reproc_path = f"Processed_Data_for_QC/20250404_Reprocessed/{reproc_name}*.csv"
            for reproc_file in glob.glob(os.path.join(lisst_path, reproc_path)):
                lisst_reproc = pd.read_csv(reproc_file, names=csvhdr)
        if lisst_reproc is None:
            print("No reprocessed LISST data")
        else:
            print(lisst_reproc.head(1))
    if lisst_onboard is None:
        print("No onboard LISST data")
    return lisst_onboard, lisst_reproc

In [None]:
# Create LISST and CTD time vectors for Dataset coordinates
# (only the LISST time vector is built in this cell).
# `lisst_onboard` must already exist in the notebook session as a data frame
# with "year", "month", "day", "hour", "minute" and "second" columns.
lisst_time = pd.to_datetime(lisst_onboard[["year", "month", "day", "hour", "minute", "second"]], yearfirst=True, utc=True)
# Insert the timestamps as the first column, then make them the index.
# NOTE(review): `.values` appears to hand over plain numpy datetimes without
# the UTC timezone attached -- confirm this is intended.
lisst_onboard.insert(0, "time", lisst_time.values)
lisst_onboard.set_index("time", drop=True, inplace=True)
# Quick visual check of the re-indexed frame
print(lisst_onboard.head(1))

In [None]:
# Convert data frames to xarray for easy manipulation.
# Each name is rebound from a pandas DataFrame to an xarray Dataset, so this
# cell cannot be re-run without first re-creating the data frames.
# `ctd_inst`, `lisst_onboard` and `lisst_reproc` must already exist in the
# notebook session; `ctd_inst` is not created in the cells shown here.
ctd_inst = xr.Dataset.from_dataframe(ctd_inst)
lisst_onboard = xr.Dataset.from_dataframe(lisst_onboard)
lisst_reproc = xr.Dataset.from_dataframe(lisst_reproc)

In [None]:
# Interpolate optical transmission onto common time coordinates
# Used for finding the difference between the optical transmission signals
# The common time axis spans the first to the last CTD time stamp
# at a 1-second interval.
start_dt = ctd_inst.time[0].values
end_dt = ctd_inst.time[-1].values
time_coord = pd.date_range(start=start_dt, end=end_dt, freq="1s")
# Interpolate all three datasets onto the same time axis so they can be
# compared sample by sample.
ctd_interp = ctd_inst.interp(time=time_coord)
lisst_onboard_interp = lisst_onboard.interp(time=time_coord)
lisst_reproc_interp = lisst_reproc.interp(time=time_coord)

In [51]:
# Define base url for data on Raw Data Archive
# (the LISST directory of the Pioneer-21 AR87 cruise); file names are
# appended directly to this string, so it must end with "/".
rdaURL = "https://rawdata.oceanobservatories.org/files/cruise_data/Pioneer-MAB/Pioneer-21_AR87_2025-03-28/LISST/"

In [52]:
# Use the readme file to get CSV file names for each cast.
# The readme is downloaded to a temporary local file and scanned line by line:
# a line containing "CAST" names a cast, and the line right after it is taken
# as that cast's file name. The result is the dict `castfiles`
# (cast name -> file name).
castfiles = {}
filenext = False
text, urlheaders = request.urlretrieve(rdaURL + "AR87_LISST_README.txt")
with open(text) as f:
    for x in f:
        if "CAST" in x:
            # Drop the 4-space indentation and the line ending. rstrip is
            # used instead of [:-1] so that a final line without a trailing
            # newline keeps its last character and CRLF endings leave no "\r".
            cast = x.replace("    ", "").rstrip("\r\n")
            filenext = True
            continue
        if filenext:
            file = x.replace("    ", "").rstrip("\r\n")
            castfiles[cast] = file
            filenext = False
# castfiles

In [None]:
# Load data with dict values
# for key, value in castfiles:
# For now only the first (cast name, file name) pair of `castfiles` is taken
# and printed as a check; the loop over all casts above is still commented out.
pairs = list(castfiles.items())
key, value = pairs[0]
print(key)
print(value)


AR87a_CAST001_L6
I0872156.CSV


## Separate key parameters for analysis

In [None]:
# List of strings to label casts
castnum = np.arange(23)


def castname(x):
    """Return the label "Cast" followed by integer x left-padded with 0 to 3 digits."""
    return f"Cast{x:03d}"


for num in castnum:
    pattern = castname(num)
    # use the pattern and a base dir path to build the path to the cast data file
    # not every cast has a file, so we need to do error handling to skip these

In [None]:
# Split datasets containing multiple casts (first cast aborted, for example)

In [None]:
# Combine cast data with cast number as a new dimension (goal: simplify plotting)

## Compare optical transmission data by depth

In [None]:
# Define plotting function to make similar
# figures given axes and 3 data arrays
def plot_prtsz_comparison(ax1, volume_concentration,
                          beam_transmission, depth):
    """ Draw total volume concentration and beam transmission
    against depth on one set of axes with twin x-axes.
    The arrays are split at the midpoint of beam_transmission:
    the first half is drawn as a solid line and the rest, starting
    one sample before the midpoint, as a thin dashed line
    (presumably downcast and upcast -- confirm against the data).
    Inputs:
    -------
    ax1, axes object used for volume concentration (black); a twin
        x-axis is created from it for beam transmission (skyblue)
    volume_concentration, sequence plotted on ax1
    beam_transmission, sequence plotted on the twin axis; its
        length sets the split point
    depth, sequence used as the y values of every line
    Returns:
    --------
    None
    """
    half = len(beam_transmission) // 2
    first_part = slice(None, half)
    second_part = slice(half - 1, None)
    dashed_style = {"lw": 1}

    ax2 = ax1.twiny()

    # Volume concentration on the primary x-axis, depth increasing downward
    ax1.plot(volume_concentration[first_part], depth[first_part], c="black")
    ax1.plot(volume_concentration[second_part], depth[second_part],
             "--", c="black", **dashed_style)
    ax1.invert_yaxis()
    ax1.set_xlabel("Total volume concentration [PPM]")
    ax1.set_ylabel("Depth [m]")

    # Beam transmission on the twin x-axis
    ax2.plot(beam_transmission[first_part], depth[first_part], c="skyblue")
    ax2.plot(beam_transmission[second_part], depth[second_part],
             "--", c="skyblue", **dashed_style)
    ax2.set_xlabel("Beam transmission %", color="skyblue")
    return None

## Compare total volume concentration by depth

## Compare reference laser power and temperature

<ul>
    <li>Profiles by depth</li>
    <ul>
        <li>twin x-axes</li>
        <li>subplots side-by-side</li>
    </ul>
    <li>Scatter plot</li>
    <ul>
        <li>with linear regression if there appears to be a relationship</li>
    </ul>
</ul>

## Compare laser transmission power and temperature

<ul>
    <li>Profiles by depth</li>
    <ul>
        <li>twin x-axes</li>
        <li>subplots side-by-side</li>
    </ul>
    <li>Scatter plot</li>
    <ul>
        <li>with linear regression if there appears to be a relationship</li>
    </ul>
</ul>

## Compare ambient light by depth

## Compare mean particle diameter by depth