In [None]:
import os
import pandas as pd
import numpy as np
from scipy.ndimage import gaussian_filter1d
import matplotlib.pyplot as plt

# Wavelength window (in nm) kept for the analysis; both bounds are inclusive.
MIN_WAVELENGTH = 200
MAX_WAVELENGTH = 760

# Experiment code: name of the sub-folder holding the raw spectra and prefix
# of the exported figures.
exp_code = "BZ_v3"

# specify parent folder of raw data here
# NOTE: this is a raw string, so backslashes are already literal and must not
# be doubled (the previous r"M:\\0-UVVis\\RR" contained doubled separators and
# only worked because Windows normalises them).
GLOBAL_PATH = r"M:\0-UVVis\RR"
PROJECT_PATH = os.path.join(GLOBAL_PATH, exp_code)
REF = os.path.join(GLOBAL_PATH, "water", "20250508_153708_484601_water.csv")

DELIMITER = ","

# Blank (water) reference spectrum.
# The first six lines of the file are skipped -- presumably an instrument
# header; TODO confirm against the spectrometer export format.
df_blank = pd.read_csv(
    REF, names=["wavelength", "transmittance"], sep=DELIMITER, skiprows=6
)
# Restrict the blank to the analysis window. The original row labels are kept
# (no reset_index), so label-based lookups stay consistent with sample spectra
# that are filtered the same way.
df_blank = df_blank[
    df_blank["wavelength"].between(MIN_WAVELENGTH, MAX_WAVELENGTH, inclusive="both")
]


def lambert_beer(
    intensity_blank: list[float], intensity_sample: list[float]
) -> np.ndarray:
    """Compute the absorbance A = log10(T_0 / T_t) element-wise.

    Args:
        intensity_blank: Transmitted intensity of the blank (T_0), one value
            per wavelength. Lists, tuples, numpy arrays and pandas Series are
            accepted.
        intensity_sample: Transmitted intensity of the sample (T_t), same
            length as ``intensity_blank``.

    Returns:
        The absorbance per wavelength. Plain sequences yield a numpy array;
        array-like inputs (e.g. pandas Series) keep their own type, so a
        Series result still carries the index of its inputs.
    """
    # Plain Python sequences do not support element-wise division, so they
    # are converted first. Array-like objects are passed through untouched to
    # preserve their type (and, for Series, their index).
    if isinstance(intensity_blank, (list, tuple)):
        intensity_blank = np.asarray(intensity_blank, dtype=float)
    if isinstance(intensity_sample, (list, tuple)):
        intensity_sample = np.asarray(intensity_sample, dtype=float)
    return np.log10(intensity_blank / intensity_sample)

In [None]:
# Global matplotlib styling: large font plus heavy axis spines and tick marks.
plt.rcParams.update(
    {
        "font.size": 24,
        "axes.linewidth": 3,
        # Same width for major and minor ticks on both axes.
        **{
            f"{axis}tick.{level}.width": 3
            for level in ("major", "minor")
            for axis in ("x", "y")
        },
    }
)

In [None]:
from datetime import datetime
import re
from tqdm import tqdm


data = []  # absorbance spectrum of every file, in chronological order
raw_data = []  # for debugging
times = []  # minutes elapsed since the first spectrum
starttime = None

# Collect all CSV files together with the acquisition timestamp encoded in
# their name (YYYYMMDD_HHMMSS_ffffff, i.e. microsecond precision via '%f').
timestamped_files = []
for entry in os.listdir(PROJECT_PATH):
    full_path = os.path.join(PROJECT_PATH, entry)
    if not (os.path.isfile(full_path) and entry.endswith(".csv")):
        continue
    # Search the bare file name only, so digits in the directory path can
    # never be mistaken for the timestamp.
    match = re.search(r"(\d{8}_\d{6}_\d{6})", entry)
    if match is None:
        raise ValueError(f"No acquisition timestamp found in file name: {full_path}")
    timestamped_files.append(
        (datetime.strptime(match.group(1), "%Y%m%d_%H%M%S_%f"), full_path)
    )

# os.listdir() returns entries in arbitrary order; everything downstream
# (elapsed times, skipping the first spectra, the FFT) assumes chronological
# order, so sort by timestamp explicitly.
timestamped_files.sort()
file_names = [path for _, path in timestamped_files]

for timestamp, file_name in tqdm(timestamped_files):
    if starttime is None:
        starttime = timestamp
    times.append((timestamp - starttime).total_seconds() / 60)

    df_curr_spectrum = pd.read_csv(
        file_name,
        names=["wavelength", "transmittance"],
        delimiter=DELIMITER,
        skiprows=6,
    )
    # Same wavelength window as the blank, so both spectra line up row by row.
    df_curr_spectrum = df_curr_spectrum[
        df_curr_spectrum["wavelength"].between(
            MIN_WAVELENGTH, MAX_WAVELENGTH, inclusive="both"
        )
    ]
    assert len(df_blank) == len(df_curr_spectrum), file_name

    data.append(
        lambert_beer(df_blank["transmittance"], df_curr_spectrum["transmittance"])
    )
    raw_data.append(df_curr_spectrum["transmittance"])

In [None]:
from scipy.stats import linregress


def detect_plateau_from_slope(
    values, num_datapoints: int = 5, threshold: float = 1e-4
) -> int:
    """Find the first point at which a series has levelled off.

    For every position ``i`` a straight line is fitted to the
    ``num_datapoints`` values preceding ``i`` (``values[i - num_datapoints:i]``),
    after normalising the series by its largest finite value. The first ``i``
    whose fitted slope is smaller in magnitude than ``threshold`` is reported.

    Args:
        values: Sequence of numbers (list, array or Series).
        num_datapoints: Size of the trailing regression window.
        threshold: Maximum absolute slope (per datapoint, on the normalised
            series) that still counts as a plateau.

    Returns:
        The number of datapoints after which the plateau was reached, or
        ``len(values)`` if no window is flat enough. A plateau found in the
        very last window also yields ``len(values)``.
    """
    series = np.asarray(values, dtype=float)
    if series.size == 0:
        return 0

    # Normalisation factor is loop-invariant: compute it once. Fall back to
    # 1.0 when the maximum is zero (or nothing is finite) to avoid dividing
    # by zero.
    finite = series[np.isfinite(series)]
    scale = finite.max() if finite.size else 0.0
    if scale == 0:
        scale = 1.0
    normalised = series / scale

    x = np.arange(num_datapoints)
    # Scan every full trailing window, including those ending at the tail of
    # the series (the upper bound used to stop num_datapoints early, so a
    # plateau at the end of the measurement was never detected).
    for i in range(num_datapoints, len(series) + 1):
        slope = linregress(x, normalised[i - num_datapoints : i]).slope
        if abs(slope) < threshold:
            print(f"Reaction has reached plateau after {i} datapoints.")
            return i
    return len(series)

In [None]:
# Wavelength grid shared by the blank and every sample spectrum.
common_x_axis = df_blank["wavelength"]

# Number of leading spectra to discard before the similarity analysis
# (presumably the mixing / start-up phase -- TODO confirm).
N_SKIPPED_SPECTRA = 20
spectra_array = data[N_SKIPPED_SPECTRA:]

# Trim `times` to the entries that belong to `spectra_array`. Slicing relative
# to the current length keeps this cell idempotent: re-running it no longer
# drops another 20 time stamps and desynchronises the two lists.
times = times[max(len(times) - len(spectra_array), 0) :]

In [None]:
from scipy.integrate import trapezoid
from tqdm import tqdm

# Absolute difference below which two absorbance values count as identical.
error_tolerance = 0.1


# Calculate Jaccard index for pair of spectra
def jaccard_next_two_spectra(
    index_1,
    index_2,
    reference_spectrum=None,
    spectra=None,
    x_axis=None,
    tolerance=None,
):
    """Return the Jaccard index (area of intersection / area of union) of two spectra.

    At every wavelength the intersection is the smaller and the union the
    larger of the two values; values closer than ``tolerance`` are treated as
    identical (both take the value of the second spectrum). Both curves are
    integrated over ``x_axis`` with the trapezoidal rule.

    Args:
        index_1: Index of the first spectrum in ``spectra``; ignored when
            ``reference_spectrum`` is given.
        index_2: Index of the second spectrum in ``spectra``.
        reference_spectrum: Optional explicit first spectrum.
        spectra: Sequence of spectra; defaults to the module-level
            ``spectra_array``.
        x_axis: Common wavelength axis; defaults to the module-level
            ``common_x_axis``.
        tolerance: Equality tolerance; defaults to the module-level
            ``error_tolerance``.

    Returns:
        The ratio intersection area / union area. The result is not finite
        if the union area is zero.
    """
    # Resolve defaults at call time so later changes to the notebook globals
    # are picked up, exactly as before.
    if spectra is None:
        spectra = spectra_array
    if x_axis is None:
        x_axis = common_x_axis
    if tolerance is None:
        tolerance = error_tolerance

    first = np.asarray(
        spectra[index_1] if reference_spectrum is None else reference_spectrum,
        dtype=float,
    )
    second = np.asarray(spectra[index_2], dtype=float)
    grid = np.asarray(x_axis, dtype=float)

    # Vectorised replacement of the former per-wavelength Python loop.
    within_tolerance = np.abs(first - second) < tolerance
    union = np.where(within_tolerance, second, np.maximum(first, second))
    intersection = np.where(within_tolerance, second, np.minimum(first, second))

    intersection_area = trapezoid(intersection, grid)
    union_area = trapezoid(union, grid)
    return intersection_area / union_area


# Similarity of every spectrum to the first one.
print("Calculating Jaccard indices...")
jaccards = [
    jaccard_next_two_spectra(0, i) for i in tqdm(range(len(spectra_array)))
]

# Similarity of every spectrum to the last one.
print("Calculating reverse Jaccard indices...")
last_index = len(spectra_array) - 1
reverse_jaccards = [
    jaccard_next_two_spectra(i, last_index) for i in tqdm(range(len(spectra_array)))
]

# Similarity of each spectrum to its direct successor.
print("Calculating neighbor Jaccard indices...")
neighbor_jaccards = [
    jaccard_next_two_spectra(i, i + 1) for i in tqdm(range(len(spectra_array) - 1))
]

In [None]:
# Plateau detection on the Jaccard trace (prints a message when one is found).
plateau_index = detect_plateau_from_slope(jaccards, num_datapoints=20, threshold=1e-4)

fig, ax = plt.subplots(figsize=(8, 6))

# Markers only, no connecting line.
ax.plot(
    times,
    jaccards,
    linestyle="None",
    marker="o",
    markersize=3,
    label="Jaccard index",
)
# Optional: mark the detected plateau with a vertical line
# ax.axvline(x=times[plateau_index], color='r', linestyle='--', label=f'Plateau after {times[plateau_index]:.0f} min')

legend_frame = ax.legend().get_frame()
legend_frame.set_linewidth(3)
ax.set_xlabel("Time / min")
ax.set_ylabel("Jaccard index")
# ax.set_title(f'Jaccard index for {exp_code}')

# Zoom into the time / index window of interest.
ax.set_xlim(3, 6)
ax.set_ylim(0.72, 0.88)

# Written to the current working directory.
fig.savefig(f"{exp_code}_jaccard.svg", transparent=True, bbox_inches="tight")

In [None]:
# Wavelength (nm) whose absorbance is followed over time.
wavelength = 400
# Only data recorded later than this many seconds after the first spectrum is kept.
MIN_TIME_S = 60

time_filt = []  # seconds since the first spectrum
peak_filt = []  # absorbance at the selected wavelength
# Row label of the grid point closest to the requested wavelength.
index_of_peak = np.abs(df_blank["wavelength"] - wavelength).idxmin()

# `times` has been trimmed to match `spectra_array`; pairing it with the
# untrimmed `data` list would shift every point by the number of skipped spectra.
for curr_time, curr_spectrum in zip(times, spectra_array):
    curr_peak = curr_spectrum[index_of_peak]
    curr_sec = curr_time * 60  # minutes -> seconds
    if curr_sec > MIN_TIME_S:
        time_filt.append(curr_sec)
        peak_filt.append(curr_peak)

plt.plot([t / 60 for t in time_filt], peak_filt, marker="")

plt.title(f"Absorbance at {wavelength} nm")
plt.xlabel("Time / min")
plt.ylabel("Absorbance / a.u.")
# plt.savefig(f'{exp_code}_peak.svg', transparent=True)

Fourier transform of the oscillating absorbance signal

In [None]:
import numpy as np
import matplotlib.pyplot as plt

signal = np.asarray(peak_filt, dtype=float)
sample_times = np.asarray(time_filt, dtype=float)

# With fewer than three samples the spectrum has no positive-frequency bin.
if signal.size < 3:
    raise ValueError(
        "Need at least three samples for the Fourier analysis, "
        f"got {signal.size}."
    )

# The FFT assumes evenly spaced samples. File timestamps jitter slightly, so
# use the mean sampling interval instead of only the first one.
sample_spacing = float(np.mean(np.diff(sample_times)))
if sample_spacing <= 0:
    raise ValueError("Sample times must be increasing.")

# Fourier Transform
signal_fft = np.fft.fft(signal)
frequencies = np.fft.fftfreq(len(sample_times), sample_spacing)

# Only positive frequencies (this also drops the zero-frequency / DC bin)
positive_mask = frequencies > 0
positive_frequencies = frequencies[positive_mask]
positive_fft = signal_fft[positive_mask]

# Find the dominant frequency
dominant_frequency = positive_frequencies[np.argmax(np.abs(positive_fft))]

# Calculate the period
oscillation_period = 1 / dominant_frequency

print(f"The dominant frequency is {dominant_frequency} Hz")
print(f"The oscillation period is {oscillation_period} seconds")

# Plotting the signal and its Fourier transform
plt.figure(figsize=(14, 6))

# Plot the time-domain signal
plt.subplot(1, 2, 1)
plt.plot(time_filt, signal)
plt.title("Time-Domain Signal")
plt.xlabel("Time [s]")
plt.ylabel("Amplitude")

# Plot the frequency-domain signal
plt.subplot(1, 2, 2)
plt.plot(positive_frequencies, np.abs(positive_fft))
# plt.xlim(0,.5)
# plt.ylim(top=100)
plt.title("Frequency-Domain Signal")
plt.xlabel("Frequency [Hz]")
plt.ylabel("Magnitude")

plt.show()