This tutorial breaks down the processing pipeline from raw data up to hit identification, using the `qualiphide_thz_offline` context and the algorithms (plugins) of `straxion v0.1.1`.

Lanqing Yuan, Sep 22, 2025.

# Pre-knowledge

In [None]:
import straxion
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Set options to display all rows and columns
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)  # To prevent truncation of long cell contents


# Apply the plotting style. You can also comment it out to use the default style.
# The style file path is relative to the notebook's working directory.
plt.style.use("../.customized_mplstyle")
# NOTE(review): presumably registers the named "xenon_*" colors (e.g. "xenon_red")
# that the plots further down in this notebook pass as `color=` -- confirm in straxion.
straxion.register_xenon_colors()

In this tutorial, we assume that you already have access to the resonator fine scan as well as the time stream data, prepared in the format shown below. For now, these data probably only make sense to the QUALIPHIDE collaboration, and they are not public.

In [None]:
!ls /Users/lanqingyuan/Desktop/QUALIPHIDE_Science_Data/

To load data, we will need to define a `strax.Context` object, in which all the detector-specific processing-related information is defined.

In [None]:
# Build the context used for offline analysis; the options set below are stored in it.
st = straxion.qualiphide_thz_offline()  # Used for offline analysis

# Single place to edit when the data live somewhere else on your machine.
# The trailing slash is kept so the directory options below are unchanged.
DATA_DIR = "/Users/lanqingyuan/Desktop/QUALIPHIDE_Science_Data/"

# The run ID is kept as a string because that is how it is passed to the context
# in the cells below.
run = "1756824965"

# NOTE: despite the "_DIR" suffix, this option points at the time-stream .npy file itself.
DAQ_INPUT_DIR = f"{DATA_DIR}ts_38kHz-{run}.npy"
IQ_FINESCAN_DIR = DATA_DIR
IQ_WIDESCAN_DIR = DATA_DIR
IQ_FINESCAN_FILENAME = "iq_fine_z_2dB_below_pcrit-1756824887.npy"
IQ_WIDESCAN_FILENAME = "iq_wide_z_2dB_below_pcrit-1756824887.npy"
RESONANT_FREQUENCY_DIR = DATA_DIR
RESONANT_FREQUENCY_FILENAME = "fres_2dB-1756824887.npy"

# Option names are the keys expected by the context; values are the paths defined above.
configs = dict(
    daq_input_dir=DAQ_INPUT_DIR,
    iq_finescan_dir=IQ_FINESCAN_DIR,
    iq_finescan_filename=IQ_FINESCAN_FILENAME,
    iq_widescan_dir=IQ_WIDESCAN_DIR,
    iq_widescan_filename=IQ_WIDESCAN_FILENAME,
    resonant_frequency_dir=RESONANT_FREQUENCY_DIR,
    resonant_frequency_filename=RESONANT_FREQUENCY_FILENAME,
)
st.set_config(configs)

For example, all the technical configuration regarding computation is already stored in the context. Regular users do not need to understand every option in detail; avid users may want to consult the strax documentation.

In [None]:
# Show the configuration options currently stored in the context.
st.show_config()

The processing algorithms are broken down into significant steps called plugins, inserted into the context. Below are the implemented plugins so far in straxion `v0.1.1`.

In [None]:
# Inspect the plugins registered in the context.
# NOTE(review): this is a private attribute (leading underscore), so it may change
# between versions without notice.
st._plugin_class_registry

In the rest of this tutorial, we will dive into these plugins in order of dependency.

# Plugins

## `raw_records`

In [None]:
?straxion.QUALIPHIDETHzReader

The following command loads the `raw_records` data into memory. Note that, to save disk space, `raw_records` is not saved by default; you have to request saving explicitly.

In [None]:
# Load the "raw_records" data of this run into memory as an array.
raw_records = st.get_array(run, "raw_records")

# If you want to save it, you can do so by passing the data type to `save`:
# raw_records = st.get_array(run, "raw_records", save=("raw_records"))

All the data we computed are saved in the output folder defined by the context:

In [None]:
# Show the storage configured on the context, i.e. where computed data are written.
st.storage

You can see that the data are named `<run_id>-<data_type>-<lineage_hash>`, where the hash is uniquely determined by the tracked lineage of the configuration. The motivation for this design is that we never run into problems loading data computed with a different configuration. (For example, you might have changed a threshold value somewhere and forgotten about it; this will not hurt you in straxion.)


As an example, here is the tracked lineage for `raw_records`. Higher-level plugins might have much more complicated lineages.

In [None]:
# Show the tracked lineage of "raw_records" for this run.
st.lineage(run, "raw_records")

The following gives the corresponding data key, which contains the lineage hash.

In [None]:
# Show the key that identifies "raw_records" for this run.
st.key_for(run, "raw_records")

Now let's see what we have in `raw_records`:

In [None]:
# Describe the contents of the "raw_records" data type.
st.data_info("raw_records")

Each element of the loaded array corresponds to one channel. We will just inspect one for illustration.

In [None]:
# Shape of the loaded array, i.e. how many entries were loaded.
np.shape(raw_records)

In [None]:
# Keep only the entries of channel 0. This overwrites `raw_records`, so re-run the
# loading cell above if you need the other channels again.
raw_records = raw_records[raw_records["channel"] == 0]

In [None]:
# Plot the I and Q traces of the first entry of `raw_records` (already filtered to
# channel 0 in the previous cell).
first_record = raw_records[0]

# Timestamp of every sample: record start plus sample index times the sampling interval.
sample_times = first_record["time"] + np.arange(first_record["length"]) * first_record["dt"]

plt.figure(figsize=(10, 5))

# Both traces share the same time axis, so it is computed once above.
for field_name, trace_label in (("data_i", "I"), ("data_q", "Q")):
    plt.plot(sample_times, first_record[field_name], label=trace_label)

plt.xlabel("Time Since Unix Epoch [ns]")
plt.ylabel("IQ [A.U.]")
plt.legend(ncol=2, loc="upper right")
plt.show()

## `hits`, `hit_classification` and `noises`

In [None]:
# Show the configuration options relevant to the "hits" data type.
st.show_config("hits")

In [None]:
# Load the processed data used in the rest of this section.
# A parenthesized single string is just a string, so the data type names are passed
# as plain strings here; "records" is additionally requested to be saved.
records = st.get_array(run, "records", save="records")
# Two data types are requested in one call; the cells below read fields from the result.
hits = st.get_array(run, ("hits", "hit_classification"))
noises = st.get_array(run, "noises")

In [None]:
# Restrict everything to channel 0 for illustration.
# `records0` is a single record (the first channel-0 entry), whereas `hits0` and
# `noises0` keep all channel-0 entries.
records0 = records[records["channel"] == 0][0]
hits0 = hits[hits["channel"] == 0]
noises0 = noises[noises["channel"] == 0]

In [None]:
%matplotlib widget

# Overlay the hit, risetime and noise windows on the channel-0 waveforms.
# Expects `records0`, `hits0` and `noises0` from the previous cell.

# Time axis in microseconds relative to the start of the record.
times_start = records0["time"]
times_us = (records0["time"] + np.arange(records0["length"]) * records0["dt"] - times_start) / 1e3

# Number of samples before the moving-average maximum that are shaded as risetime window.
risetime_n_samples = 5

plt.figure(figsize=(6, 3))
plt.plot(times_us, records0["data_dx"], label="Raw", alpha=0.5, lw=1)
plt.plot(times_us, records0["data_dx_moving_average"], lw=1, label="Moving Averaged")
plt.plot(times_us, records0["data_dx_convolved"], lw=2, label="Convolved with Pulse Kernel")

# Only the first span of each kind carries a legend label; labels starting with an
# underscore are skipped by the legend, which avoids one entry per hit.
for k, hit in enumerate(hits0):
    hit_start_us = (hit["time"] - times_start) / 1e3
    hit_end_us = (hit["endtime"] - times_start) / 1e3
    plt.axvspan(
        hit_start_us,
        hit_end_us,
        alpha=0.2,
        color="xenon_red",
        lw=0,
        label="Hit Window" if k == 0 else "_nolegend_",
    )

    # Sample index of the moving-average maximum within the record.
    max_i = int(hit["amplitude_moving_average_max_record_i"])
    # Clamp at the first sample: a negative index would wrap around to the end of
    # the record and shade almost the whole plot.
    rise_start_i = max(max_i - risetime_n_samples, 0)
    plt.axvspan(
        times_us[rise_start_i],
        times_us[max_i],
        color="xenon_blue",
        alpha=0.2,
        lw=0,
        label="Risetime Window" if k == 0 else "_nolegend_",
    )

for k, noise in enumerate(noises0):
    noise_start_us = (noise["time"] - times_start) / 1e3
    noise_end_us = (noise["endtime"] - times_start) / 1e3
    plt.axvspan(
        noise_start_us,
        noise_end_us,
        alpha=0.2,
        color="xenon_green",
        lw=0,
        label="Noise Window" if k == 0 else "_nolegend_",
    )

# Draw the hit threshold only when there is a hit to read it from; previously this
# used the leaked loop variable and raised NameError when the channel had no hits.
# The last hit is used so the drawn value is the same as before.
if len(hits0) > 0:
    plt.axhline(hits0[-1]["hit_threshold"], color="xenon_light_blue", ls=":")

plt.legend(ncol=2, loc="best")
plt.xlabel("Time Since Run Start [us]")
plt.ylabel("dx")
plt.show()

In [None]:
%matplotlib inline


def dx_to_um(ht):
    """Convert a dx amplitude into a wavelength in micrometres.

    The amplitude is first scaled to a frequency in Hz and the wavelength is then
    ``c / f`` with ``c = 3e14`` um/s.

    NOTE(review): the scaling presumably encodes a calibration in which a dx of
    1.5e-6 corresponds to 12 THz -- confirm the origin of these numbers.

    Args:
        ht: dx amplitude, a scalar or a NumPy array.

    Returns:
        Wavelength in micrometres, same shape as ``ht``.
    """
    frequency_hz = ht / 1.5e-6 * 12e12
    wavelength_um = 3e14 / frequency_hz
    return wavelength_um


def dx_to_THz(ht):
    """Convert a dx amplitude into a frequency in THz.

    NOTE(review): the scaling presumably encodes a calibration in which a dx of
    1.5e-6 corresponds to 12 THz -- confirm the origin of these numbers.

    Args:
        ht: dx amplitude, a scalar or a NumPy array.

    Returns:
        Frequency in THz, same shape as ``ht``.
    """
    ratio_to_reference = ht / 1.5e-6
    return ratio_to_reference * 12


# Rise-edge slope versus wavelength (from the moving-averaged amplitude).
# All hits are drawn first and the spike-coincident subset is drawn on top, so the
# points left in the first color are the hits without a spike coincidence.
coincident_hits = hits[hits["is_coincident_with_spikes"]]

plt.scatter(
    dx_to_um(hits["amplitude_moving_average"]),
    hits["rise_edge_slope"],
    label="Without Spike Coincidence",
    s=1,
)
plt.scatter(
    dx_to_um(coincident_hits["amplitude_moving_average"]),
    coincident_hits["rise_edge_slope"],
    label="With Spike Coincidence (0.13 ms)",
    s=1,
)
plt.xlabel(r"$\lambda$ by Moving Averaged Amplitude [$\mu$m]")
plt.ylabel(r"Rise Edge Slope [dx/Second]")
plt.legend(loc="best", fontsize=6, bbox_to_anchor=(1.0, 1.1), ncol=2)
# Fix only the lower limit; the upper limit stays automatic.
plt.ylim(bottom=0)
plt.show()

In [None]:
# Select the hits flagged as photon candidates and print a short summary.
photon_candidates = hits[hits["is_photon_candidate"]]

n_hits = len(hits)
n_candidates = len(photon_candidates)
# Percentage of hits that are not photon candidates. Guard against an empty hit
# array, which would otherwise raise ZeroDivisionError.
rejected_percent = (n_hits - n_candidates) / n_hits * 100 if n_hits else 0.0

# NOTE(review): the exposure is hard-coded here, not derived from the data.
print("Total exposure: 50 seconds")
print("Total number of photon candidates: ", n_candidates)
print("Total number of all hits: ", n_hits)
print(
    f"A spike tight coincidence of 0.131 ms is applied to the hits, which kills about {rejected_percent:.2f}% of the hits."
)

In [None]:
%matplotlib inline
# Wavelength spectrum (from the raw amplitude) of all hits and of the photon
# candidates, drawn with identical binning so that the two are directly comparable.
wavelength_bins = np.linspace(0, 150, 70)

for sample, legend_text in (
    (hits, ">0.25 ms Width"),
    (photon_candidates, "After Spike Coincidence (0.13 ms) & >0.25 ms Width"),
):
    plt.hist(
        dx_to_um(sample["amplitude"]),
        bins=wavelength_bins,
        histtype="step",
        label=legend_text,
    )

plt.xlabel(r"Wavelength by Raw Amplitude [$\mu$m]")
plt.legend(loc="best", fontsize=6, bbox_to_anchor=(0.95, 1.15))
plt.ylabel("Counts")
plt.show()