In [9]:
# Data Visualization

In [14]:
# scripts/vis.py
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

def plot_signals(folder_path):
    """Plot the three recorded signals of one participant and save them as a PDF.

    Reads the nasal-flow, thoracic and SpO₂ text files plus ``flow_events.csv``
    from ``folder_path``, draws each signal on its own row of a shared-x
    figure, shades every event interval on all rows, and writes the figure to
    ``Visualizations/<folder name>_visualization.pdf`` (relative to the
    current working directory).

    Parameters
    ----------
    folder_path : str
        Directory containing the signal files and ``flow_events.csv``.

    Returns
    -------
    None
    """

    def _read_signal(filename):
        # sep=r"\s+" is the documented equivalent of delim_whitespace=True,
        # which is deprecated since pandas 2.2.
        frame = pd.read_csv(
            os.path.join(folder_path, filename),
            sep=r"\s+",
            names=["time", "value"],
        )
        # Numeric timestamps are interpreted as seconds since the Unix epoch.
        frame['time'] = pd.to_datetime(frame['time'], unit='s')
        return frame

    # Load signals
    nasal = _read_signal("Flow - 30-05-2024.txt")
    thoracic = _read_signal("Thorac - 30-05-2024.txt")
    spo2 = _read_signal("SPO2 - 30-05-2024.txt")

    # Load events
    events = pd.read_csv(os.path.join(folder_path, "flow_events.csv"))
    events['start_time'] = pd.to_datetime(events['start_time'], unit='s')
    events['end_time'] = pd.to_datetime(events['end_time'], unit='s')

    # Plotting
    fig, axs = plt.subplots(3, 1, figsize=(16, 10), sharex=True)
    try:
        axs[0].plot(nasal['time'], nasal['value'], label="Nasal Airflow")
        axs[1].plot(thoracic['time'], thoracic['value'], label="Thoracic Movement", color='green')
        axs[2].plot(spo2['time'], spo2['value'], label="SpO₂", color='red')

        # Shade every event interval on all three rows.
        for event_start, event_end in zip(events['start_time'], events['end_time']):
            for ax in axs:
                ax.axvspan(event_start, event_end, color='orange', alpha=0.3)

        for ax in axs:
            ax.legend()
            ax.set_ylabel("Amplitude")

        axs[2].set_xlabel("Time")

        os.makedirs("Visualizations", exist_ok=True)
        # abspath strips a trailing separator, so "data/AP01/" still yields
        # "AP01" rather than an empty name.
        folder_name = os.path.basename(os.path.abspath(folder_path))
        out_path = os.path.join("Visualizations", f"{folder_name}_visualization.pdf")
        with PdfPages(out_path) as pdf:
            pdf.savefig(fig)
    finally:
        # Close this specific figure (not whichever one is "current"), even
        # when plotting or saving fails, so repeated calls do not leak figures.
        plt.close(fig)



In [None]:
# Data Cleaning

In [15]:
# Add this to a utils.py or preprocessing.py module
from scipy.signal import butter, filtfilt

def bandpass_filter(signal, fs, low=0.17, high=0.4, order=4):
    nyquist = 0.5 * fs
    lowcut = low / nyquist
    highcut = high / nyquist
    b, a = butter(order, [lowcut, highcut], btype='band')
    return filtfilt(b, a, signal)


In [None]:
# Dataset Creation

In [16]:
# scripts/create_dataset.py
import os
import numpy as np
import pandas as pd
from datetime import timedelta
import argparse

def overlap(a_start, a_end, b_start, b_end):
    """Return how many seconds the intervals [a_start, a_end] and [b_start, b_end] share.

    The arguments must support ``min``/``max`` and subtraction yielding an
    object with ``total_seconds()`` (e.g. ``datetime`` / ``pandas.Timestamp``).
    Returns ``0`` when the intervals do not overlap.
    """
    shared = min(a_end, b_end) - max(a_start, b_start)
    seconds = shared.total_seconds()
    # A non-positive span means the intervals are disjoint or merely touch.
    if seconds > 0:
        return seconds
    return 0

def create_windows(signal_df, events_df, win_len=30, overlap=0.5):
    """Cut a signal into fixed-length sliding windows and label each one.

    Each window covers the half-open interval ``[start, start + win_len)``,
    so a sample lying exactly on a window boundary belongs to one window
    only. A window is labelled with the ``event`` of the first row of
    ``events_df`` that overlaps it for at least half of ``win_len`` seconds,
    otherwise ``"Normal"``.

    Parameters
    ----------
    signal_df : pandas.DataFrame
        Must have a datetime-like ``time`` column and a ``value`` column.
    events_df : pandas.DataFrame
        Must have ``start_time``, ``end_time`` and ``event`` columns
        (may be empty).
    win_len : float
        Window length in seconds; must be positive.
    overlap : float
        Fraction of ``win_len`` shared by consecutive windows; must be
        below 1 so the window actually advances.

    Returns
    -------
    (list of numpy.ndarray, list)
        The ``value`` samples of every window and the matching labels.

    Raises
    ------
    ValueError
        If ``win_len`` is not positive or the resulting step is not positive.
    """
    if win_len <= 0:
        raise ValueError("win_len must be positive")
    window_span = timedelta(seconds=win_len)
    step = timedelta(seconds=win_len * (1 - overlap))
    if step <= timedelta(0):
        # A zero or negative step would make the loop below run forever.
        raise ValueError("overlap must be < 1 so that consecutive windows advance")

    windows = []
    labels = []
    if len(signal_df) == 0:
        return windows, labels

    signals = signal_df.set_index('time')
    if not signals.index.is_monotonic_increasing:
        # searchsorted below requires ascending timestamps.
        signals = signals.sort_index(kind='mergesort')
    timestamps = signals.index
    values = signals['value'].to_numpy()

    # Extract the event intervals once instead of re-walking the frame per window.
    if len(events_df) == 0:
        events = []
    else:
        events = list(zip(events_df['start_time'], events_df['end_time'], events_df['event']))

    min_event_seconds = win_len * 0.5
    start = timestamps[0]
    end = timestamps[-1]

    while start + window_span <= end:
        stop = start + window_span
        first = timestamps.searchsorted(start, side='left')
        last = timestamps.searchsorted(stop, side='left')  # exclusive upper bound

        window_label = "Normal"
        for event_start, event_end, event_name in events:
            # NOTE: the `overlap` parameter shadows the module-level overlap()
            # helper inside this function, so the shared duration is computed
            # inline here rather than by calling that helper.
            shared = (min(stop, event_end) - max(start, event_start)).total_seconds()
            if shared >= min_event_seconds:
                window_label = event_name
                break

        # Copy so overlapping windows do not share memory with one another.
        windows.append(values[first:last].copy())
        labels.append(window_label)
        start += step

    return windows, labels

def process_participant(folder, flow_filename="Flow - 30-05-2024.txt"):
    """Load one participant's nasal-flow signal and events and window them.

    Parameters
    ----------
    folder : str
        Directory containing the flow file and ``flow_events.csv``.
    flow_filename : str
        Name of the whitespace-delimited flow file (columns: time, value)
        inside ``folder``.

    Returns
    -------
    (list, list)
        Whatever ``create_windows`` returns for the loaded signal and events
        with its default window settings.
    """
    # The previous hard-coded name "Flow - 30-052024.txt" was missing a hyphen
    # compared with the file name used by plot_signals.
    # sep=r"\s+" replaces delim_whitespace=True, deprecated since pandas 2.2.
    nasal = pd.read_csv(
        os.path.join(folder, flow_filename),
        sep=r"\s+",
        names=["time", "value"],
    )
    # Numeric timestamps are interpreted as seconds since the Unix epoch.
    nasal['time'] = pd.to_datetime(nasal['time'], unit='s')

    events = pd.read_csv(os.path.join(folder, "flow_events.csv"))
    events['start_time'] = pd.to_datetime(events['start_time'], unit='s')
    events['end_time'] = pd.to_datetime(events['end_time'], unit='s')

    return create_windows(nasal, events)

def save_dataset(participant_dirs, out_dir):
    """Window every participant folder and pickle the combined dataset.

    Calls ``process_participant`` on each entry of ``participant_dirs``,
    concatenates the returned windows and labels, and writes a DataFrame with
    the columns ``signal`` and ``label`` to
    ``<out_dir>/breathing_dataset.pkl`` (creating ``out_dir`` if needed).
    """
    all_windows = []
    all_labels = []
    for folder in participant_dirs:
        segments, tags = process_participant(folder)
        all_windows.extend(segments)
        all_labels.extend(tags)

    dataset = pd.DataFrame({'signal': all_windows, 'label': all_labels})

    os.makedirs(out_dir, exist_ok=True)
    target = os.path.join(out_dir, "breathing_dataset.pkl")
    dataset.to_pickle(target)

