In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from scipy import signal, interpolate # signal processing lib
import glob
from tqdm import tqdm # pretty loading bar
import pyarrow.csv # faster .csv I/O than in pandas

In [2]:
# Directory holding the test PPG recordings; the submission cells glob it for
# `PPG_EXP_*.csv` files.
ROOT = "../input/neymark-signal-processing/watch_data_test/watch_data_test/"
# Sampling rate of the submitted interval sequence: number of output samples per
# unit of `Timestamp` (presumably Hz with timestamps in seconds -- TODO confirm).
SUBMISSION_SR = 30

In [3]:
def format_peaks_to_intervals(ppg_peaks, ts_start, ts_finish, sample_rate=None):
    """
    Formats the detected heart beat timestamps into the sequence of interbeat intervals.
    The test ECG peaks are formatted in the same way.
    The start and finish of each experiment are the start and finish of the PPG data.

    Parameters
    ----------
    ppg_peaks : array-like
        Timestamps of the detected beats. Assumed to be sorted ascending, since the
        intervals are computed as consecutive differences.
    ts_start, ts_finish : float
        First and last timestamp of the experiment.
    sample_rate : float, optional
        Number of output samples per unit of time. Defaults to ``SUBMISSION_SR``.

    Returns
    -------
    numpy.ndarray
        ``int((ts_finish - ts_start) * sample_rate)`` values on an evenly spaced grid
        from ``ts_start`` to ``ts_finish``. Each value is the interval that ends at the
        nearest detected beat; grid points before the first or after the last beat
        are 0. If no beats were detected, the whole sequence is 0.
    """
    if sample_rate is None:
        sample_rate = SUBMISSION_SR
    timestamps_interp = np.linspace(ts_start, ts_finish, int((ts_finish - ts_start) * sample_rate))

    ppg_peaks = np.asarray(ppg_peaks)
    if ppg_peaks.size == 0:
        # interp1d cannot be built from zero points; "no beat information" is
        # already encoded as 0 by fill_value below, so stay consistent with it.
        return np.zeros(timestamps_interp.shape)

    # The first interval is measured from the start of the recording to the first beat.
    heartbeat_intervals = np.diff(ppg_peaks, prepend=ts_start)
    func = interpolate.interp1d(ppg_peaks, heartbeat_intervals, "nearest", bounds_error=False, fill_value=0)
    heartbeat_intervals_interp = func(timestamps_interp)
    return heartbeat_intervals_interp

In [5]:
def process_exp(ppg_df):
    """
    Converts one PPG recording into its interpolated interbeat-interval sequence.

    Every local maximum of the `PPG_signal` column is taken as a heartbeat. The
    timestamps of those maxima, together with the first and last timestamp of the
    recording, are handed to `format_peaks_to_intervals`.
    """
    ppg_values = ppg_df['PPG_signal'].values
    ts = ppg_df['Timestamp'].values
    # find_peaks returns (indices, properties); only the indices are needed.
    beat_idx, _ = signal.find_peaks(ppg_values)
    return format_peaks_to_intervals(ts[beat_idx], ts[0], ts[-1])

Two simple approaches are tried below: a trend built from rolling maxima, and `scipy.signal.filtfilt` with the basic settings (as in the documentation example).

In [7]:
# Rolling-maximum baseline: the raw PPG signal is replaced by its centered
# 5-sample rolling maximum before peak detection.
test_ppgs_pathes = sorted(glob.glob(f"{ROOT}/PPG_EXP_*.csv"))
if not test_ppgs_pathes:
    # Fail with a clear message instead of np.concatenate's
    # "need at least one array to concatenate" further down.
    raise FileNotFoundError(f"No PPG_EXP_*.csv files found in {ROOT}")

all_predictions = []
for path in tqdm(test_ppgs_pathes):
    ppg_df = pyarrow.csv.read_csv(path).to_pandas()
    # .assign returns a new frame, so the loaded data is not overwritten through
    # an alias (the previous `max_trend = ppg_df` was not a copy).
    max_trend = ppg_df.assign(
        PPG_signal=ppg_df["PPG_signal"].rolling(
            window=5,
            center=True,
            min_periods=3,  # lets the shortened windows at both ends still yield a value
        ).max()
    )
    ppg_rr_intervals_interp = process_exp(max_trend)
    all_predictions.append(ppg_rr_intervals_interp)

# One flat sequence over all experiments, in sorted file order; `id` is the row number.
all_predictions = np.concatenate(all_predictions)
sample_summission = pd.DataFrame({"PPG_interbeat_interval": all_predictions})
sample_summission['id'] = sample_summission.index
print(sample_summission.shape)
sample_summission.to_csv("submission_gust3.csv", index=False)

100%|██████████| 9/9 [00:01<00:00,  7.04it/s]


(1347666, 2)


In [None]:
Public score: 0.31055
Private score: 0.32141

In [6]:
# Low-pass baseline: the PPG signal is smoothed with a zero-phase elliptic filter
# before peak detection.
test_ppgs_pathes = sorted(glob.glob(f"{ROOT}/PPG_EXP_*.csv"))
if not test_ppgs_pathes:
    # Fail with a clear message instead of np.concatenate's
    # "need at least one array to concatenate" further down.
    raise FileNotFoundError(f"No PPG_EXP_*.csv files found in {ROOT}")

# The filter design does not depend on the file, so it is computed once.
# 4th-order elliptic low-pass: 0.01 dB passband ripple, 120 dB stopband
# attenuation, critical frequency at 0.125 of the Nyquist frequency.
b, a = signal.ellip(4, 0.01, 120, 0.125)

all_predictions = []
for path in tqdm(test_ppgs_pathes):
    ppg_df = pyarrow.csv.read_csv(path).to_pandas()
    # Forward-backward filtering (no phase shift); method="gust" selects
    # Gustafsson's handling of the signal edges instead of padding.
    ppg_df["PPG_signal"] = signal.filtfilt(b, a, ppg_df["PPG_signal"], method="gust")
    ppg_rr_intervals_interp = process_exp(ppg_df)
    all_predictions.append(ppg_rr_intervals_interp)

# One flat sequence over all experiments, in sorted file order; `id` is the row number.
all_predictions = np.concatenate(all_predictions)
sample_summission = pd.DataFrame({"PPG_interbeat_interval": all_predictions})
sample_summission['id'] = sample_summission.index
print(sample_summission.shape)
sample_summission.to_csv("submission.csv", index=False)

100%|██████████| 9/9 [00:02<00:00,  3.56it/s]


(1347666, 2)


In [None]:
Public score: 0.30471
Private score: 0.31182