### PPG Signal Processing for the Goldilocks ppg files

In [1]:
# Initialize the package
import sys

repo = '/root/biosignal_processing_package'
sys.path.insert(1, repo)
sys.path.insert(1, repo + '/biosignal_processing_package' )

Import the package functions

In [3]:
import pandas as pd
import numpy as np

# package functions
from preprocessing.signal_transform import signal_downsample, segment_signal 
from preprocessing.signal_filter import filter_ppg
from plotting.signal_plots import plot_signal
from plotting.signal_plots import plot_filtered
from savvyppg import ppg_module

Reading and importing the signals

In [4]:
# read files
files = [
    "0-Potions-1622059739-ppg.csv.gz",
    "0-RelaxGame-1622059683-ppg.csv.gz",
    #"0-RelaxKaleidoscope-1620938134-ppg.csv.gz",
    "4885434061570493903-MarbleGame-1620934336-ppg.csv.gz",
    "5348144018922788108-VagalTone-1619717980-ppg.csv.gz"
]

signals = []
for file in files:
    filepath = repo + "/data/" + file
    signal = pd.read_csv(filepath, compression = 'gzip')
    signal.columns = [col.strip() for col in signal.columns] # strip away spaces from col names
    signals.append(signal)

Inspect the structure of the data and estimate the sampling rate of the signal

In [5]:
signal = signals[3]
print(signal.shape)
display(signal.head())
sampling_rate = len(signal)/(signal.ppg_time[len(signal)-2])
print("sampling rate: " + str(sampling_rate))

(22536, 5)


Unnamed: 0,ppg_time,ppg0,ppg1,ppg2,ambient
0,0.0,-25379,-71592,-21379,-329832
1,0.007692,-25321,-71605,-21360,-329910
2,0.015385,-25265,-71603,-21302,-329959
3,0.023077,-25001,-71689,-21252,-329999
4,0.030769,-24955,-71631,-21147,-329938


sampling rate: 135.04562222093975


In [7]:
# comparing available signals
average_signal = (signal.ppg0+signal.ppg1+signal.ppg2)/3
plot_signal(
     [signal.ppg0.to_list(),signal.ppg1.to_list(),signal.ppg2.to_list(),average_signal.to_list()],
     [135,135,135,135], 
     labels=["ppg0","ppg1","ppg2","average"],
     x_axis_label = "Time (s)"
)


Visualizing all the signals

In [8]:
# visualizing all sets of signals
n= 0
for signal in signals:
    print(files[n])
    plot_signal(
         [signal.ppg0.to_list(),signal.ppg1.to_list(),signal.ppg2.to_list()],
         [135,135,135], 
         labels=["ppg0","ppg1","ppg2"],
         x_axis_label = "Time (s)"
    )
    n = n + 1


0-Potions-1622059739-ppg.csv.gz


0-RelaxGame-1622059683-ppg.csv.gz


4885434061570493903-MarbleGame-1620934336-ppg.csv.gz


5348144018922788108-VagalTone-1619717980-ppg.csv.gz


## Signal Processing

#### Signal Filtering

In [9]:
# make list of only ppg0 signals
ppg0_list = [signal.ppg0.to_list() for signal in signals]
sr = 135
# actual cleaning
ppg0_clean = [filter_ppg(ppg0_sig, sr) for ppg0_sig in ppg0_list]

#### Signal Cleaning Inspection

In [10]:
# comparing raw and clean signals
for i in range(len(signals)):
    print(files[i])
    plot_filtered(ppg0_list[i],ppg0_clean[i],sr)

0-Potions-1622059739-ppg.csv.gz


0-RelaxGame-1622059683-ppg.csv.gz


4885434061570493903-MarbleGame-1620934336-ppg.csv.gz


5348144018922788108-VagalTone-1619717980-ppg.csv.gz


_Observation:_

- _The first 3 signals, smoothing definitely happened, but signal quality still visually appears to be poor even after filtering._

- _For the 4th signal, which was the longest (175 seconds), the filtering function worked perfectly, removing wandering, motion artifacts, etc._

#### Testing the filter on smaller sample of ppg signal

In [12]:
# Segment the signal using `segment_signal()`
# 20 seconds
segments = segment_signal(ppg0_list[3], 135, window_time = 20, show_plot = False)

#Clean and visualize the first segment
clean_segment = filter_ppg(segments[0], sr)
plot_filtered(segments[0],clean_segment,sr)

There are segments with different window length


In [13]:
# Segment the signal using `segment_signal()`
# 10 seconds
segments = segment_signal(ppg0_list[3], 135, window_time = 10, show_plot = False)

#Clean and visualize the first segment
clean_segment = filter_ppg(segments[0], sr)
plot_filtered(segments[0],clean_segment,sr)

There are segments with different window length


In [15]:
# Segment the signal using `segment_signal()`
# 5 seconds
segments = segment_signal(ppg0_list[3], 135, window_time = 5, show_plot = False)

#Clean and visualize the first segment
clean_segment = filter_ppg(segments[0], sr)
plot_filtered(segments[0],clean_segment,sr)

There are segments with different window length


### Signal Quality Inspection

In [16]:
# Preprocess and inspect beat level quality of each ppg signal
savvyppg_signals = []
for i in range(len(signals)):
    print("Processing " + files[i])
    sig = ppg_module.Signal(ppg0_list[i],sr, predict_beats=True)
    savvyppg_signals.append(sig)

Processing 0-Potions-1622059739-ppg.csv.gz
Processing 0-RelaxGame-1622059683-ppg.csv.gz
Processing 4885434061570493903-MarbleGame-1620934336-ppg.csv.gz
Processing 5348144018922788108-VagalTone-1619717980-ppg.csv.gz


In [17]:
# Visualize the beat level signal quality
from plotting.ppg_plotting import ppg_plot_quality
for i in range(len(signals)):
    print(files[i])
    ppg_plot_quality((savvyppg_signals[i]).data_ppg,(savvyppg_signals[i]).beats_df,sr)

0-Potions-1622059739-ppg.csv.gz


0-RelaxGame-1622059683-ppg.csv.gz


4885434061570493903-MarbleGame-1620934336-ppg.csv.gz


5348144018922788108-VagalTone-1619717980-ppg.csv.gz
