## Respiratory classification (chest vs. O2 vs. CO2)

**Goal:** build a 3-class classifier based on time-domain features.

**Import libraries**

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.signal import butter, find_peaks, lfilter

from utils import crop_signal, butter_lowpass_filter, compute_time_features

**Download data from Dropbox**

In [None]:
!wget -O  annotations.csv "https://www.dropbox.com/s/6wk6ajpvry9rd7z/annotations.csv?dl=0" --quiet
!wget -O  time_series.csv "https://www.dropbox.com/s/vnvz27bxkcojq0n/time_series.csv?dl=0" --quiet

**Load data**

In [None]:
# Read the two CSV files fetched by the download cell: the annotation table
# (one row per signal) and the table holding the time series themselves.
df_annot, df_ts = (
    pd.read_csv(csv_name) for csv_name in ('annotations.csv', 'time_series.csv')
)

In [None]:
# Keep only the respiratory recordings, i.e. every signal not annotated as cardiac
is_respiratory = df_annot['type'].ne('cardiac')

# df_ts holds one signal per column; select the respiratory columns and
# transpose so that each row of TS is one signal
TS = df_ts.iloc[:, is_respiratory.to_numpy()].T
Y = df_annot.loc[is_respiratory, 'type']

n_ts = np.count_nonzero(is_respiratory)    # number of respiratory signals
ts = 1/df_annot.loc[0, 'sampling_freq']    # sampling period, taken from the first annotation row
n_samp = len(df_ts)                        # number of samples per signal

**Processing pipeline (1 signal)**

In [None]:
# Run the processing pipeline on one example signal:
# crop -> normalize -> low-pass filter -> plot.
i_ts = 1
t = ts * np.arange(0, n_samp)
s = TS.iloc[i_ts, :]

# Crop signal
tw, s_crop = crop_signal(t, s, 0, 100)

# Normalization: rescale the cropped signal to the range [-0.5, 0.5]
s_min, s_max = np.min(s_crop), np.max(s_crop)
s_norm = (s_crop - s_min)/(s_max - s_min) - 0.5

# Smoothing (filtering) use frequency cutoffs from exploratory data analysis.
# The filter is designed with scipy.signal.butter directly (5th-order low-pass,
# cutoff of 1 expressed in the same units as fs) because no `butter_lowpass`
# helper is imported in this notebook.
b, a = butter(5, 1, btype='low', fs=1/ts)
s_filt = lfilter(b, a, s_norm)

# NOTE(review): fig_signals is not defined or imported in the visible notebook;
# confirm where it comes from (it takes no arguments, so it reads globals).
fig_signals()

In [None]:
# Locate local extrema of the filtered signal. Minima are found as the peaks
# of the negated signal; only peaks with prominence >= 0.2 are kept.
maxima, prop = find_peaks(s_filt, prominence=0.2)
minima, prop = find_peaks(np.negative(s_filt), prominence=0.2)

# NOTE(review): fig_min_max is not defined or imported in the visible notebook;
# it takes no arguments, so it presumably reads the globals set above — confirm.
fig_min_max()

In [None]:
# Repeat the single-signal pipeline on a shorter crop (0 to 5) for a zoomed-in view.
i_ts = 1
t = ts * np.arange(0, n_samp)
s = TS.iloc[i_ts, :]

tw, s_crop = crop_signal(t, s, 0, 5)

# Rescale the cropped signal to the range [-0.5, 0.5]
s_min, s_max = np.min(s_crop), np.max(s_crop)
s_norm = (s_crop - s_min)/(s_max - s_min) - 0.5

# 5th-order Butterworth low-pass (cutoff of 1 in the units of fs), designed with
# scipy.signal.butter because no `butter_lowpass` helper is imported in this notebook.
b, a = butter(5, 1, btype='low', fs=1/ts)
s_filt = lfilter(b, a, s_norm)

# Minima are located as peaks of the negated signal
maxima, _ = find_peaks(s_filt, prominence=0.2)
minima, _ = find_peaks(-s_filt, prominence=0.2)

# NOTE(review): fig_zoom_in is not defined or imported in the visible notebook; confirm its source.
fig_zoom_in(tw, minima, maxima, s_filt)

In [None]:
# Extract the two time-domain features for the example signal from its
# filtered trace and the extrema located on it.
x1, x2 = compute_time_features(tw, s_filt, minima, maxima)

**Process all signals**

In [None]:
# Apply the single-signal pipeline (normalize -> low-pass filter -> locate
# extrema -> extract features) to every respiratory signal.

# Per-signal outputs: the two time-domain features and the number of extrema found
X1 = np.empty(n_ts)
X2 = np.empty(n_ts)
n_min = np.empty(n_ts)
n_max = np.empty(n_ts)

# Time axis for the full-length (uncropped) signals; defined here so this cell
# does not depend on a variable left over from the single-signal demo cells
t = ts * np.arange(n_samp)

# Smoothing filter: use frequency cutoffs from exploratory data analysis.
# Designed once, outside the loop, because it is identical for every signal.
b, a = butter(5, 1, btype='low', fs=1/ts)

for i, s in enumerate(TS.to_numpy()):
  # Normalization: rescale to the range [-0.5, 0.5]
  s_min, s_max = np.min(s), np.max(s)
  s_norm = (s - s_min)/(s_max - s_min) - 0.5

  # Smoothing (filtering)
  s_filt = lfilter(b, a, s_norm)

  # Local minima/maxima location (minima are peaks of the negated signal)
  maxima, _ = find_peaks(s_filt, prominence=0.2)
  minima, _ = find_peaks(-s_filt, prominence=0.2)

  n_max[i] = len(maxima)
  n_min[i] = len(minima)

  # Feature extraction on the filtered signal: the extrema indices were located
  # on s_filt, and the single-signal pipeline also passes s_filt
  X1[i], X2[i] = compute_time_features(t, s_filt, minima, maxima)

Check that local minima and maxima were located in all signals

In [None]:
# One histogram per quantity: extrema counts and the two extracted features
fig, axs = plt.subplots(1, 4, figsize=(18,5))

titles = ['n_min', 'n_max', 'X1', 'X2']
for ax, title, values in zip(axs, titles, (n_min, n_max, X1, X2)):
  ax.hist(values)
  ax.grid()
  ax.set_title(title)

Visualize the features

In [None]:
# Scatter the two features, one colour per signal type
fig, axs = plt.subplots(1, 1, figsize=(8,8))

ts_types = ['chest', 'O2', 'CO2']                                         # legend labels
type_names = ['respiratory_chest', 'respiratory_O2', 'respiratory_CO2']   # values in Y, same order
for type_name in type_names:
  mask = Y.eq(type_name).to_numpy()
  axs.scatter(X1[mask], X2[mask])

axs.set_xlabel('X1', fontsize=14)
axs.set_ylabel('X2', fontsize=14)
axs.grid()
# Labels are matched to the scatter collections in plotting order
axs.legend(ts_types, fontsize=18)

**Classification**

In [None]:
# End model: a decision tree, scored by 10-fold cross-validation on the two features
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

X = np.column_stack((X1, X2))
clf = DecisionTreeClassifier(random_state=0)

fold_scores = cross_val_score(clf, X, Y, cv=10)
acc = np.round(100 * fold_scores.mean(), 1)   # mean accuracy in percent, one decimal
print(f"Mean accuracy: {acc}%")

In [None]:
from sklearn.tree import export_text

# Fit the tree on all samples (cross_val_score leaves `clf` itself unfitted)
# and print its decision rules as text
fitted_tree = clf.fit(X, Y)
print(export_text(fitted_tree))

Visualize the results

In [None]:
# NOTE(review): fig_classifier_results is not defined or imported in the visible
# notebook; confirm where it comes from.
fig_classifier_results(X1, X2, Y, acc)