In [25]:
import os
from shutil import move
from enum import Enum

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.fft import fft, fftfreq, rfft, rfftfreq
from scipy.signal import firwin
from sklearn.utils import shuffle
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier

from catboost import CatBoostClassifier

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.metrics import MeanSquaredError as mse

In [2]:
def fir_filter_coef(ntaps, lo, hi, freq):
    """Design a Hamming-windowed band-pass FIR filter with ``scipy.signal.firwin``.

    Args:
        ntaps: Number of filter taps (length of the returned coefficient array).
        lo: Lower band edge, in the same units as ``freq``.
        hi: Upper band edge, in the same units as ``freq``.
        freq: Sampling frequency, forwarded to ``firwin`` as ``fs``.

    Returns:
        The coefficient array returned by ``firwin`` (designed with
        ``scale=False``, i.e. without gain normalisation).
    """
    band_edges = [lo, hi]
    return firwin(
        numtaps=ntaps,
        cutoff=band_edges,
        window="hamming",
        pass_zero=False,  # two cutoffs + pass_zero=False -> band-pass response
        scale=False,
        fs=freq,
    )


def fir_filter(signal, coef):
    """Filter ``signal`` with FIR coefficients ``coef`` by direct convolution.

    Only the region where both sequences fully overlap is kept
    (``mode="valid"``), so the output is shorter than the longer input by
    ``min(len(signal), len(coef)) - 1`` samples.
    """
    filtered = np.convolve(signal, coef, mode="valid")
    return filtered


# Sampling frequency handed to the filter design (passed on to firwin as `fs`).
FREQ = 200
# 10-tap band-pass coefficients with band edges at 5 and 40 (same units as FREQ).
FILT_COEF = fir_filter_coef(ntaps=10, lo=5, hi=40, freq=FREQ)

# Index groups: 1-15 and 26-30 are labelled healthy; every other index in 1-40
# is labelled ill. (Presumably patient numbers -- TODO confirm against the data.)
healthy_indices = set(range(1, 16)) | set(range(26, 31))
ill_indices = set(range(1, 41)).difference(healthy_indices)

In [3]:
# Upper band edges (Hz) used to split the spectrum into consecutive bands.
# A frequency f belongs to the first band whose edge satisfies f <= edge; the
# last band also collects everything above the second-to-last edge.
# gamma_ticks yields 5 bands, beta_ticks yields 4 (no separate 13-32 / 32+ split).
beta_ticks = [4, 8, 13, 100]
gamma_ticks = [4, 8, 13, 32, 100]


def get_rhythms(signal: list[int], include_gamma=True, freq=200) -> list:
    """Return the share of spectral amplitude that falls into each band.

    The magnitude of the real FFT of ``signal`` is summed per band (band edges
    come from ``gamma_ticks`` or ``beta_ticks``) and divided by the total
    magnitude, so the returned values sum to 1 for any non-zero signal.

    Args:
        signal: 1-D sequence of samples.
        include_gamma: Use the 5-band ``gamma_ticks`` split when true,
            otherwise the 4-band ``beta_ticks`` split.
        freq: Sampling frequency of ``signal`` in Hz (defaults to 200, the
            value that used to be hard-coded here).

    Returns:
        A list with exactly one float per band (5 or 4 entries), ordered from
        the lowest band to the highest. An all-zero signal yields all zeros
        instead of NaNs.
    """
    ticks = gamma_ticks if include_gamma else beta_ticks

    ampl = np.abs(rfft(signal))
    bin_freq = rfftfreq(len(signal), 1 / freq)

    # Number of inner edges strictly below each frequency == its band index.
    # Unlike a one-step-per-bin cursor this stays correct when the frequency
    # resolution is coarser than a band (a bin may skip several bands at once).
    band_idx = np.searchsorted(ticks[:-1], bin_freq, side="left")
    # minlength keeps the output length fixed even when upper bands are empty.
    band_sum = np.bincount(band_idx, weights=ampl, minlength=len(ticks))

    total = band_sum.sum()
    if total == 0:
        # No spectral content at all: avoid 0/0 and report empty bands.
        return [0.0] * len(ticks)
    return (band_sum / total).tolist()

In [4]:
def shuffle_df(df: pd.DataFrame, random_state=None) -> pd.DataFrame:
    """Return a copy of ``df`` with its rows in random order and a fresh index.

    The input frame is left untouched; callers must use the return value
    (``df = shuffle_df(df)``). Rebinding the local name inside the function, as
    the previous version did, never reached the caller's frame.

    Args:
        df: Frame whose rows should be shuffled.
        random_state: Optional seed / RandomState forwarded to
            ``DataFrame.sample`` for a reproducible order.

    Returns:
        A new DataFrame containing every row of ``df`` exactly once, indexed
        0..len(df)-1.
    """
    return df.sample(frac=1, random_state=random_state).reset_index(drop=True)


# Column names of the per-band features, one group per channel suffix (O1 / O2).
features_O1 = ["delta_O1", "theta_O1", "alpha_O1", "beta_O1", "gamma_O1"]
features_O2 = ["delta_O2", "theta_O2", "alpha_O2", "beta_O2", "gamma_O2"]
features = [*features_O1, *features_O2]

df = pd.read_csv("rhythms.csv")

# X: the ten feature columns; y: the target kept as a one-column DataFrame
# (rf_cv selects the "apnoe" column from it by name).
X = df.loc[:, features]
y = df.loc[:, ["apnoe"]]

df

Unnamed: 0,pat,rec,delta_O1,theta_O1,alpha_O1,beta_O1,gamma_O1,delta_O2,theta_O2,alpha_O2,beta_O2,gamma_O2,apnoe
0,1,1,0.417323,0.135528,0.149772,0.223610,0.073768,0.434927,0.116623,0.135741,0.237842,0.074867,0
1,1,2,0.538516,0.123477,0.107926,0.170160,0.059922,0.496885,0.114371,0.107833,0.203208,0.077703,0
2,2,1,0.447526,0.114000,0.099880,0.232133,0.106461,0.446217,0.112615,0.100571,0.234981,0.105616,0
3,2,2,0.402466,0.119452,0.105445,0.243397,0.129241,0.395906,0.105860,0.097768,0.261988,0.138478,0
4,3,1,0.273047,0.085859,0.116092,0.364092,0.160909,0.362659,0.098561,0.116807,0.302566,0.119407,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,38,2,0.341499,0.146805,0.124386,0.264995,0.122315,0.388153,0.140758,0.111543,0.231968,0.127578,1
76,39,1,0.426490,0.135030,0.095222,0.177467,0.165790,0.444808,0.139175,0.109219,0.226094,0.080705,1
77,39,2,0.420619,0.137681,0.133625,0.230246,0.077829,0.411151,0.140815,0.118984,0.243312,0.085739,1
78,40,1,0.397225,0.143894,0.118021,0.241973,0.098888,0.375637,0.141778,0.120322,0.242994,0.119270,1


In [13]:
def nn_cv(
    X,
    y,
    n_folds,
    arch,
    activ,
    out_activ,
    loss,
    optim,
    epochs,
    batch_size,
    random_state=None,
):
    """Cross-validate a dense network and return the per-fold metric values.

    For every stratified fold a fresh ``keras.Sequential`` model is built,
    trained on the training split, saved to ``model_<fold>.keras`` in the
    current working directory (existing files are overwritten) and evaluated
    on the held-out split.

    Args:
        X: Feature DataFrame (rows are selected with ``.iloc``).
        y: Target DataFrame/Series aligned with ``X``.
        n_folds: Number of stratified folds.
        arch: Sizes of the hidden Dense layers, in order.
        activ: Activation of the hidden layers.
        out_activ: Activation of the single output unit.
        loss: Loss passed to ``model.compile``.
        optim: Optimizer passed to ``model.compile``.
        epochs: Training epochs per fold.
        batch_size: Training batch size.
        random_state: Optional seed for the fold shuffling only; network
            weight initialisation is not seeded here.

    Returns:
        ``np.ndarray`` with one row per fold holding everything
        ``model.evaluate`` returns after the loss (here: recall).
    """
    # Take the input width from the data instead of assuming 10 features.
    n_features = X.shape[1]
    hidden_units = list(arch)

    kfold = StratifiedKFold(n_folds, shuffle=True, random_state=random_state)
    cvscores = []

    for model_no, (train, test) in enumerate(kfold.split(X, y)):
        model = keras.Sequential(
            [layers.Input(shape=(n_features,))]
            + [layers.Dense(units=units, activation=activ) for units in hidden_units]
            + [layers.Dense(units=1, activation=out_activ)]
        )
        model.compile(loss=loss, optimizer=optim, metrics=["recall"])
        model.fit(
            X.iloc[train],
            y.iloc[train],
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
        )
        model.save(f"model_{model_no}.keras")

        # evaluate() returns [loss, *metrics]; keep only the metrics.
        score = model.evaluate(X.iloc[test], y.iloc[test], verbose=0)
        cvscores.append(score[1:])

    return np.array(cvscores)


# nn_cv ends the network with a single output unit, so the output activation
# must be "sigmoid": softmax normalises across the units of the layer and over
# one unit it always yields 1.0, which makes the prediction constant.
scores = nn_cv(
    X, y, 4, [20, 30, 10], "relu", "sigmoid", "binary_crossentropy", "adam", 80, 5
)
# Average each metric column over the folds.
mean_metric = scores.mean(axis=0)

print(mean_metric)
print(scores)

[0.52500001]
[[0.60000002]
 [0.80000001]
 [0.69999999]
 [0.        ]]


In [None]:
# Candidate values for a hyper-parameter search over the dense network.
# NOTE(review): "triple_layer" looks like it was meant to be "third_layer";
# the key is left as-is because other cells may look it up by this name.
# NOTE(review): nn_cv builds a single output unit, for which "softmax" always
# outputs 1.0 -- consider dropping it from "out_activ".
hyperparams = {
    "arch": {
        "first_layer": list(range(5, 50, 5)),
        "second_layer": list(range(5, 40, 5)),
        "triple_layer": list(range(5, 20, 5)),
        "fourth_layer": list(range(5, 15, 5)),
    },
    "activ": ["sigmoid", "relu", "tanh"],
    "out_activ": ["linear", "softmax", "sigmoid"],
}

In [30]:
def rf_cv(X, y, n_folds):
    """Mean recall of a default RandomForestClassifier over stratified folds.

    Args:
        X: Feature DataFrame (rows are selected with ``.iloc``).
        y: Target DataFrame containing an ``"apnoe"`` column.
        n_folds: Number of shuffled stratified folds.

    Returns:
        The arithmetic mean of the per-fold recall scores.
    """
    splitter = StratifiedKFold(n_folds, shuffle=True)

    def fold_recall(fit_idx, eval_idx):
        # Train on one split and score recall on the held-out rows.
        forest = RandomForestClassifier(verbose=False)
        forest.fit(X.iloc[fit_idx], y.iloc[fit_idx]["apnoe"])
        predicted = forest.predict(X.iloc[eval_idx])
        return recall_score(y.iloc[eval_idx], predicted)

    fold_recalls = [
        fold_recall(fit_idx, eval_idx) for fit_idx, eval_idx in splitter.split(X, y)
    ]
    return sum(fold_recalls) / len(fold_recalls)


rf_cv(X, y, 4)

0.8

In [15]:
def cb_cv(X, y, n_folds):
    """Mean recall of a CatBoostClassifier over shuffled stratified folds.

    Args:
        X: Feature DataFrame (rows are selected with ``.iloc``).
        y: Target aligned with ``X`` (rows are selected with ``.iloc``).
        n_folds: Number of stratified folds.

    Returns:
        The arithmetic mean of the per-fold recall scores.
    """
    splitter = StratifiedKFold(n_folds, shuffle=True)

    recall_sum = 0
    folds_scored = 0
    for fit_idx, eval_idx in splitter.split(X, y):
        # The model seed is fixed; the fold split itself is not seeded.
        booster = CatBoostClassifier(verbose=False, random_seed=111)
        booster.fit(X.iloc[fit_idx], y.iloc[fit_idx])
        predicted = booster.predict(X.iloc[eval_idx])

        recall_sum += recall_score(y.iloc[eval_idx], predicted)
        folds_scored += 1

    return recall_sum / folds_scored


cb_cv(X, y, 4)

0.75