In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from scipy.signal import welch

from glob import glob
from tqdm import tqdm

from evaluation import evaluate

In [2]:
def get_features(segment):
    frequency, power = welch(segment["BVP"], fs=4.0, nperseg=120/8)
    max_index = power.argmax()
    frequency_max = frequency[max_index]
    power_max = power_max = power[max_index]

    descriptive_features = segment.describe(percentiles=[0.1, 0.5, 0.9]).values[1:,:].flatten()
    correlation_features = segment.corr().values[np.triu_indices(6, 1)]

    output_features = np.concatenate([power, [frequency_max, power_max], descriptive_features, correlation_features])
    return output_features

In [3]:
def get_test_predictions(patient_id, model, probability_threshold=0.5):
    labels = pd.read_csv(f"data\data\{patient}\{patient}_labels.csv")
    n_segments = int((labels["duration"][0]-labels["duration"][0]%30000)/30000)
    
    test_segments = {i: None for i in range(n_segments)}
    test_features = {i: None for i in range(n_segments)}
    for i in tqdm(range(n_segments)):
        try:
            test_segments[i] = pd.read_parquet(f"data/segments/test/{patient_id}/{patient_id}_test_segment_{i}.parquet")
            test_features[i] = get_features(test_segments[i])
        except FileNotFoundError:
            continue

    prediction_values = model.predict_proba([v for v in test_features.values() if v is not None])
    prediction_values = prediction_values[:, 1]

    index = [k for k, v in test_segments.items() if v is not None]
    predictions = pd.DataFrame(data={"prediction_probability":prediction_values}, index=index)
    predictions["time"] = (predictions.index+1)*30000
    predictions["prediction"] = predictions["prediction_probability"] > probability_threshold
    
    return predictions

In [4]:
patients = ["MSEL_00172", "MSEL_00501", "MSEL_01097", "MSEL_01575", "MSEL_01808", "MSEL_01838", "MSEL_01842"]
patient_data = {patient:{"X":[], "y":[]} for patient in patients}
for patient in tqdm(patients):
    interictal_segment_files = glob(f"data/segments/scaled/{patient}/interictal/{patient}_interictal_scaled_segment_*.parquet")
    preictal_segment_files= glob(f"data/segments/scaled/{patient}/preictal/{patient}_preictal_scaled_segment_*.parquet")
    interictal_segment_files = list(np.random.choice(interictal_segment_files, size=len(preictal_segment_files), replace=False))
    for segment_file in interictal_segment_files:
        segment = pd.read_parquet(segment_file)
        features = get_features(segment)
        if any(np.isnan(features)):
            continue
        patient_data[patient]["X"].append(features)
        patient_data[patient]["y"].append(0)
    for segment_file in preictal_segment_files:
        segment = pd.read_parquet(segment_file)
        features = get_features(segment)
        if any(np.isnan(features)):
            continue
        patient_data[patient]["X"].append(features)
        patient_data[patient]["y"].append(1)

100%|██████████| 7/7 [01:02<00:00,  8.93s/it]


In [5]:
model = LogisticRegression(max_iter=1000, C=0.005)
train_accuracy = {}
validation_accuracy = {}
test_predictions = {}
evaluation_metrics = {}
for patient in patients:
    print(f"Test patient: {patient}")
    X_test = patient_data[patient]["X"]
    y_test = patient_data[patient]["y"]

    X_train = []
    y_train = []
    for p in patients:
        if p != patient:
            X_train.extend(patient_data[p]["X"])
            y_train.extend(patient_data[p]["y"])     

    model.fit(X_train, y_train)
    train_accuracy[patient] = model.score(X_train, y_train)
    validation_accuracy[patient] = model.score(X_test, y_test)

    test_predictions[patient] = get_test_predictions(patient, model)
    evaluation_metrics[patient] = evaluate(patient, test_predictions[patient], integration_windows=[600000], thresholds=[0.55], timer_duration=3600000, detection_interval=60000)
        

Test patient: MSEL_00172


100%|██████████| 8398/8398 [01:07<00:00, 124.65it/s]


Test patient: MSEL_00501


100%|██████████| 10578/10578 [01:12<00:00, 145.63it/s] 


Test patient: MSEL_01097


100%|██████████| 14050/14050 [01:51<00:00, 126.12it/s]


Test patient: MSEL_01575


100%|██████████| 14788/14788 [01:41<00:00, 145.25it/s]


Test patient: MSEL_01808


100%|██████████| 10994/10994 [01:27<00:00, 126.31it/s]


Test patient: MSEL_01838


100%|██████████| 13635/13635 [01:44<00:00, 130.30it/s]


Test patient: MSEL_01842


100%|██████████| 10109/10109 [01:19<00:00, 127.25it/s]


In [6]:
np.mean(list(validation_accuracy.values()))

0.5488621245052683

In [7]:
np.mean(list(train_accuracy.values()))

0.6204828153830503

In [12]:
evaluation_metrics

{'MSEL_00172': {'600000_0.55': {'S': 0.6,
   'TiW': 0.5371694067190851,
   'IoC': 0.06877313045065014,
   'p': 0.7586683617396658,
   'n': 6,
   'N': 10}},
 'MSEL_00501': {'600000_0.55': {'S': 1.0,
   'TiW': 0.5291293213828425,
   'IoC': 0.4767812990762996,
   'p': 1.0,
   'n': 1,
   'N': 1}},
 'MSEL_01097': {'600000_0.55': {'S': 1.0,
   'TiW': 0.8248362289945884,
   'IoC': 0.18024874888277154,
   'p': 1.0,
   'n': 2,
   'N': 2}},
 'MSEL_01575': {'600000_0.55': {'S': 0.9876543209876543,
   'TiW': 0.8045448397132423,
   'IoC': 0.1884265922324877,
   'p': 4.730228986455627e-05,
   'n': 80,
   'N': 81}},
 'MSEL_01808': {'600000_0.55': {'S': 1.0,
   'TiW': 0.9954520647625977,
   'IoC': 0.004956187358888142,
   'p': 1.0,
   'n': 8,
   'N': 8}},
 'MSEL_01838': {'600000_0.55': {'S': 0.3333333333333333,
   'TiW': 0.28874220755408875,
   'IoC': 0.04863010402186302,
   'p': 1.0,
   'n': 1,
   'N': 3}},
 'MSEL_01842': {'600000_0.55': {'S': 1.0,
   'TiW': 0.6657103284527107,
   'IoC': 0.3403942842

In [9]:
evaluate(patient, test_predictions["MSEL_01575"], integration_windows=[600000], thresholds=[0.55], timer_duration=1200000, detection_interval=60000)

{'600000_0.55': {'S': 1.0,
  'TiW': 0.7452996077370485,
  'IoC': 0.27210432628682346,
  'p': 0.6038728761943652,
  'n': 2,
  'N': 2}}

In [51]:
ioc_values = []
for i in range(100):
    for x in fake_predictions.values():
        x["prediction_probability"] = np.random.uniform(0, 1, len(x["prediction_probability"]))
        x["prediction"] = x["prediction_probability"] > 0.55
    new_iocs = [list(evaluate(patient, fake_predictions[patient], integration_windows=[600000], thresholds=[0.55], timer_duration=3600000, detection_interval=60000).values())[0]["IoC"] for patient in patients]
    ioc_values.extend(new_iocs)

In [50]:
(np.array(ioc_values)>0).mean()

0.7485714285714286

In [None]:
seizure_times

1     344781000
2     402589000
3     403088000
4     403240000
5     403286000
        ...    
77    411054000
78    411457000
79    411596000
80    411710000
81    411848000
Name: labels.startTime, Length: 81, dtype: int64

In [None]:
patient = 'MSEL_01575'
labels = pd.read_csv(f"data\data\{patient}\{patient}_labels.csv")
n_segments = int((labels["duration"][0]-labels["duration"][0]%30000)/30000)

In [None]:
evaluate()

TypeError: evaluate() missing 6 required positional arguments: 'patient_id', 'predictions', 'integration_windows', 'thresholds', 'timer_duration', and 'detection_interval'