In [14]:
import os, glob, re, itertools, numpy as np, pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, roc_auc_score, confusion_matrix)

FEATURE_DIR = "NSR_SCD_FEATS_EX"          # update if needed

DEPTH_OPTS  = [3, 5, 7, 9]
LR_OPTS     = [1e-1, 1e-2, 1e-3, 1e-4]
BATCH_OPTS  = [32, 64, 128, 256]

DILATION_BASE = [1, 2, 4, 8, 16, 32, 64]
N_FILTERS      = 64
KERNEL_SIZE    = 2
DROPOUT_RATE   = 0.5
DENSE_UNITS    = 512
EPOCHS_SEARCH  = 50   # per grid candidate
VAL_SPLIT      = 0.2

In [23]:
def parse_filename(path):
    name = os.path.basename(path)
    is_scd = name.startswith("SCD")
    parts  = name.split('_')
    subj   = parts[1]
    ordinal = parts[2]                # First…Sixth
    order = ["First","Second","Third","Fourth","Fifth","Sixth"]
    idx = order.index(ordinal)
    # SCD order reversed: Sixth→0 … First→5
    idx = 5 - idx if is_scd else idx
    return int(is_scd), subj, idx

In [24]:
FEATURE_DIR = "NSR_SCD_FEATS_EX" 

def load_subject_dict(feature_dir):
    subj_map = {}
    for path in glob.glob(os.path.join(feature_dir, "*.csv")):
        label, subj, seg_idx = parse_filename(path)
        features = pd.read_csv(path).values.squeeze()   # eight‑element row

        print(label, features)
        if subj not in subj_map:
            subj_map[subj] = {"label": label, "segments": [None]*6}
        subj_map[subj]["segments"][seg_idx] = features
    # check completeness
    # for s, info in subj_map.items():
    #     if None in info["segments"]:
    #         raise ValueError(f"Subject {s} missing segment(s)")
    return subj_map

subject_dict = load_subject_dict(FEATURE_DIR)
print("Loaded", len(subject_dict), "subjects")

0 [0.7628077  0.32957674 0.03820513 0.22607643 0.02963741 0.149
 0.5859375  2.3671875 ]
0 [0.72310915 0.37386428 0.04552058 0.27241548 0.0376728  0.188
 0.5078125  2.5       ]
0 [0.76578608 0.34806866 0.04005168 0.24525634 0.03202674 0.155
 0.6171875  2.7578125 ]
0 [0.61649485 0.33394713 0.05475207 0.24804254 0.04023432 0.265
 0.3671875  2.25      ]
0 [0.78543307 0.36229085 0.04894737 0.26408974 0.03362346 0.186
 0.5859375  2.25      ]
0 [0.78034777 0.31394761 0.05342105 0.23146456 0.02966172 0.203
 0.5        2.078125  ]
0 [0.69551235 0.51237611 0.09393939 0.3568464  0.05130698 0.403
 0.25       2.5625    ]
0 [0.69657494 0.50386794 0.0943662  0.36251678 0.05204275 0.402
 0.125      2.0390625 ]
0 [0.72401536 0.47834206 0.08731707 0.36726377 0.05072596 0.358
 0.125      2.859375  ]
0 [0.62947368 0.45919998 0.09092827 0.31560428 0.0501378  0.431
 0.1328125  1.8046875 ]
0 [0.66488211 0.50291776 0.09731544 0.32376281 0.04869477 0.435
 0.2578125  2.3125    ]
0 [0.67767654 0.58217566 0.08630

In [28]:
X_tr, y_tr, X_te, y_te = [], [], [], []

for subj, info in subject_dict.items():
    lbl  = info["label"]
    segs = info["segments"]
    X_tr.append(segs[0])              # first 5‑min
    y_tr.append(lbl)
    X_te.extend(segs[1:])             # remaining 5
    y_te.extend([lbl]*5)

X_tr = np.vstack(X_tr)
X_te = np.vstack(X_te_list)
y_tr = np.array(y_tr)
y_te = np.array(y_te_list)

# Standardise → pad dummy zero to length 9
scaler = StandardScaler().fit(X_tr)
X_tr = np.pad(scaler.transform(X_tr), ((0,0),(0,1)))
X_te = np.pad(scaler.transform(X_te), ((0,0),(0,1)))

In [29]:
def build_wavenet(depth, lr):
    dilations = DILATION_BASE[:depth]
    inp = Input(shape=(9,))
    x   = layers.Reshape((9,1))(inp)
    skips = []
    for d in dilations:
        t = layers.Conv1D(N_FILTERS, KERNEL_SIZE, padding="causal",
                          dilation_rate=d, activation="tanh")(x)
        s = layers.Conv1D(N_FILTERS, KERNEL_SIZE, padding="causal",
                          dilation_rate=d, activation="sigmoid")(x)
        g = layers.Multiply()([t, s])
        skips.append(layers.TimeDistributed(layers.Dense(N_FILTERS,
                               activation='relu'))(g))
        x = layers.Add()([x, g])
    x = layers.Add()(skips)
    x = layers.Activation('relu')(x)
    x = layers.Flatten()(x)
    x = layers.Dropout(DROPOUT_RATE)(x)
    x = layers.Dense(DENSE_UNITS, activation='relu')(x)
    x = layers.Dropout(DROPOUT_RATE)(x)
    out = layers.Dense(1, activation='sigmoid')(x)
    model = Model(inp, out)
    model.compile(tf.keras.optimizers.Adam(lr), 'binary_crossentropy', ['accuracy'])
    return model

In [13]:
histories = {}
best_key, best_score = None, (np.inf, 0)
for depth, lr, bs in itertools.product(DEPTH_OPTS, LR_OPTS, BATCH_OPTS):
    key = f"{depth}_{lr:g}_{bs}"
    model = build_wavenet(depth, lr)
    hist = model.fit(X_tr, y_tr, epochs=EPOCHS_SEARCH, batch_size=bs,
                     validation_split=VAL_SPLIT, verbose=0)
    histories[key] = (hist, model)
    vl, va = hist.history['val_loss'][-1], hist.history['val_accuracy'][-1]
    if (vl, -va) < best_score:
        best_score, best_key = (vl, -va), key
    print(f"{key:15}  val_loss={vl:.4f}  val_acc={va:.4f}")

print("\nBest →", best_key, "with val_loss,‑acc =", best_score)

ValueError: dtype='string' is not a valid dtype for Keras type promotion.

In [30]:
best_depth, best_lr, best_bs = (3, 0.1, 256)
best_depth = int(best_depth)
final_model = build_wavenet(best_depth, best_lr)
final_hist = final_model.fit(X_tr, y_tr, epochs=EPOCHS_SEARCH,
                             batch_size=int(best_bs), verbose=2)

Epoch 1/50


ValueError: dtype='string' is not a valid dtype for Keras type promotion.

In [35]:
best_bs

256