In [1]:
import warnings
warnings.filterwarnings("ignore", category= UserWarning)
warnings.filterwarnings("ignore", category= FutureWarning)
warnings.filterwarnings("ignore", category= RuntimeWarning)

In [2]:
import mne
# Set MNE's log level to "CRITICAL" (presumably to keep cell output quiet
# during the grid searches below — confirm).
mne.set_log_level("CRITICAL")
import numpy as np
import double_dipper
from double_dipper import dataset, constants, io, ml
from double_dipper.constants import problem, strategy_prompt

In [3]:
from double_dipper.features import chain, time_window, bandpass_filter, psd, psd_bands, flatten_end, dup

In [4]:
def labeller(meta):
    """Translate a trial's reported strategy into a binary class label.

    Parameters
    ----------
    meta : mapping
        Trial metadata; must contain a "strategy" key whose value is a
        string or None.

    Returns
    -------
    int or None
        0 when the strategy starts with "fact", 1 when it starts with
        "procedure" (both case-insensitive), and None when the strategy is
        None or matches neither prefix.
    """
    strategy = meta["strategy"]
    if strategy is None:
        return None

    lowered = strategy.lower()
    # The position of the prefix in this tuple is the class label.
    for label, prefix in enumerate(("fact", "procedure")):
        if lowered.startswith(prefix):
            return label
    return None
def divider(meta):
    """Return the value stored under the "epoch" key of ``meta``.

    Written as a named function rather than a lambda bound to a name, so it
    has a proper ``__name__`` and a docstring. Raises KeyError when ``meta``
    has no "epoch" entry, exactly as the lambda did.
    """
    return meta["epoch"]

In [5]:
def gen_dset(subjNo, split=.7):
    """Build an ordered train/test split for one subject.

    The file pairs found under ``cleaned/main/<subjNo>`` are passed to
    ``io.partition`` together with ``divider`` and ``labeller``. The "x" and
    "y" arrays of every resulting group are concatenated along axis 0,
    visiting the groups in sorted key order. No shuffling is performed: the
    first ``int(len(X) * split)`` rows form the training set and the
    remaining rows form the test set.

    Parameters
    ----------
    subjNo : int or str
        Subject identifier, interpolated into the data path.
    split : float
        Fraction of rows assigned to the training set.

    Returns
    -------
    tuple
        ``(trainX, trainY, testX, testY)``.
    """
    file_pairs = io.filePairs(f"cleaned/main/{subjNo}")
    groups = io.partition(divider, labeller, file_pairs)
    ordered_groups = [groups[key] for key in sorted(groups.keys())]

    features = np.concatenate([group["x"] for group in ordered_groups], axis=0)
    labels = np.concatenate([group["y"] for group in ordered_groups], axis=0)

    # The boundary is derived from the feature array and reused for the labels.
    cut = int(len(features) * split)
    return (features[:cut], labels[:cut], features[cut:], labels[cut:])

In [6]:
# Subject whose recordings are used for every grid search below.
SUBJ_NO = 10
# The first 66% of the rows (in gen_dset's order) train; the rest test.
trX, trY, tsX, tsY = gen_dset(SUBJ_NO, split=0.66)

In [7]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

In [8]:
from imblearn.over_sampling import ADASYN, SMOTE
def myADASYN():
    """Create a fresh ADASYN over-sampler with a fixed random seed.

    ``n_jobs`` is deliberately not passed. imbalanced-learn deprecated that
    argument in 0.10 and dropped it from the constructor in later releases,
    where passing it raises ``TypeError``. It only controlled parallelism of
    the nearest-neighbour search, so the resampled output for
    ``random_state=0`` is unaffected.

    Returns
    -------
    ADASYN
        A new, unfitted resampler.
    """
    return ADASYN(random_state=0)
def mySMOTE():
    """Create a fresh SMOTE over-sampler with a fixed random seed.

    ``n_jobs`` is deliberately not passed. imbalanced-learn deprecated that
    argument in 0.10 and dropped it from the constructor in later releases,
    where passing it raises ``TypeError``. It only controlled parallelism of
    the nearest-neighbour search, so the resampled output for
    ``random_state=0`` is unaffected.

    Returns
    -------
    SMOTE
        A new, unfitted resampler.
    """
    return SMOTE(random_state=0)

## First Pass

In [9]:
# Candidate feature pipelines, before the shared wrapping applied below.
base_selectors = [
    bandpass_filter(1, 32),
    chain(bandpass_filter(1, 32), psd(1, 32, add=True)),
    chain(bandpass_filter(1, 32), psd(1, 32, add=False)),
]
# Every candidate is chained after a time_window(0, strategy_prompt.delay)
# step and before flatten_end.
feature_selectors = [
    chain(time_window(0, strategy_prompt.delay), selector, flatten_end)
    for selector in base_selectors
]

# The printed indices refer to positions in these lists.
models = [LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis]
resamplers = [None, mySMOTE, myADASYN]
inds, conf = ml.grid_search(
    trX, trY, tsX, tsY,
    feature_selectors, resamplers, models,
)

model=0,resampler=0,feature_selector=0: 	precision=0.583, recall=0.500, f1=0.538
New best achieved

model=0,resampler=0,feature_selector=1: 	precision=0.636, recall=0.500, f1=0.560
New best achieved

model=0,resampler=0,feature_selector=2: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=1,feature_selector=0: 	precision=0.588, recall=0.714, f1=0.645
New best achieved

model=0,resampler=1,feature_selector=1: 	precision=0.647, recall=0.786, f1=0.710
New best achieved

model=0,resampler=1,feature_selector=2: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=2,feature_selector=0: 	precision=0.556, recall=0.714, f1=0.625

model=0,resampler=2,feature_selector=1: 	precision=0.556, recall=0.714, f1=0.625

model=0,resampler=2,feature_selector=2: 	precision=0.400, recall=0.429, f1=0.414

model=1,resampler=0,feature_selector=0: 	precision=0.444, recall=0.857, f1=0.585

model=1,resampler=0,feature_selector=1: 	precision=0.400, recall=0.286, f1=0.333

model=1,resampler=0,featur

The LDA seemed to generally perform better than the QDA, so we'll only look at the former for now.

## Second Pass

In [10]:
# Candidates 0-2 repeat the first pass; candidates 3-4 call psd() without
# the two positional range arguments.
base_selectors = [
    bandpass_filter(1, 32),
    chain(bandpass_filter(1, 32), psd(1, 32, add=True)),
    chain(bandpass_filter(1, 32), psd(1, 32, add=False)),
    chain(bandpass_filter(1, 32), psd(add=True)),
    chain(bandpass_filter(1, 32), psd(add=False)),
]

# Every candidate is chained after a time_window(0, strategy_prompt.delay)
# step and before flatten_end.
feature_selectors = [
    chain(time_window(0, strategy_prompt.delay), selector, flatten_end)
    for selector in base_selectors
]

models = [LinearDiscriminantAnalysis]
resamplers = [None, mySMOTE, myADASYN]

inds, conf = ml.grid_search(
    trX, trY, tsX, tsY,
    feature_selectors, resamplers, models,
)

model=0,resampler=0,feature_selector=0: 	precision=0.583, recall=0.500, f1=0.538
New best achieved

model=0,resampler=0,feature_selector=1: 	precision=0.636, recall=0.500, f1=0.560
New best achieved

model=0,resampler=0,feature_selector=2: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=0,feature_selector=3: 	precision=0.636, recall=0.500, f1=0.560

model=0,resampler=0,feature_selector=4: 	precision=0.471, recall=0.571, f1=0.516

model=0,resampler=1,feature_selector=0: 	precision=0.588, recall=0.714, f1=0.645
New best achieved

model=0,resampler=1,feature_selector=1: 	precision=0.647, recall=0.786, f1=0.710
New best achieved

model=0,resampler=1,feature_selector=2: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=1,feature_selector=3: 	precision=0.588, recall=0.714, f1=0.645

model=0,resampler=1,feature_selector=4: 	precision=0.500, recall=0.571, f1=0.533

model=0,resampler=2,feature_selector=0: 	precision=0.556, recall=0.714, f1=0.625

model=0,resampler=2,featur

Leaving the PSD range unrestricted doesn't make much of a difference, so going forward we keep the range restricted.

## Third Pass

In [12]:
# Each pair is (second argument to bandpass_filter, second argument to psd),
# presumably the upper frequency limits (confirm against
# double_dipper.features). Every pair is built with add=True and then
# add=False, so even indices use add=True and odd indices use add=False.
limit_pairs = [(32, 32), (45, 32), (45, 45)]
base_selectors = [
    chain(bandpass_filter(1, filter_limit), psd(1, psd_limit, add=add_flag))
    for (filter_limit, psd_limit) in limit_pairs
    for add_flag in (True, False)
]
# Every candidate is chained after a time_window(0, strategy_prompt.delay)
# step and before flatten_end.
feature_selectors = [
    chain(time_window(0, strategy_prompt.delay), selector, flatten_end)
    for selector in base_selectors
]

models = [LinearDiscriminantAnalysis]
resamplers = [None, mySMOTE, myADASYN]

In [13]:
# Run the grid over the selectors, resamplers and models defined for this
# pass, using the split created earlier.
inds, conf = ml.grid_search(
    trX, trY, tsX, tsY,
    feature_selectors, resamplers, models,
)

model=0,resampler=0,feature_selector=0: 	precision=0.636, recall=0.500, f1=0.560
New best achieved

model=0,resampler=0,feature_selector=1: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=0,feature_selector=2: 	precision=0.636, recall=0.500, f1=0.560

model=0,resampler=0,feature_selector=3: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=0,feature_selector=4: 	precision=0.636, recall=0.500, f1=0.560

model=0,resampler=0,feature_selector=5: 	precision=0.583, recall=0.500, f1=0.538

model=0,resampler=1,feature_selector=0: 	precision=0.647, recall=0.786, f1=0.710
New best achieved

model=0,resampler=1,feature_selector=1: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=1,feature_selector=2: 	precision=0.647, recall=0.786, f1=0.710

model=0,resampler=1,feature_selector=3: 	precision=0.429, recall=0.429, f1=0.429

model=0,resampler=1,feature_selector=4: 	precision=0.688, recall=0.786, f1=0.733
New best achieved

model=0,resampler=1,feature_selector=5: 	pre

Across the sampling strategies, the temporal + frequency feature selectors (the even-numbered ones, built with `add=True`) did better than those with just frequency data (the odd-numbered ones, built with `add=False`). Going forward, we always include temporal data.

Additionally, thus far it appears SMOTE performs slightly better than ADASYN, so we remove the latter. Resampling in general seems to improve performance, so going forward we only use SMOTE.

## Fourth Pass

In [14]:
# Candidates 0-1 are the add=True PSD selectors from the previous pass;
# candidates 2-3 swap the single psd step for dup(psd(...), psd_bands()).
base_selectors = [
    chain(bandpass_filter(1, 32), psd(1, 32, add=True)),
    chain(bandpass_filter(1, 45), psd(1, 45, add=True)),
    chain(bandpass_filter(1, 32), dup(psd(1, 32), psd_bands(), add=True)),
    chain(bandpass_filter(1, 45), dup(psd(1, 45), psd_bands(), add=True)),
]
# Every candidate is chained after a time_window(0, strategy_prompt.delay)
# step and before flatten_end.
feature_selectors = [
    chain(time_window(0, strategy_prompt.delay), selector, flatten_end)
    for selector in base_selectors
]

models = [LinearDiscriminantAnalysis]
resamplers = [mySMOTE]

In [15]:
# Run the grid over the selectors, resamplers and models defined for this
# pass, using the split created earlier.
inds, conf = ml.grid_search(
    trX, trY, tsX, tsY,
    feature_selectors, resamplers, models,
)

model=0,resampler=0,feature_selector=0: 	precision=0.647, recall=0.786, f1=0.710
New best achieved

model=0,resampler=0,feature_selector=1: 	precision=0.688, recall=0.786, f1=0.733
New best achieved

model=0,resampler=0,feature_selector=2: 	precision=0.647, recall=0.786, f1=0.710

model=0,resampler=0,feature_selector=3: 	precision=0.688, recall=0.786, f1=0.733

