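# Stimulus decoding

Decode the stimulus label of each trial from 25–40 Hz coherence on links involving V1, using a random-forest classifier.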

In [1]:
import sys

# Directory added to the module search path; presumably where the `config`
# and `GDa` modules imported below live — verify.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
_GDA_PATH = "/home/vinicius/storage1/projects/GrayData-Analysis"

# Guard the insert so that re-running this cell does not pile up duplicate
# entries in sys.path.
if _GDA_PATH not in sys.path:
    sys.path.insert(1, _GDA_PATH)

In [33]:
import argparse
import os

import numpy as np
import pandas as pd
from frites.dataset import DatasetEphy
from frites.estimator import GCMIEstimator
from frites.workflow import WfMi
from tqdm import tqdm

from config import sessions
from GDa.temporal_network import temporal_network
from GDa.util import _extract_roi, average_stages

In [3]:
def remove_same_roi(df):
    """Drop rows whose ``roi`` label pairs a region with itself.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``roi`` column. Each label is split on "-" by
        ``_extract_roi`` into a source part and a target part.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` for which the source and target parts differ.
    """
    # assumes `_extract_roi` returns two equal-length arrays — TODO confirm
    sources, targets = _extract_roi(df.roi.values, "-")
    is_self_pair = sources == targets
    return df.iloc[~is_self_pair, :]


def remove_sca(df):
    """Drop rows whose ``roi`` label involves one of the excluded regions.

    A row is removed when either side of its "-"-separated ``roi`` label is
    one of "Caudate", "Claustrum", "Thal" or "Putamen".

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``roi`` column.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` in which neither side is an excluded region.
    """
    excluded = ["Caudate", "Claustrum", "Thal", "Putamen"]
    sources, targets = _extract_roi(df.roi.values, "-")
    # Flag a row as soon as one of its two sides is in the excluded list.
    drop = np.isin(sources, excluded) | np.isin(targets, excluded)
    return df.iloc[~drop, :]

In [4]:
# Truthy -> load the surrogate coherence file; 0 -> load the regular file
# together with its threshold file.
surr = 0
# Forwarded as the second argument of `average_stages` when loading a session.
avg = 1
# Prefix of the coherence file names ("<metric>_at_cue*.nc").
metric = "coh"

In [5]:
# Base directory, expanded relative to the current user's home.
# NOTE(review): `_ROOT` is not referenced by any cell visible here — confirm
# it is still needed.
_ROOT = os.path.expanduser("~/funcog/gda")

In [7]:
# File names depend on whether the surrogate data are requested.
coh_file = f"{metric}_at_cue_surr.nc" if surr else f"{metric}_at_cue.nc"
# The threshold file is only passed along with the non-surrogate data.
coh_sig_file = None if surr else f"thr_{metric}_at_cue_surr.nc"

coh = []
stim = []
# NOTE(review): `out`, `trials` and `stim` are rebound on every pass, so only
# the last session would survive if more ids were listed here; `coh` is never
# filled. Accumulating several sessions into these lists is not implemented.
for s_id in tqdm(["141024"]):
    net = temporal_network(
        date=s_id,
        coh_file=coh_file,
        coh_sig_file=coh_sig_file,
        wt=None,
        trial_type=[1],
        behavioral_response=[1],
    )
    # Average if needed
    out = average_stages(net.super_tensor, avg)
    # Keep only the links that have V1 on either side of the "-" separator
    roi_s, roi_t = _extract_roi(out.roi.values, "-")
    touches_v1 = np.logical_or(roi_s == "V1", roi_t == "V1")
    out = out.isel(roi=touches_v1)
    # Trial labels and stimulus values, kept to relabel rows later on
    trials = net.super_tensor.trials.data
    stim = net.super_tensor.stim
    # To save memory
    del net

100%|██████████████████████████████████████████████████████████████████████████████████████████| 1/1 [06:13<00:00, 373.44s/it]


In [9]:
# Long-format frame of the 25-40 range of `freqs`, values in column "coh".
df = (
    out.sel(freqs=slice(25, 40))
    .to_dataframe(name="coh")
    .reset_index()
)

In [10]:
# First drop links touching the excluded regions, then self-pairs.
df = df.pipe(remove_sca).pipe(remove_same_roi)

In [11]:
# Rows belonging to the first time index only.
df[df["times"] == 0]

Unnamed: 0,trials,roi,freqs,times,coh
0,1,a46D-V1,27.0,0,0.000000
5,1,a46D-V1,35.0,0,0.018711
10,1,a46D-V1,27.0,0,0.000000
15,1,a46D-V1,35.0,0,0.000000
20,1,a46D-V1,27.0,0,0.000000
...,...,...,...,...,...
10004195,954,V1-V2,35.0,0,0.045517
10004200,954,V1-V2,27.0,0,0.000067
10004205,954,V1-V2,35.0,0,0.119106
10004210,954,V1-V2,27.0,0,0.024024


In [13]:
# Display the filtered long-format frame (one row per trials/roi/freqs/times).
df

Unnamed: 0,trials,roi,freqs,times,coh
0,1,a46D-V1,27.0,0,0.000000
1,1,a46D-V1,27.0,1,0.002524
2,1,a46D-V1,27.0,2,0.000000
3,1,a46D-V1,27.0,3,0.011936
4,1,a46D-V1,27.0,4,0.000000
...,...,...,...,...,...
10004215,954,V1-V2,35.0,0,0.049435
10004216,954,V1-V2,35.0,1,0.005216
10004217,954,V1-V2,35.0,2,0.008371
10004218,954,V1-V2,35.0,3,0.005777


In [15]:
# Lookup from trial label to its stimulus value cast to int.
trial_to_stim = {trial: label for trial, label in zip(trials, stim.astype(int))}

In [48]:
# One frame per time index 0..4; each value column is named "coh_<index>".
df = []
for time in tqdm(range(5)):
    stage = out.sel(times=time, freqs=slice(25, 40))
    df.append(stage.to_dataframe(f"coh_{time}").reset_index())

100%|███████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00,  6.80it/s]


In [49]:
# Put the per-time frames side by side, then keep only the first occurrence
# of each repeated column name (the index columns appear once per frame).
df = (
    pd.concat(df, axis=1)
    .pipe(lambda wide: wide.loc[:, ~wide.columns.duplicated()])
    .copy()
)

In [52]:
# Replace each trial label by its stimulus value; the column keeps the name
# "trials". NOTE(review): not idempotent — running this cell twice would look
# up stimulus values as if they were trial labels.
df = df.assign(trials=df["trials"].map(trial_to_stim))

In [57]:
# First drop links touching the excluded regions, then self-pairs.
df = df.pipe(remove_sca).pipe(remove_same_roi)

In [58]:
# Display the wide frame: one "coh_<index>" column per time index, with the
# "trials" column now holding the mapped stimulus values.
df

Unnamed: 0,trials,roi,freqs,coh_0,coh_1,coh_2,coh_3,coh_4
0,5,a46D-V1,27.0,0.000000,0.002524,0.000000,0.011936,0.000000
1,5,a46D-V1,35.0,0.018711,0.083652,0.013918,0.000000,0.000000
2,5,a46D-V1,27.0,0.000000,0.012004,0.000000,0.017627,0.000000
3,5,a46D-V1,35.0,0.000000,0.024961,0.000000,0.013555,0.013785
4,5,a46D-V1,27.0,0.000000,0.000000,0.000000,0.020997,0.000000
...,...,...,...,...,...,...,...,...
2000839,1,V1-V2,35.0,0.045517,0.001244,0.000000,0.073931,0.000000
2000840,1,V1-V2,27.0,0.000067,0.002325,0.000000,0.014647,0.026296
2000841,1,V1-V2,35.0,0.119106,0.000000,0.000000,0.000000,0.000000
2000842,1,V1-V2,27.0,0.024024,0.068390,0.115423,0.000000,0.000000


### Classifier

In [61]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.preprocessing import OneHotEncoder

In [62]:
# Features: the five per-time coherence columns; labels: the stimulus values
# stored in the "trials" column.
feature_cols = [f"coh_{idx}" for idx in range(5)]
X = df[feature_cols].to_numpy()
y = df["trials"].to_numpy()

In [64]:
# Hold out a third of the rows for evaluation. The split is seeded (same seed
# as the forest used in the grid search) so that re-running the notebook
# reproduces the same partition.
# NOTE(review): rows are shuffled individually; if several rows come from the
# same trial they can end up on both sides of the split — confirm this is
# acceptable for the decoding estimate.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, shuffle=True, random_state=0
)

In [None]:
# Hyper-parameter grid: 20 depths x 3 forest sizes x 3 pruning strengths.
max_depths = np.arange(1, 21, dtype=int)
n_estimators = [100, 150, 200]

parameters = dict(
    max_depth=max_depths,
    n_estimators=n_estimators,
    bootstrap=[True],
    ccp_alpha=[0, 0.1, 0.2],
)

# Each forest runs single-threaded; parallelism is left to the grid search.
est = RandomForestClassifier(max_features=4, random_state=0, n_jobs=1)

clf = GridSearchCV(
    est,
    parameters,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=3,
    return_train_score=True,
)

clf.fit(x_train, y_train)

Fitting 5 folds for each of 180 candidates, totalling 900 fits


In [None]:
# Parameter combination selected by the grid search.
clf.best_params_

In [None]:
# Rebuild the forest with the selected hyper-parameters. `max_features` and
# `random_state` are not part of the searched grid, so they are repeated here
# to match the estimator that was tuned and to keep the result reproducible.
est = RandomForestClassifier(
    **clf.best_params_,
    max_features=4,
    random_state=0,
    n_jobs=-1,
)

In [None]:
# 5-fold cross-validated accuracy computed on the held-out rows.
# NOTE(review): `est` is re-fitted inside each fold on part of the held-out
# split; the model fitted by the grid search is not what is scored here.
cv_scores = cross_val_score(
    estimator=est,
    X=x_test,
    y=y_test,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

In [None]:
# Mean and spread of the fold accuracies, printed as a (mean, std) tuple.
mean_acc, std_acc = cv_scores.mean(), cv_scores.std()
print((mean_acc, std_acc))