In [15]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import re
from pathlib import Path
from fmri_decoder.data import DataConfig, ModelConfig, SurfaceData, TimeseriesData
from fmri_decoder.model import MVPA
from fmri_decoder.preprocessing import (
    TimeseriesPreproc,
    TimeseriesSampling,
)
import numpy as np
import pandas as pd
import functools
from sklearn.feature_selection import f_classif
from src.config import N_LAYER, SESSION, DIR_BASE, N_RUN

class Data:
    """File paths to data.

    Builds the locations of all input files of one subject, session and
    cortical area below ``DIR_BASE``. Nothing is read from disk here; every
    accessor only assembles paths.

    Parameters
    ----------
    subj : str
        Subject name. Used as directory name and as key into ``SESSION``.
    sequence : str
        Sequence name. Used as key into ``SESSION[subj]`` and as prefix of the
        session name.
    day : int
        Key/index into ``SESSION[subj][sequence]``.
    area : str
        Area name used in the label file name and the sample data directory.

    Attributes
    ----------
    sess : str
        Session name, i.e. ``sequence`` directly followed by
        ``SESSION[subj][sequence][day]``.
    """

    def __init__(self, subj, sequence, day, area):
        self.subj = subj
        self.sess = f"{sequence}{SESSION[self.subj][sequence][day]}"
        self.day = day
        self.area = area

    @property
    def _subj_dir(self):
        """Directory of the subject below ``DIR_BASE``."""
        return Path(DIR_BASE) / self.subj

    @property
    def _sess_stripped(self):
        """Session name with every ``_uncorrected`` marker removed."""
        # plain substring removal; no regular expression is needed for a literal
        return self.sess.replace("_uncorrected", "")

    def _run_dirs(self, sess):
        """Return the directories ``Run_1`` ... ``Run_<N_RUN>`` of session ``sess``."""
        return [
            self._subj_dir / "odc" / sess / f"Run_{run}"
            for run in range(1, N_RUN + 1)
        ]

    @property
    def surfaces(self):
        """File names of surface geometries.

        Returns
        -------
        dict
            Maps ``"lh"`` and ``"rh"`` to a list of ``N_LAYER`` file names, one
            per layer index starting at 0.
        """
        layer_dir = self._subj_dir / "anatomy" / "layer"
        return {
            hemi: [str(layer_dir / f"{hemi}.layer_{i}") for i in range(N_LAYER)]
            for hemi in ("lh", "rh")
        }

    @property
    def labels(self):
        """File names of labels.

        Returns
        -------
        dict
            Maps ``"lh"`` and ``"rh"`` to a list of two file names: the label of
            ``self.area`` from ``label_benson`` followed by the ``fov`` label
            from ``label``.
        """
        anatomy_dir = self._subj_dir / "anatomy"
        return {
            hemi: [
                str(anatomy_dir / "label_benson" / f"{hemi}.{self.area}.label"),
                str(anatomy_dir / "label" / f"{hemi}.fov.label"),
            ]
            for hemi in ("lh", "rh")
        }

    def get_sample_data(self, layer):
        """Load sample data from MVPA analysis.

        Only the location is built; the file itself is not opened here.

        Parameters
        ----------
        layer : int
            Layer index that is inserted into the file name.

        Returns
        -------
        pathlib.Path
            Path of the parquet file. Unlike the other accessors this is a
            ``Path`` object and not a string.
        """
        return (
            Path(DIR_BASE)
            / "paper"
            / "v2.0"
            / "decoding"
            / self.subj
            / self.sess
            / f"{self.area}_bandpass_none"
            / "sample"
            / f"sample_data_{layer}.parquet"
        )

    @property
    def timeseries(self):
        """File names of fmri time series.

        Returns
        -------
        list of str
            One file name per run (``N_RUN`` entries). The file that is selected
            depends on the session name:

            * contains ``VASO`` and ``uncorrected``: ``ubold_upsampled.nii`` in
              the session directory without the ``_uncorrected`` marker,
            * contains ``VASO`` only: ``uvaso_upsampled_corrected.nii``,
            * otherwise: ``udata.nii``.
        """
        if "VASO" in self.sess and "uncorrected" in self.sess:
            sess_, basename = self._sess_stripped, "ubold_upsampled.nii"
        elif "VASO" in self.sess:
            sess_, basename = self.sess, "uvaso_upsampled_corrected.nii"
        else:
            # non-VASO sessions keep the session name untouched
            sess_, basename = self.sess, "udata.nii"
        return [str(run_dir / basename) for run_dir in self._run_dirs(sess_)]

    @property
    def events(self):
        """File names of condition files.

        Returns
        -------
        list of str
            One ``*_odc_Cond.mat`` file name per run (``N_RUN`` entries). The
            ``_uncorrected`` marker is removed from the session name in both the
            directory and the file name.
        """
        sess_ = self._sess_stripped
        return [
            str(run_dir / "logfiles" / f"{self.subj}_{sess_}_Run{run}_odc_Cond.mat")
            for run, run_dir in enumerate(self._run_dirs(sess_), start=1)
        ]

    @property
    def deformation(self):
        """File name of coordinate mapping.

        Returns
        -------
        str
            Location of ``source2target.nii.gz`` of the session, with the
            ``_uncorrected`` marker removed from the session name.
        """
        return str(
            self._subj_dir
            / "deformation"
            / "odc"
            / self._sess_stripped
            / "source2target.nii.gz"
        )


class Univariate:
    """Compute the univariate profile for different number of features.

    On construction the label indices of both hemispheres are loaded and, for
    every layer in ``range(N_LAYER)``, ranked by the F-statistic of the stored
    sample data of that layer.

    Parameters
    ----------
    subj, sess, day, area
        Forwarded unchanged to ``Data(subj, sess, day, area)``. Note that
        ``sess`` is passed as the ``sequence`` argument of ``Data``.

    Attributes
    ----------
    data : Data
        File locations used by this object.
    label : np.ndarray
        Label indices of the left hemisphere followed by the right hemisphere.
    hemi : np.ndarray
        Same length as ``label``; 0 for left and 1 for right hemisphere entries.
    label_sorted, hemi_sorted : tuple of np.ndarray
        One entry per layer with ``label`` / ``hemi`` reordered from highest to
        lowest F-statistic.
    """

    def __init__(self, subj, sess, day, area):
        self.subj = subj
        self.sess = sess
        self.day = day
        self.area = area
        self.data = Data(self.subj, self.sess, self.day, self.area)
        self.label, self.hemi = self.get_label
        self.label_sorted, self.hemi_sorted = zip(
            *[self.sort_features(i) for i in range(N_LAYER)]
        )

    # cached_property stores the result on the instance itself. The previous
    # property + lru_cache combination put ``self`` into a module-level cache,
    # which keeps every instance (and its arrays) alive and shares eviction
    # between unrelated instances.
    @functools.cached_property
    def get_label(self):
        """Get label and hemisphere.

        The label intersection of each hemisphere is loaded via ``SurfaceData``
        and both hemispheres are concatenated (left first). The result is
        computed once per instance.

        Returns
        -------
        label : np.ndarray
            Concatenated label indices.
        hemi : np.ndarray
            Array of zeros (left) and ones (right) with the same length.
        """
        # NOTE(review): the second positional argument is passed as None;
        # presumably the localizer files -- confirm against SurfaceData.
        surf_data = SurfaceData(self.data.surfaces, None, self.data.labels)

        label_left = surf_data.load_label_intersection("lh")
        label_right = surf_data.load_label_intersection("rh")

        hemi = np.zeros(len(label_left) + len(label_right))
        hemi[len(label_left):] = 1
        label = np.append(label_left, label_right)

        return label, hemi

    def sort_features(self, layer):
        """Sort label and hemi array based on features.

        Features are ranked by their ANOVA F-statistic (``f_classif``) from
        highest to lowest. Ties keep the order of the previous implementation
        (higher column index first). Features with an undefined (NaN)
        F-statistic, e.g. constant columns, are ranked last instead of ending
        up at an arbitrary position.

        Parameters
        ----------
        layer : int
            Layer index of the sample data file to read.

        Returns
        -------
        label_sorted : np.ndarray
            ``self.label`` reordered by feature rank.
        hemi_sorted : np.ndarray
            ``self.hemi`` reordered by feature rank.
        """
        dtf = pd.read_parquet(self.data.get_sample_data(layer))

        # all columns after the first two are treated as features
        # NOTE(review): assumes feature column k belongs to self.label[k] and
        # that both have the same length -- confirm against the stored samples.
        features = dtf.columns[2:]

        X = np.array(dtf.loc[:, features])
        y = np.array(dtf.loc[:, "label"])

        f_statistic = np.asarray(f_classif(X, y)[0], dtype=float)
        # NaN does not compare consistently, so map it below every real score
        score = np.where(np.isnan(f_statistic), -np.inf, f_statistic)
        # stable ascending sort, then reversed: descending score and, for equal
        # scores, descending column index
        index_sorted = np.argsort(score, kind="stable")[::-1]

        label_sorted = self.label[index_sorted]
        hemi_sorted = self.hemi[index_sorted]

        return label_sorted, hemi_sorted

In [None]:
# arguments
DIR_OUT = "/data/pt_01880/zzz"
SUBJ = "p1"
SEQ = "GE_EPI"
DAY = 0
AREA = "v1"  # v1, v2, v3, v2a, v2b, v3a or v3b


# file locations of the selected subject / sequence / day / area
data = Data(SUBJ, SEQ, DAY, AREA)

# one flat settings dictionary that all fmri_decoder ``from_dict`` constructors
# below read from
# NOTE(review): units of TR and cutoff_sec are presumably seconds -- confirm
config: dict[str, int | float | str | None] = {
    "TR": 3,
    "n_skip": 2,
    "cutoff_sec": 270,
    "filter_size": None,
    "nmax": 200,
    "radius": None,
    "feature_scaling": "standard",
    "sample_scaling": None,
    "file_series": data.timeseries,
    "file_events": data.events,
    "file_layer": data.surfaces,
    "file_deformation": data.deformation,
    "file_localizer": None,
    "file_label": data.labels,
    "randomize_labels": False,
}

# make output directory
dir_out = Path(DIR_OUT)
dir_out.mkdir(parents=True, exist_ok=True)

# sub-folder paths; only the paths are built here, nothing is created
dir_sample, dir_label, dir_model = (
    dir_out / name for name in ("sample", "label", "model")
)

# load data
time_data = TimeseriesData.from_dict(config)
surf_data = SurfaceData.from_dict(config)
config_data = DataConfig.from_dict(config)
config_model = ModelConfig.from_dict(config)

In [29]:
# index of the layer whose feature ranking is used for the selection
# NOTE(review): presumably the mid-cortical layer -- confirm against N_LAYER
mid = 5
univariate = Univariate(SUBJ, SEQ, DAY, AREA)

# keep the first ``nmax`` entries of the ranked hemisphere / label arrays
features_selected = {
    "hemi": univariate.hemi_sorted[mid][: config_model.nmax],
    "label": univariate.label_sorted[mid][: config_model.nmax],
}

200

In [3]:
# timeseries preprocessing
# the preprocessing object is built from the same settings dictionary as the
# data and model configuration objects
preproc = TimeseriesPreproc.from_dict(config)
# detrend time series
# the return value is discarded on purpose; only the call itself is needed
# NOTE(review): presumably the detrended series is kept inside ``preproc`` and
# picked up by ``crop_data`` below -- confirm in fmri_decoder
_ = preproc.detrend_timeseries(config_data.tr, config_data.cutoff_sec)
# crop time series
# yields the volume data and the events that the layer loop further below uses
data_vol, events = preproc.crop_data(config_data.n_skip)

Detrend timeseries ...
Crop timeseries ...


(10, 66)

In [None]:
# iterate over surfaces (layers)
# the number of layers is taken from the left hemisphere file list
n_surf = len(surf_data.file_layer["lh"])
for i in range(n_surf):
    # inputs of the sampling step that are identical for both hemispheres
    file_deformation = config_data.file_deformation
    file_reference = time_data.file_series[0]

    # sample time series onto the surface of each hemisphere
    data_sampled = {}
    for hemi in ["lh", "rh"]:
        vtx, fac = surf_data.load_layer(hemi, i)
        sampler = TimeseriesSampling(vtx, fac, data_vol)
        data_sampled[hemi] = sampler.sample_timeseries(file_deformation, file_reference)

    if surf_data.file_localizer is None:
        # no localizer: restrict every sampled array to the label intersection
        # and let MVPA pick up to ``nmax`` features itself
        for hemi in ["lh", "rh"]:
            label = surf_data.load_label_intersection(hemi)
            data_sampled[hemi] = [run[label, :] for run in data_sampled[hemi]]
        mvpa = MVPA.from_data(
            data_sampled, events, nmax=config_model.nmax, remove_nan=True
        )
    else:
        # NOTE(review): config["file_localizer"] is set to None in the config
        # cell, so presumably this branch never runs and ``features_selected``
        # stays unused -- confirm that this is intended
        mvpa = MVPA.from_selected_data(data_sampled, features_selected, events)

    # model preparation and fitting
    # scaling (skipped when the respective setting is empty)
    if config_model.feature_scaling:
        mvpa.scale_features(config_model.feature_scaling)
    if config_model.sample_scaling:
        mvpa.scale_samples(config_model.sample_scaling)
    # the attribute is only read; the value itself is discarded
    _ = mvpa.evaluate

    # save results
    # NOTE(review): every layer writes to the same files; presumably
    # ``save_results`` appends rather than overwrites -- confirm
    for file_name, metric in (
        ("accuracy.csv", "accuracy"),
        ("sensitivity.csv", "sensitivity"),
        ("specificity.csv", "specificity"),
        ("f1.csv", "f1"),
    ):
        mvpa.save_results(dir_out / file_name, metric)

print("Done.")
