In [12]:
import os
import numpy as np
import pandas as pd
import sys
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedGroupKFold, cross_validate, StratifiedKFold
from sklearn.metrics import make_scorer, roc_auc_score
import numpy as np
from sklearn.linear_model import LogisticRegression
from copy import deepcopy
import itertools
import argparse
from os import path as op
from glob import glob

# add path to classification analysis functions
from mixed_sigmoid_normalisation import MixedSigmoidScaler


bids_root = "/Users/abry4213/data/Cogitate_MEG"
n_jobs = 1
subject_ID = "CA103"
SPI_directionality_file = "/Users/abry4213/github/MEG_functional_connectivity/feature_extraction/pyspi_SPI_info.csv"
classification_type = "individual"

# Read in SPI directionality info
SPI_directionality_info = pd.read_csv(SPI_directionality_file)

# Load data paths
pyspi_res_path = f"{bids_root}/derivatives/time_series_features"
pyspi_res_path_averaged = f"{pyspi_res_path}/averaged_epochs"
pyspi_res_path_individual = f"{pyspi_res_path}/individual_epochs"

classification_res_path = f"{bids_root}/derivatives/classification_results"
classification_res_path_averaged = f"{classification_res_path}/across_participants"
classification_res_path_individual = f"{classification_res_path}/within_participants"

# Make classification result directories
os.makedirs(classification_res_path_averaged, exist_ok=True)
os.makedirs(classification_res_path_individual, exist_ok=True)

# Define scoring type
scoring = {'accuracy': 'accuracy',
           'balanced_accuracy': 'balanced_accuracy',
           'AUC': make_scorer(roc_auc_score, needs_proba=True)}

# Define classifier
model = LogisticRegression(penalty='l1', C=1, solver='liblinear', class_weight='balanced', random_state=127)
pipe = Pipeline([('scaler', MixedSigmoidScaler(unit_variance=True)), 
                        ('model', model)])




In [4]:
# meta-ROI comparisons
meta_ROIs = ["Category_Selective", "IPS", "Prefrontal_Cortex", "V1_V2"]
meta_roi_comparisons = list(itertools.permutations(meta_ROIs, 2))

# Load in pyspi results
all_pyspi_res_list = []
# for pyspi_res_file in os.listdir(pyspi_res_path_averaged):
for pyspi_res_file in glob(f"{pyspi_res_path_averaged}/*all_pyspi_results_1000ms.csv"):
    pyspi_res = pd.read_csv(pyspi_res_file)
    # Reset index
    pyspi_res.reset_index(inplace=True, drop=True)
    pyspi_res['stimulus_type'] = pyspi_res['stimulus_type'].replace(False, 'false').replace('False', 'false')
    pyspi_res['relevance_type'] = pyspi_res['relevance_type'].replace("Relevant non-target", "Relevant-non-target")
    # Rename stimulus to stimulus_presentation if it is present
    if 'stimulus' in pyspi_res.columns:
        if 'stimulus_presentation' in pyspi_res.columns:
            pyspi_res.drop(columns=['stimulus'], inplace=True)
        else:
            pyspi_res = pyspi_res.rename(columns={'stimulus': 'stimulus_presentation'})

    all_pyspi_res_list.append(pyspi_res)
all_pyspi_res = pd.concat(all_pyspi_res_list)

In [5]:
# meta-ROI comparisons
meta_ROIs = ["Category_Selective", "IPS", "Prefrontal_Cortex", "V1_V2"]
meta_roi_comparisons = list(itertools.permutations(meta_ROIs, 2))

# Relevance type comparisons
relevance_type_comparisons = ["Relevant-non-target", "Irrelevant"]

# Stimulus presentation comparisons
stimulus_presentation_comparisons = ["on", "off"]

# Stimulus type comparisons
stimulus_types = all_pyspi_res.stimulus_type.unique().tolist()
stimulus_type_comparisons = list(itertools.combinations(stimulus_types, 2))

# Also add in face vs. non-face
stimulus_type_comparisons.append(("face", "non-face"))

In [10]:
all_pyspi_res.relevance_type.unique()

array(['Irrelevant', 'Relevant-non-target', 'Relevant target'],
      dtype=object)

In [32]:
relevance_type = "Relevant-non-target"
stimulus_presentation = "on"
SPI = "cov_EmpiricalCovariance"
meta_roi_comparison = meta_roi_comparisons[0]
this_combo = ("face", "non-face")

ROI_from, ROI_to = meta_roi_comparison
# Finally, we get to the final dataset
roi_pair_wise_dataset_for_classification = (all_pyspi_res.query("meta_ROI_from == @ROI_from & meta_ROI_to == @ROI_to & relevance_type == @relevance_type & stimulus_presentation == @stimulus_presentation")
                                            .reset_index(drop=True)
                                            .drop(columns=['index']))

# Extract this SPI
this_SPI_data = roi_pair_wise_dataset_for_classification.query(f"SPI == '{SPI}'")

# Find overall number of rows
num_rows = this_SPI_data.shape[0]

# Extract SPI values
this_column_data = this_SPI_data["value"]

# Find number of NaN in this column 
num_NaN = this_column_data.isna().sum()
prop_NaN = num_NaN / num_rows

# Find mode and SD
column_mode_max = this_column_data.value_counts().max()
column_SD = this_column_data.std()

# If 0% < num_NaN < 10%, impute by the mean of each component
if 0 < prop_NaN < 0.1:
    values_imputed = (this_column_data
                        .transform(lambda x: x.fillna(x.mean())))

    this_column_data = values_imputed
    print(f"Imputing column values for {SPI}")
    this_SPI_data["value"] = this_column_data

# If there are: 
# - more than 10% NaN values;
# - more than 90% of the values are the same; OR
# - the standard deviation is less than 1*10**(-10)
# then remove the column
if prop_NaN > 0.1 or column_mode_max / num_rows > 0.9 or column_SD < 1*10**(-10):
    print(f"{SPI} has low SD: {column_SD}, and/or too many mode occurences: {column_mode_max} out of {num_rows}, and/or {100*prop_NaN}% NaN")

# Start an empty list for the classification results
SPI_combo_res_list = []

final_dataset_for_classification_this_combo = this_SPI_data.assign(stimulus_type = lambda x: np.where(x.stimulus_type == "face", "face", "non-face"))

# Fit classifier
X = final_dataset_for_classification_this_combo.value.to_numpy().reshape(-1, 1)
y = final_dataset_for_classification_this_combo.stimulus_type.to_numpy().reshape(-1, 1)
groups = final_dataset_for_classification_this_combo.subject_ID.to_numpy().reshape(-1, 1)

# Flatten and convert each item to a string
groups_flat = np.array([str(item[0]) for item in groups])
group_stratified_CV = StratifiedGroupKFold(n_splits = 10, shuffle = True, random_state=127)

# Make a deepcopy of the pipeline
this_iter_pipe = deepcopy(pipe)
this_classifier_res = cross_validate(this_iter_pipe, X, y, groups=groups_flat, cv=group_stratified_CV, scoring=scoring, n_jobs=n_jobs, 
                                            return_estimator=False, return_train_score=False)

# Add the results to the list
this_SPI_combo_df = pd.DataFrame({"SPI": [SPI], 
                                    "meta_ROI_from": [ROI_from],
                                    "meta_ROI_to": [ROI_to],
                                    "relevance_type": [relevance_type],
                                    "stimulus_presentation": [stimulus_presentation],
                                    "stimulus_combo": [this_combo], 
                                    "accuracy": [this_classifier_res['test_accuracy'].mean()],
                                    "balanced_accuracy": [this_classifier_res['test_balanced_accuracy'].mean()],
                                    "AUC": [this_classifier_res['test_AUC'].mean()]})


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [None]:
this_classifier_res = cross_validate(this_iter_pipe, X, y, groups=groups, cv=group_stratified_CV, scoring=scoring, n_jobs=n_jobs, 
                                            return_estimator=False, return_train_score=False)[["test_accuracy", "test_balanced_accuracy", "test_AUC"]].mean()


this_SPI_combo_df = pd.DataFrame({"SPI": [SPI], 
                                    "meta_ROI_from": [ROI_from],
                                    "meta_ROI_to": [ROI_to],
                                    "relevance_type": [relevance_type],
                                    "stimulus_presentation": [stimulus_presentation],
                                    "stimulus_combo": [this_combo], 
                                    "accuracy": [this_classifier_res.test_accuracy],
                                    "balanced_accuracy": [this_classifier_res.test_balanced_accuracy],
                                    "AUC": [this_classifier_res.test_AUC]})
