In [1]:
# prepend parent dir to import from local pcsp
import sys
sys.path.insert(0, '../')

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pcsp
from pcsp import PCSPipeline, ModuleSet, Module, init_args, convert
from pcsp.pipeline import build_graph
from functools import partial
from sklearn.linear_model import LogisticRegression, Lasso, LassoCV, LassoLarsCV, LassoLarsIC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
import sklearn.utils
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from irf import irf_utils, irf_jupyter_utils
from irf.irf_utils import run_iRF
from irf.ensemble import RandomForestClassifierWithWeights
from sklearn.inspection import permutation_importance
from scipy import stats

In [2]:
def top_n_features(X, Y, n):
    """Return the ``n`` columns of ``X`` most correlated with ``Y``.

    Each column of ``X`` is scored by its signed Pearson correlation with
    ``Y`` (``np.corrcoef``); columns are ranked from highest to lowest score.
    Columns whose correlation is NaN (e.g. constant columns) are never
    selected.

    Parameters
    ----------
    X : 2-D array
        Candidate features, one per column.
    Y : 1-D array
        Response values, one per row of ``X``.
    n : int
        Maximum number of columns to keep; fewer are returned when fewer
        columns have a finite correlation.

    Returns
    -------
    2-D array
        ``X`` restricted to the selected columns, best-scoring first.
    """
    # Constant columns make corrcoef compute 0/0; the resulting NaNs are
    # handled explicitly below, so the floating-point warning is just noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        corr = np.apply_along_axis(lambda x: np.corrcoef(x, Y)[0, 1], 0, X)
    # Rank only the finite scores, then map the ranks back to the ORIGINAL
    # column indices. Sorting the NaN-filtered array and indexing X with the
    # result directly would pick the wrong columns whenever a NaN is present.
    valid = np.flatnonzero(~np.isnan(corr))
    ranked = valid[np.argsort(corr[valid])[::-1][:n]]
    return X[:, ranked]

In [15]:
# load data
# Features (X) and responses (Y) are read from space-delimited text files and
# converted to plain NumPy arrays.
# `on_bad_lines="warn"` replaces `error_bad_lines=False`, which was deprecated in
# pandas 1.3 and removed in pandas 2.0 (where it raises TypeError). The behaviour
# is the same: malformed rows are dropped and a warning is emitted for each.
# NOTE(review): rows are dropped independently in each file, so a bad line in only
# one of them would misalign X and Y rows — confirm the files are clean.
X = np.asarray(pd.read_csv("../data/fmri/fit_feat.txt", sep=" ", on_bad_lines="warn"))
Y = np.asarray(pd.read_csv("../data/fmri/resp_dat.txt", sep=" ", on_bad_lines="warn"))
# Hold out 33% of the rows for testing; fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=14)

In [None]:
# Seed NumPy's global RNG so any NumPy-based randomness below is repeatable.
np.random.seed(14)
# Pass the four splits through pcsp's init_args, tagging each with its name.
# NOTE(review): this rebinds X_train/X_test/y_train/y_test to init_args' outputs, so
# re-running this cell without re-running the data-loading cell feeds already-processed
# objects back into init_args — confirm that is harmless, or always Restart & Run All.
X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), names=['X_train', 'X_test', 'y_train', 'y_test'])

# split y_train by voxel and extract top 500 correlated features per voxel
# One callable per response column i in range(3); each returns the pair
# (top_n_features(x, y[:, i], 500), y[:, i]). `partial(..., i=i)` fixes i per callable,
# so the lambdas do not all share the loop variable's final value.
voxel_extract_funcs = [partial(lambda x, y, i: (top_n_features(x, y[:,i], 500), y[:,i]), i=i) for i in range(3)]
voxel_extract_set = ModuleSet(name='voxel_extract', modules=voxel_extract_funcs)
X_trains, y_trains = voxel_extract_set(X_train, y_train)
# NOTE(review): the same extractors are applied to the test split, so top_n_features
# ranks columns by their correlation with y_test; the columns kept for test may differ
# from those kept for train — confirm this is intended.
X_tests, y_tests = voxel_extract_set(X_test, y_test)

In [None]:
# modeling
# A ModuleSet holding a single LassoCV estimator, registered under the key "Lasso",
# is fit on the per-voxel training outputs produced by the voxel_extract step.
modeling_set = ModuleSet(name='modeling', modules=[LassoCV()], module_keys=["Lasso"])
modeling_set.fit(X_trains, y_trains)

In [23]:
# predict matches every X_tests key ('X_test', 'y_test', 'voxel_extract_i') with
# every modeling_set.out key ('X_train', 'y_train', 'voxel_extract_j', 'Lasso'),
# i.e. test voxel i is paired with the model fit on voxel j for all (i, j).
preds = modeling_set.predict(X_tests)

In [25]:
# evaluate fails to match the y_tests key ('X_test', 'y_test', 'voxel_extract_i') with the preds key
# ((('X_train', 'y_train', 'voxel_extract_i'), ('X_train', 'y_train', 'voxel_extract_i'), 'Lasso'), ('X_test', 'y_test', 'voxel_extract_i'))
# NOTE(review): accuracy_score is a classification metric, while the model fit above is
# LassoCV (a regressor) — confirm the metric choice once the key matching is resolved.
hard_metrics_set = ModuleSet(name='hard_metrics', modules=[accuracy_score], module_keys=["Acc"])
hard_metrics = hard_metrics_set.evaluate(preds, y_tests)