In [1]:
# prepend parent dir to import from local pcsp
import sys
sys.path.insert(0, '../')

In [8]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pcsp
from pcsp import PCSPipeline, ModuleSet, Module, init_args, sep_dicts
from pcsp.pipeline import build_graph
from functools import partial
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LassoLarsCV, LassoLarsIC
from sklearn.metrics import r2_score, explained_variance_score
from sklearn.model_selection import train_test_split
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.decomposition import PCA

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
ind = {}
def top_n_features(X, Y, n, i):
    if i not in ind:
        corr = np.abs(np.apply_along_axis(lambda x: np.corrcoef(x, Y[:,i])[0,1], 0, X))
        ind[i] = np.argsort(corr[~np.isnan(corr)])[::-1][:n]
    return X[:,ind[i]]

def pca(X, n):
    return PCA(n_components=n, copy=True).fit(X).transform(X)


In [4]:
# load data
X = np.asarray(pd.read_csv("../data/fmri/fit_feat.txt", sep=" ", error_bad_lines=False))
Y = np.asarray(pd.read_csv("../data/fmri/resp_dat.txt", sep=" ", error_bad_lines=False))

In [None]:
np.random.seed(14)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=14)
X_train, X_test, y_train, y_test = init_args((X_train, X_test, y_train, y_test), names=['X_train', 'X_test', 'y_train', 'y_test'])

# split y_train by voxel and extract top 500 correlated features per voxel
voxel_extract_funcs = [partial(lambda x, y, i: (top_n_features(x, y, 20, i), y[:,i]), i=i) for i in range(20)]
voxel_extract_set = ModuleSet(name='voxel_extract', modules=voxel_extract_funcs, output_matching=True)
X_trains, y_trains = sep_dicts(voxel_extract_set(X_train, y_train))
X_tests, y_tests = sep_dicts(voxel_extract_set(X_test, y_test))

In [25]:
# modeling
modeling_set = ModuleSet(name='modeling', modules=[LassoCV()], module_keys=["Lasso"])
modeling_set.fit(X_trains, y_trains)
modeling_set.out

<pcsp.module_set.ModuleSet at 0x7f9fb73c5b20>

In [38]:
preds = modeling_set.predict(X_trains)
for k in preds.keys():
    print(k)

('X_train', 'X_train', 'voxel_extract_0', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_1', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_2', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_3', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_4', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_5', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_6', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_7', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_8', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_9', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_10', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_11', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_12', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_13', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_14', 'y_train', 'Lasso')
('X_train', 'X_train', 'voxel_extract_15', 'y_train', 'Lasso')
('

In [37]:
# evaluate fails to match y_tests key ('X_test', 'y_test', 'voxel_extract_i') with preds key 
# (('X_train', 'y_train', 'voxel_extract_i', 'Lasso'), ('X_test', 'y_test', 'voxel_extract_i'))
hard_metrics_set = ModuleSet(name='hard_metrics', modules=[r2_score, explained_variance_score], module_keys=["R2", "EV"])
hard_metrics = hard_metrics_set.evaluate(y_trains, preds)

for k, v in hard_metrics.items():
    print(f'{k}: {v:.3f}')

('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_0', 'y_train', 'Lasso', 'R2'): 0.171
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_1', 'y_train', 'Lasso', 'R2'): 0.060
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_2', 'y_train', 'Lasso', 'R2'): 0.134
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_3', 'y_train', 'Lasso', 'R2'): 0.009
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_4', 'y_train', 'Lasso', 'R2'): 0.086
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_5', 'y_train', 'Lasso', 'R2'): 0.070
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_6', 'y_train', 'Lasso', 'R2'): 0.127
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_7', 'y_train', 'Lasso', 'R2'): 0.044
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'voxel_extract_8', 'y_train', 'Lasso', 'R2'): 0.116
('y_train', 'voxel_extract_0', 'X_train', 'X_train', 'v

TypeError: unsupported format string passed to tuple.__format__