In [2]:
'''
Load fMRI data
The data is from: https://openneuro.org/datasets/ds004144/versions/1.0.1
We have 66 subjects, 33 of who have fibromyalgia and 33 of who are controls
fMRI is upper triangle of 264x264 functional connectivity based on Power atlas
'''

import pickle

data = None

with open('../data/fibromyalgia-data.pkl', 'rb') as f:
    data = pickle.load(f)
    
list(data.keys())

['subs', 'fibromyalgia', 'fibromyalgia_score', 'rest', 'epr']

In [9]:
# Package fMRI data into data matrix and response variables

import numpy as np

x = []
y = []

for task in ['rest', 'epr']:
    for k,v in data[task].items():
        x.append(v)
        y.append(int(data['fibromyalgia'][k]))
    
x = np.stack(x)
y = np.array(y)
    
print(x.shape)
print(y.shape)

(131, 34716)
(131,)


In [23]:
# We should see similar results for LatSim and LogisticRegression
# Except LatSim is much faster

import sys

if '..' not in sys.path:
    sys.path.append('..')

from latsim import LatSimClf
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

def rmse(yhat, y):
    return np.mean((yhat-y)**2)**0.5

accs = []

for i in range(30):
    xtr, xt, ytr, yt = train_test_split(x, y, train_size=0.8)
    clf = LatSimClf().fit(xtr,ytr)
#     clf = LogisticRegression(C=1).fit(xtr,ytr)
    yhat = clf.predict(xt)
    acc = np.mean(yhat == yt)
    accs.append(acc)
    print(acc)
    
print('---')
print(np.mean(accs))
print(np.std(accs))

0.6666666666666666
0.8148148148148148
0.9259259259259259
0.7037037037037037
0.7777777777777778
0.7037037037037037
0.6666666666666666
0.8888888888888888
0.7037037037037037
0.6666666666666666
0.7777777777777778
0.7407407407407407
0.7777777777777778
0.5925925925925926
0.7777777777777778
0.7407407407407407
0.6666666666666666
0.6666666666666666
0.7407407407407407
0.7407407407407407
0.7407407407407407
0.7407407407407407
0.8148148148148148
0.7407407407407407
0.7037037037037037
0.8148148148148148
0.8148148148148148
0.6666666666666666
0.7407407407407407
0.7777777777777778
---
0.7432098765432098
0.06957532248694011


In [13]:
import sys

if '..' not in sys.path:
    sys.path.append('..')

from latsim import LatSimClf
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# parameters = LatSimClf.get_default_distributions()
parameters = {'ld': [1, 10], 'stop': [0, 0.1]}
sim = LatSimClf()
clf = GridSearchCV(sim, parameters, scoring='accuracy')

xtr, xt, ytr, yt = train_test_split(x, y, stratify=y, train_size=0.8)
clf.fit(xtr, ytr)

clf.cv_results_

{'mean_fit_time': array([0.09296627, 0.06823435, 0.08515639, 0.08539343]),
 'std_fit_time': array([0.01422698, 0.00032864, 0.00120074, 0.00217892]),
 'mean_score_time': array([0.00234475, 0.00198679, 0.00194292, 0.00179815]),
 'std_score_time': array([3.28873471e-04, 4.73764534e-05, 5.63714299e-05, 1.12559833e-04]),
 'param_ld': masked_array(data=[1, 1, 10, 10],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_stop': masked_array(data=[0, 0.1, 0, 0.1],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'ld': 1, 'stop': 0},
  {'ld': 1, 'stop': 0.1},
  {'ld': 10, 'stop': 0},
  {'ld': 10, 'stop': 0.1}],
 'split0_test_score': array([0.85714286, 0.76190476, 0.85714286, 0.85714286]),
 'split1_test_score': array([0.66666667, 0.66666667, 0.71428571, 0.66666667]),
 'split2_test_score': array([0.61904762, 0.66666667, 0.61904762, 0.66666667]),
 'split3_test_score': array([0.5