In [1]:
'''
Load fMRI data
You must first run 'python getdata.py' in the LatentSimilarity directory to get the data
The data is from: https://openneuro.org/datasets/ds004144/versions/1.0.1
We have 66 subjects, 33 of who have fibromyalgia and 33 of who are controls
fMRI is upper triangle of 264x264 functional connectivity based on Power atlas
'''

import pickle

fmriData = None

with open('../data/fmri-FC-slim.pkl', 'rb') as f:
    fmriData = pickle.load(f)
    
list(fmriData.keys())

['FC-slim', 'subjNum2IdxMap', 'subjIdx2NumMap', 'groupsNormalDiagMap']

In [2]:
# Package fMRI data into data matrix and response variables

import numpy as np

keys = list(fmriData['groupsNormalDiagMap'].keys())
y = [fmriData['groupsNormalDiagMap'][key] for key in keys]
y = np.array(y).astype('int')
x = [fmriData['FC-slim'][fmriData['subjNum2IdxMap'][key]] for key in keys]
x = np.stack(x)
print(x.shape)
print(y.shape)

(66, 34716)
(66,)


In [3]:
import sys

if '..' not in sys.path:
    sys.path.append('..')

from latsim import LatSimClf
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

accs = []

for i in range(30):
    xtr, xt, ytr, yt = train_test_split(x, y, stratify=y, train_size=0.8)
    clf = LatSimClf().fit(xtr,ytr,ld=1)
#     clf = LogisticRegression(C=10).fit(xtr,ytr)
    yhat = clf.predict(xt)
    acc = np.sum(yhat == yt)/len(yhat)
    accs.append(acc)
    print(acc)
    
print('---')
print(np.mean(accs))
print(np.std(accs))

0.42857142857142855
0.5
0.5
0.6428571428571429
0.42857142857142855
0.7142857142857143
0.5
0.42857142857142855
0.5714285714285714
0.35714285714285715
0.5714285714285714
0.7142857142857143
0.7142857142857143
0.7857142857142857
0.5714285714285714
0.7142857142857143
0.5
0.7142857142857143
0.6428571428571429
0.5
0.6428571428571429
0.5714285714285714
0.7142857142857143
0.42857142857142855
0.6428571428571429
0.7857142857142857
0.6428571428571429
0.5714285714285714
0.35714285714285715
0.5714285714285714
---
0.580952380952381
0.12056179905878253


In [5]:
import sys

if '..' not in sys.path:
    sys.path.append('..')

from latsim import LatSimClf
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# parameters = LatSimClf.get_default_distributions()
parameters = {'ld': [1, 10], 'stop': [0, 0.1]}
sim = LatSimClf()
clf = GridSearchCV(sim, parameters, scoring='accuracy')

xtr, xt, ytr, yt = train_test_split(x, y, stratify=y, train_size=0.8)
clf.fit(xtr, ytr)

clf.cv_results_

{'mean_fit_time': array([0.07243547, 0.07250857, 0.07289786, 0.07535329]),
 'std_fit_time': array([0.00698142, 0.00773355, 0.00201003, 0.00799013]),
 'mean_score_time': array([0.00115542, 0.00122228, 0.00122108, 0.00100236]),
 'std_score_time': array([1.61300937e-04, 6.49111122e-05, 6.42021171e-05, 1.48389195e-04]),
 'param_ld': masked_array(data=[1, 1, 10, 10],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_stop': masked_array(data=[0, 0.1, 0, 0.1],
              mask=[False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'ld': 1, 'stop': 0},
  {'ld': 1, 'stop': 0.1},
  {'ld': 10, 'stop': 0},
  {'ld': 10, 'stop': 0.1}],
 'split0_test_score': array([0.45454545, 0.45454545, 0.36363636, 0.54545455]),
 'split1_test_score': array([0.18181818, 0.54545455, 0.45454545, 0.81818182]),
 'split2_test_score': array([0.5, 0.3, 0.3, 0.4]),
 'split3_test_score': array([0.6, 0.5, 0.5, 0.3]),
 'split4_