-
Notifications
You must be signed in to change notification settings - Fork 0
/
music_feature_searchlight.py
322 lines (268 loc) · 13.6 KB
/
music_feature_searchlight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
import numpy as np
import brainiak.eventseg.event
from scipy.stats import norm,zscore,pearsonr,stats
from nilearn.image import load_img
import sys
from brainiak.funcalign.srm import SRM
import nibabel as nib
import os
from scipy.spatial import distance
from sklearn import linear_model
from scipy.spatial.distance import squareform, pdist
import logging
from scipy.fftpack import fft, ifft
logger = logging.getLogger(__name__)
# Project root on the cluster filesystem.
datadir = '/tigress/jamalw/MES/'
# Subject IDs; used to locate each subject's searchlight input files on disk.
subjs = ['MES_022817_0','MES_030217_0','MES_032117_1','MES_040217_0','MES_041117_0','MES_041217_0','MES_041317_0','MES_041417_0','MES_041517_0','MES_042017_0','MES_042317_0','MES_042717_0','MES_050317_0','MES_051317_0','MES_051917_0','MES_052017_0','MES_052017_1','MES_052317_0','MES_052517_0','MES_052617_0','MES_052817_0','MES_052817_1','MES_053117_0','MES_060117_0','MES_060117_1']
# run 1 times -- alternative configuration kept for reference (switch the
# three commented assignments below to analyze run 1 with MFCC features).
#song_bounds = np.array([0,225,314,494,628,718,898,1032,1122,1301,1436,1660,1749,1973, 2198,2377,2511])
#songs = ['Finlandia', 'Blue_Monk', 'I_Love_Music','Waltz_of_Flowers','Capriccio_Espagnole','Island','All_Blues','St_Pauls_Suite','Moonlight_Sonata','Symphony_Fantastique','Allegro_Moderato','Change_of_the_Guard','Boogie_Stop_Shuffle','My_Favorite_Things','The_Bird','Early_Summer']
#song_features = np.load(datadir + 'prototype/link/scripts/data/searchlight_input/mfccRun1_hrf.npy')
# run 2 times: TR onsets of each song in run 2; the last entry (2511) marks
# the end of the run, so song i spans song_bounds[i]:song_bounds[i+1].
song_bounds = np.array([0,90,270,449,538,672,851,1031,1255,1480,1614,1704,1839,2063,2288,2377,2511])
# Song names in run-2 presentation order; must stay aligned with song_bounds.
songs = ['St_Pauls_Suite', 'I_Love_Music', 'Moonlight_Sonata', 'Change_of_the_Guard','Waltz_of_Flowers','The_Bird', 'Island', 'Allegro_Moderato', 'Finlandia', 'Early_Summer', 'Capriccio_Espagnole', 'Symphony_Fantastique', 'Boogie_Stop_Shuffle', 'My_Favorite_Things', 'Blue_Monk','All_Blues']
# Precomputed chroma features for run 2 (HRF-convolved, per the filename).
song_features = np.load(datadir + 'prototype/link/scripts/data/searchlight_input/chromaRun2_hrf.npy')
# Song to analyze, selected on the command line: python music_feature_searchlight.py <idx>
song_idx = int(sys.argv[1])
srm_k = 30  # number of SRM shared features (also min voxels per searchlight)
hrf = 5     # hemodynamic lag in TRs; passed through to RSA but not used there
# Whole-brain mask on the 91x109x91 grid; NaN-free per the filename.
mask_img = load_img(datadir + 'data/mask_nonan.nii.gz')
# NOTE(review): get_data() is deprecated in nibabel (get_fdata() is the
# replacement); left as-is since the installed nibabel version is unknown.
mask = mask_img.get_data()
# Flatten the mask so it can index the flattened coordinate grid built below.
mask_reshape = np.reshape(mask,(91*109*91))
def _check_timeseries_input(data):
"""Checks response time series input data (e.g., for ISC analysis)
Input data should be a n_TRs by n_voxels by n_subjects ndarray
(e.g., brainiak.image.MaskedMultiSubjectData) or a list where each
item is a n_TRs by n_voxels ndarray for a given subject. Multiple
input ndarrays must be the same shape. If a 2D array is supplied,
the last dimension is assumed to correspond to subjects. This
function is generally intended to be used internally by other
functions module (e.g., isc, isfc in brainiak.isc).
Parameters
----------
data : ndarray or list
Time series data
Returns
-------
data : ndarray
Input time series data with standardized structure
n_TRs : int
Number of time points (TRs)
n_voxels : int
Number of voxels (or ROIs)
n_subjects : int
Number of subjects
"""
# Convert list input to 3d and check shapes
if type(data) == list:
data_shape = data[0].shape
for i, d in enumerate(data):
if d.shape != data_shape:
raise ValueError("All ndarrays in input list "
"must be the same shape!")
if d.ndim == 1:
data[i] = d[:, np.newaxis]
data = np.dstack(data)
# Convert input ndarray to 3d and check shape
elif isinstance(data, np.ndarray):
if data.ndim == 2:
data = data[:, np.newaxis, :]
elif data.ndim == 3:
pass
else:
raise ValueError("Input ndarray should have 2 "
"or 3 dimensions (got {0})!".format(data.ndim))
# Infer subjects, TRs, voxels and log for user to check
n_TRs, n_voxels, n_subjects = data.shape
logger.info("Assuming {0} subjects with {1} time points "
"and {2} voxel(s) or ROI(s) for ISC analysis.".format(
n_subjects, n_TRs, n_voxels))
return data, n_TRs, n_voxels, n_subjects
def phase_randomize(data, voxelwise=False, random_state=None):
    """Randomize phase of time series across subjects.

    For each subject, apply a Fourier transform to the voxel time series
    and randomly shift the phase of each frequency before inverting back
    into the time domain. This yields time series with the same power
    spectrum (and thus the same autocorrelation) as the original, but
    removes any meaningful temporal relationship among time series across
    subjects. By default (voxelwise=False) the same phase shift is applied
    across all voxels; with voxelwise=True each voxel gets its own random
    phase shifts. The typical input is a time x voxels x subjects ndarray.
    A 2D input is assumed to be time x subjects and is returned as 2D.
    Based on the work in [Lerner2011]_ and [Simony2016]_.

    Parameters
    ----------
    data : ndarray (n_TRs x n_voxels x n_subjects)
        Data to be phase randomized (per subject)
    voxelwise : bool, default: False
        Apply same (False) or different (True) randomizations across voxels
    random_state : RandomState or an int seed (0 by default)
        A random number generator instance to define the state of the
        random permutations generator.

    Returns
    -------
    shifted_data : ndarray (n_TRs x n_voxels x n_subjects)
        Phase-randomized time series (2D if the input was 2D)
    """
    # Remember input rank so a 2-dimensional input comes back 2-dimensional.
    data_ndim = data.ndim
    # Coerce to 3D (time x voxels x subjects) and get basic shape of data.
    data, n_TRs, n_voxels, n_subjects = _check_timeseries_input(data)
    # Random seed to be deterministically re-randomized at each iteration
    if isinstance(random_state, np.random.RandomState):
        prng = random_state
    else:
        prng = np.random.RandomState(random_state)
    # Positive/negative frequency index pairs. Index 0 (the DC component)
    # is excluded, and for even n_TRs the Nyquist bin (n_TRs // 2) is also
    # excluded -- both must stay real-valued for a real output signal.
    if n_TRs % 2 == 0:
        pos_freq = np.arange(1, data.shape[0] // 2)
        neg_freq = np.arange(data.shape[0] - 1, data.shape[0] // 2, -1)
    else:
        pos_freq = np.arange(1, (data.shape[0] - 1) // 2 + 1)
        neg_freq = np.arange(data.shape[0] - 1,
                             (data.shape[0] - 1) // 2, -1)
    # Get randomized phase shifts.
    # BUGFIX: use np.pi rather than np.math.pi -- the np.math alias was
    # deprecated and removed in NumPy 2.0; the value is identical.
    if not voxelwise:
        phase_shifts = (prng.rand(len(pos_freq), 1, n_subjects)
                        * 2 * np.pi)
    else:
        phase_shifts = (prng.rand(len(pos_freq), n_voxels, n_subjects)
                        * 2 * np.pi)
    # Fast Fourier transform along time dimension of data
    fft_data = fft(data, axis=0)
    # Shift pos and neg frequencies symmetrically (conjugate phases) so the
    # inverse transform stays real.
    fft_data[pos_freq, :, :] *= np.exp(1j * phase_shifts)
    fft_data[neg_freq, :, :] *= np.exp(-1j * phase_shifts)
    # Inverse FFT to put data back in time domain; np.real discards the
    # residual imaginary round-off error.
    shifted_data = np.real(ifft(fft_data, axis=0))
    # Go back to 2-dimensions if input was 2-dimensional
    if data_ndim == 2:
        shifted_data = shifted_data[:, 0, :]
    return shifted_data
def searchlight(coords,song_features,mask,subjs,song_idx,song_bounds,srm_k,hrf):
    """Run an RSA searchlight over the brain.

    Walks a 3D grid of searchlight centers (every `stride` voxels), loads
    each subject's precomputed data for that center from disk, regresses out
    motion parameters, and calls RSA() on every searchlight that has at
    least min_vox voxels. Per-searchlight scores are then averaged back
    into voxel space and z-scored against the permutation distribution.

    Parameters
    ----------
    coords : ndarray (V x 3)
        Voxel coordinates of the in-mask voxels.
    song_features : ndarray
        Music-feature matrix, passed through to RSA.
    mask : ndarray
        Brain mask volume (not referenced inside this function).
    subjs : list of str
        Subject IDs used to locate the per-center .npy files on disk.
    song_idx : int
        Index of the song to analyze (into song_bounds).
    song_bounds : ndarray
        TR onsets of each song, passed through to RSA.
    srm_k : int
        Number of SRM features; doubles as the minimum voxel count.
    hrf : int
        Hemodynamic lag, passed through to RSA.

    Returns
    -------
    vox_z : ndarray (V x nPerm+1)
        Per-voxel z-scores of each column of voxmean against the
        permutation columns (1:).
    voxmean : ndarray (V x nPerm+1)
        Mean searchlight score per voxel; column 0 is the real (unpermuted)
        score, columns 1: are the permutations.
    """
    stride = 5          # step between searchlight centers, in voxels
    radius = 5          # searchlight sphere radius, in voxels
    min_vox = srm_k     # need >= srm_k voxels to fit an SRM with srm_k features
    nPerm = 1000        # permutations produced inside RSA
    SL_allvox = []      # voxel indices covered by each searchlight
    SL_results = []     # per-searchlight score vectors (length nPerm+1)
    datadir = '/tigress/jamalw/MES/prototype/link/scripts/data/searchlight_input/'
    for x in range(0,np.max(coords, axis=0)[0]+stride,stride):
        for y in range(0,np.max(coords, axis=0)[1]+stride,stride):
            for z in range(0,np.max(coords, axis=0)[2]+stride,stride):
                # Skip centers with no precomputed data file on disk.
                if not os.path.isfile(datadir + subjs[0] + '/' + str(x) + '_' + str(y) + '_' + str(z) + '.npy'):
                    continue
                # Distance of every masked voxel to this center; SL_vox
                # flags the voxels inside the searchlight sphere.
                D = distance.cdist(coords,np.array([x,y,z]).reshape((1,3)))[:,0]
                SL_vox = D <= radius
                data = []
                # Run 1: load each subject's data for this center and
                # regress out the motion parameters (TRs 0..2510), then
                # z-score over time. NaNs from zero-variance voxels are
                # zeroed by nan_to_num.
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) + '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] + '/EPI_mcf1.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:,0:2511].T,subj_data[:,:,0].T)
                    subj_data1 = subj_data[:,:,0] - np.dot(regr.coef_, motion[:,0:2511]) - regr.intercept_[:, np.newaxis]
                    data.append(np.nan_to_num(stats.zscore(subj_data1,axis=1,ddof=1)))
                # Run 2: same preprocessing with the run-2 motion file;
                # appended after all run-1 entries, so RSA can split the
                # list in half to recover the two runs.
                for i in range(len(subjs)):
                    subj_data = np.load(datadir + subjs[i] + '/' + str(x) + '_' + str(y) + '_' + str(z) + '.npy')
                    subj_regs = np.genfromtxt(datadir + subjs[i] + '/EPI_mcf2.par')
                    motion = subj_regs.T
                    regr = linear_model.LinearRegression()
                    regr.fit(motion[:,0:2511].T,subj_data[:,:,1].T)
                    subj_data2 = subj_data[:,:,1] - np.dot(regr.coef_, motion[:,0:2511]) - regr.intercept_[:, np.newaxis]
                    data.append(np.nan_to_num(stats.zscore(subj_data2,axis=1,ddof=1)))
                print("Running Searchlight")
                # only run function on searchlights with voxels greater than or equal to min_vox
                if data[0].shape[0] >= min_vox:
                    SL_match = RSA(data,song_features,song_idx,song_bounds,srm_k,hrf)
                    SL_results.append(SL_match)
                    # NOTE(review): indentation was ambiguous in the source;
                    # SL_allvox must stay in lockstep with SL_results (see
                    # the aggregation loop below), so this append is kept
                    # inside the min_vox guard.
                    SL_allvox.append(np.array(np.nonzero(SL_vox)[0]))
    # Aggregate: sum each searchlight's scores into its member voxels and
    # count how many searchlights covered each voxel.
    voxmean = np.zeros((coords.shape[0], nPerm+1))
    vox_SLcount = np.zeros(coords.shape[0])
    for sl in range(len(SL_results)):
        voxmean[SL_allvox[sl],:] += SL_results[sl]
        vox_SLcount[SL_allvox[sl]] += 1
    # NOTE(review): voxels covered by no searchlight divide by zero here and
    # produce NaN/inf rows -- presumably handled downstream; verify.
    voxmean = voxmean / vox_SLcount[:,np.newaxis]
    # z-score each column (real and every permutation) against the null
    # distribution formed by the permutation columns 1:.
    vox_z = np.zeros((coords.shape[0], nPerm+1))
    for p in range(nPerm+1):
        vox_z[:,p] = (voxmean[:,p] - np.mean(voxmean[:,1:],axis=1))/np.std(voxmean[:,1:],axis=1)
    return vox_z,voxmean
def RSA(X,song_features,song_idx,song_bounds,srm_k,hrf):
    """Compute RSA between music features and SRM-shared neural data.

    Fits a Shared Response Model on the run-1 data, projects the run-2 data
    into the shared space, and correlates the timepoint-by-timepoint
    similarity structure of the neural data with that of the music features
    for one song. The first returned score uses the real data; the
    remaining nPerm scores use phase-randomized neural data as a null
    distribution.

    Parameters
    ----------
    X : list of ndarray
        Per-subject voxel-by-time data; the first half of the list is run 1
        (SRM training) and the second half is run 2 (testing).
    song_features : ndarray
        Feature-by-time matrix for the music features.
    song_idx : int
        Index of the song to analyze within song_bounds.
    song_bounds : ndarray
        TR onsets of each song; the song spans bounds[idx]:bounds[idx+1].
    srm_k : int
        Number of SRM shared features.
    hrf : int
        Hemodynamic lag; accepted for signature compatibility but not used
        in this function.

    Returns
    -------
    rsa_scores : ndarray (nPerm+1,)
        Spearman correlations between the neural and feature similarity
        vectors; index 0 is the real data, indices 1: are permutations.
    """
    nPerm = 1000
    # First half of X is run 1 (training), second half is run 2 (testing).
    run1 = [X[i] for i in np.arange(0, int(len(X)/2))]
    run2 = [X[i] for i in np.arange(int(len(X)/2), len(X))]
    print('Building Model')
    srm = SRM(n_iter=10, features=srm_k)
    print('Training Model')
    srm.fit(run1)
    print('Testing Model')
    shared_data = srm.transform(run2)
    # z-score each subject's shared timecourses over time, then average
    # across subjects within the selected song's TR window.
    shared_data = stats.zscore(np.dstack(shared_data),axis=1,ddof=1)
    data = np.mean(shared_data[:,song_bounds[song_idx]:song_bounds[song_idx + 1]],axis=2)
    song_features = song_features[:,song_bounds[song_idx]:song_bounds[song_idx + 1]]
    # Timepoint-by-timepoint correlation matrices, vectorized to their
    # upper triangles (RDM vectors) for the Spearman comparison.
    songCorr = np.corrcoef(song_features.T)
    songVec = squareform(songCorr, checks=False)
    rsa_scores = np.zeros(nPerm+1)
    voxCorr = np.corrcoef(data.T)
    voxVec = squareform(voxCorr,checks=False)
    # perm_features stays fixed at the original data: every permutation
    # phase-randomizes the original, not the previous permutation.
    perm_features = data.copy()
    for p in range(nPerm+1):
        # Score the CURRENT voxVec first (so p=0 scores the unpermuted
        # data), then phase-randomize to build voxVec for the next
        # iteration; random_state=p makes each permutation reproducible.
        spearmanCorr = stats.spearmanr(voxVec,songVec,nan_policy='omit')
        rsa_scores[p] = spearmanCorr[0]
        vox_features = phase_randomize(perm_features.T, voxelwise=False, random_state=p).T
        voxCorr = np.corrcoef(vox_features.T)
        voxVec = squareform(voxCorr, checks=False)
    return rsa_scores
# ---- Script body: build the coordinate grid, run the searchlight, save ----
# initialize data stores on the 91 x 109 x 91 voxel grid
global_outputs_all = np.zeros((91,109,91))  # NOTE(review): never written after init
results3d = np.zeros((91,109,91))           # z-scored real result
results3d_real = np.zeros((91,109,91))      # raw (unpermuted) mean scores
results3d_perms = np.zeros((91,109,91,1001))  # all 1000 permutations + real
# create coords matrix: (x, y, z) index of every voxel, flattened to V x 3
x,y,z = np.mgrid[[slice(dm) for dm in tuple((91,109,91))]]
x = np.reshape(x,(x.shape[0]*x.shape[1]*x.shape[2]))
y = np.reshape(y,(y.shape[0]*y.shape[1]*y.shape[2]))
z = np.reshape(z,(z.shape[0]*z.shape[1]*z.shape[2]))
coords = np.vstack((x,y,z)).T
# Keep only in-mask voxel coordinates (mask_reshape built at module top).
coords_mask = coords[mask_reshape>0]
print('Running Distribute...')
voxmean,real_sl_scores = searchlight(coords_mask,song_features,mask,subjs,song_idx,song_bounds,srm_k,hrf)
# Column 0 is the real (unpermuted) result; columns 1: are permutations.
results3d[mask>0] = voxmean[:,0]
results3d_real[mask>0] = real_sl_scores[:,0]
for j in range(voxmean.shape[1]):
    results3d_perms[mask>0,j] = voxmean[:,j]
print('Saving data to Searchlight Folder')
print(songs[song_idx])
# Save raw scores, z-scores, and the full permutation volume per song.
np.save('/tigress/jamalw/MES/prototype/link/scripts/data/searchlight_output/music_features/' + songs[song_idx] +'/chroma/raw/globals_raw_srm_k_' + str(srm_k) + '_fit_run1_psVox', results3d_real)
np.save('/tigress/jamalw/MES/prototype/link/scripts/data/searchlight_output/music_features/' + songs[song_idx] +'/chroma/zscores/globals_z_srm_k' + str(srm_k) + '_fit_run1_psVox', results3d)
np.save('/tigress/jamalw/MES/prototype/link/scripts/data/searchlight_output/music_features/' + songs[song_idx] +'/chroma/perms/globals_z_srm_k' + str(srm_k) + '_fit_run1_psVox', results3d_perms)