# Characterizing Brain Regions Contributing to the Models

### Take the absolute value of the SVM weights

In [None]:
from nibabel import load, save, Nifti1Image
from numpy import absolute, asanyarray

# Directory holding the signed SVM weight maps; the absolute-value maps are
# written next to them.
classifier_dir = '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier'
analysis = ['all_conditions','allConds_predAge','negative','neutral','positive']

for a in analysis:
    weights_path = '%s/svmweights_%s.nii.gz' % (classifier_dir, a)
    weights_nii = load(weights_path)
    # Image.get_data() is deprecated in nibabel; asanyarray(img.dataobj) is
    # the documented drop-in replacement that returns the same array.
    abs_weights = absolute(asanyarray(weights_nii.dataobj))
    # Reuse the source header and affine so the output stays on the same grid.
    abs_nii = Nifti1Image(abs_weights, header=weights_nii.header, affine=weights_nii.affine)
    abs_path = '%s/svmABSweights_%s.nii.gz' % (classifier_dir, a)
    save(abs_nii, abs_path)

### Cluster the volumes

In [None]:
from nipype.interfaces.fsl import Cluster

# Run FSL's cluster tool (through its nipype interface) on every
# absolute-weight map, one analysis at a time.
analysis = ['all_conditions','allConds_predAge','negative','neutral','positive']
abs_weights_template = '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/svmABSweights_%s.nii.gz'

for a in analysis:
    cl = Cluster(
        in_file=abs_weights_template % a,
        out_localmax_txt_file='cluster_stats_%s.txt' % a,
        threshold=0.000001,
        # Boolean flag instead of an explicit index-image file name.
        out_index_file=True,
    )
    cl.run()

### Extract parameter estimates for each cluster
This function additionally breaks up clusters that are larger than 1000 voxels to aid interpretation, by taking the intersection of those clusters with the AAL2 atlas.

In [13]:
def extract_cluster_betas(cluster_index_file, sample_betas, min_clust_size, subject_ids):
    """Extract the average of ``sample_betas`` inside every labelled cluster.

    For each positive label found in ``cluster_index_file`` a binary mask is
    saved to ``temp_clust_mask.nii.gz`` and nipype's ``ImageMeants`` (FSL
    ``fslmeants``) is run on ``sample_betas`` with that mask, writing
    ``betas.txt``. Both scratch files are created in the current working
    directory and are overwritten for every cluster. The values read back are
    stored as one ``clust<label>`` column per cluster, next to a ``Subject``
    column, and the table is written to ``<prefix>_extracted_betas.csv`` in the
    current working directory.

    Parameters
    ----------
    cluster_index_file : str
        Path to a cluster-label image. The last 7 characters of its base name
        (i.e. ``.nii.gz``) are dropped to form the output prefix.
    sample_betas : str
        Path to the image the averages are extracted from (presumably a 4D
        image with one volume per entry of ``subject_ids`` -- TODO confirm).
    min_clust_size : int
        Currently unused; kept so existing callers keep working.
    subject_ids : iterable
        Subject identifiers; they are sorted before being stored.

    Returns
    -------
    str
        Absolute path of the CSV file that was written.

    Raises
    ------
    ValueError
        If the number of values extracted for a cluster differs from the
        number of subject IDs.
    """
    from os.path import abspath, basename

    from nibabel import load, save, Nifti1Image
    from nipype.interfaces.fsl.utils import ImageMeants
    from numpy import asanyarray, unique, zeros_like
    from pandas import DataFrame, Series

    # NOTE(review): the IDs are sorted here, but the extracted values are
    # attached by position. This is only correct if the volumes of
    # `sample_betas` are stored in sorted-subject order -- confirm.
    subject_ids = sorted(subject_ids)
    sample_data = DataFrame(subject_ids, index=None, columns=['Subject'])

    cluster_nifti = load(cluster_index_file)
    # get_data() is deprecated in nibabel; this is its documented equivalent.
    cluster_data = asanyarray(cluster_nifti.dataobj)
    clusters, cluster_sizes = unique(cluster_data, return_counts=True)
    labelled = clusters > 0  # label 0 (and below) is background
    cluster_sizes = cluster_sizes[labelled]
    clusters = clusters[labelled]

    ind_filename = basename(cluster_index_file)
    out_prefix = ind_filename[:-7]  # strip the ".nii.gz" extension

    mask_file = 'temp_clust_mask.nii.gz'
    betas_file = 'betas.txt'

    for clust_idx in clusters:
        # Binary mask selecting only the current cluster.
        mask = zeros_like(cluster_data)
        mask[cluster_data == clust_idx] = 1
        save(Nifti1Image(mask, cluster_nifti.affine), mask_file)

        eb = ImageMeants()
        eb.inputs.in_file = sample_betas
        eb.inputs.mask = mask_file
        eb.inputs.out_file = betas_file
        eb.run()

        # Close the file deterministically and ignore blank lines so a
        # trailing newline cannot produce a spurious extra entry.
        with open(betas_file) as betas_handle:
            betas = [line.strip() for line in betas_handle if line.strip()]

        if len(betas) != len(sample_data):
            raise ValueError(
                'cluster %s: extracted %d values for %d subjects'
                % (clust_idx, len(betas), len(sample_data))
            )
        sample_data['clust' + str(clust_idx)] = Series(betas, index=sample_data.index)

    csv_name = out_prefix + '_extracted_betas.csv'
    sample_data.to_csv(csv_name)
    extracted_betas_csv = abspath(csv_name)

    print('###### ' + out_prefix + ' #######')
    print('cluster labels: '+str(clusters))
    print('cluster sizes: '+str(cluster_sizes))
    return extracted_betas_csv

In [14]:
from glob import glob
from pandas import read_csv
# Inputs shared by every cluster-index image.
sample_betas = '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/featureset.nii.gz'
subject_info = read_csv(
    '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/featureset_key.csv',
    index_col=0,
)
subject_ids = subject_info['subject']
min_clust_size = 0

# Every cluster-index image found in the clustering folder gets its own table.
index_files = glob('/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/*_clusters.nii.gz')
for index_file in index_files:
    extract_cluster_betas(index_file, sample_betas, min_clust_size, subject_ids)

180802-13:27:40,997 interface DEBUG:
	 in_file_/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/featureset.nii.gz
180802-13:27:40,999 interface DEBUG:
	 mask_temp_clust_mask.nii.gz
180802-13:27:41,0 interface DEBUG:
	 order_1
180802-13:27:41,1 interface DEBUG:
	 out_file_betas.txt
180802-13:28:00,56 interface DEBUG:
	 in_file_/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/featureset.nii.gz
180802-13:28:00,57 interface DEBUG:
	 mask_temp_clust_mask.nii.gz
180802-13:28:00,59 interface DEBUG:
	 order_1
180802-13:28:00,60 interface DEBUG:
	 out_file_betas.txt
180802-13:28:19,211 interface DEBUG:
	 in_file_/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/featureset.nii.gz
180802-13:28:19,212 interface DEBUG:
	 mask_temp_clust_mask.nii.gz
180802-13:28:19,214 interface DEBUG:
	 order_1
180802-13:28:19,215 interface DEBUG:
	 out_file_betas.txt
180802-13:28:39,199 interface DEBUG:
	 in_file_/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/featureset.nii.gz


In [53]:
def get_cluster_peaks(clusters_file, stat_file):
    """Tabulate the size, peak value and peak location of every cluster.

    Each positive label in ``clusters_file`` is treated as one cluster. For
    every cluster the largest value of ``stat_file`` among that cluster's
    voxels is located. The resulting table is also written to
    ``<clusters_file minus its last 7 characters>_peaks.csv``.

    Parameters
    ----------
    clusters_file : str
        Path to the cluster-label image (expected to end in ``.nii.gz``).
    stat_file : str
        Path to the statistic image; its first dimensions must match the
        cluster image so the label mask can index it.

    Returns
    -------
    pandas.DataFrame
        One row per cluster with columns ``clust_num``, ``peak``,
        ``num_voxels``, ``X``, ``Y``, ``Z``. ``X``/``Y``/``Z`` are array
        (voxel) indices of the peak, not world coordinates.
    """
    from nibabel import load
    from numpy import asanyarray, inf, unique, unravel_index
    from pandas import DataFrame

    # get_data() is deprecated in nibabel; asanyarray(img.dataobj) is its
    # documented equivalent.
    clusters_data = asanyarray(load(clusters_file).dataobj)
    cluster_labels, cluster_sizes = unique(clusters_data, return_counts=True)
    labelled = cluster_labels > 0  # label 0 (and below) is background
    cluster_sizes = cluster_sizes[labelled]
    cluster_labels = cluster_labels[labelled]

    # Read the statistic image once instead of once per cluster.
    stat_data = asanyarray(load(stat_file).dataobj)

    rows = []
    for label, size in zip(cluster_labels, cluster_sizes):
        # Work on a float copy and push everything outside the cluster to
        # -inf, so the arg-max always falls inside the cluster, even when the
        # cluster's own values are all zero or negative.
        masked = stat_data.astype(float)
        masked[clusters_data != label] = -inf
        location = unravel_index(masked.argmax(), masked.shape)
        # Report the peak from the untouched data to keep its original dtype.
        rows.append(
            (label, stat_data[location], size, location[0], location[1], location[2])
        )

    cluster_info = DataFrame(
        rows, columns=['clust_num', 'peak', 'num_voxels', 'X', 'Y', 'Z']
    )

    out_prefix = clusters_file[:-7]  # strip the ".nii.gz" extension
    cluster_info.to_csv(out_prefix + '_peaks.csv')
    return cluster_info
        

In [54]:
# Cluster-label images and, position by position, the statistic image each one
# was derived from (the first two share the same all-conditions weight map).
cluster_files = ['/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/all_conditions_cluster6_clusters.nii.gz',
                 '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/all_conditions_cluster7_clusters.nii.gz',
                 '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/allCondspredAge_cluster21_clusters.nii.gz',
                 '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/negative_cluster21_clusters.nii.gz',
                 '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/neutral_cluster12_clusters.nii.gz',
                 '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/positive_cluster25_clusters.nii.gz']

stat_files=['/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/svmABSweights_all_conditions.nii.gz',
            '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/svmABSweights_all_conditions.nii.gz',
            '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/svmABSweights_allConds_predAge.nii.gz',
            '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/svmABSweights_negative.nii.gz',
            '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/svmABSweights_neutral.nii.gz',
            '/home/camachocm2/Analysis/KidVid_MVPA/analysis/classifier/clustering/svmABSweights_positive.nii.gz']

# The two lists are paired by position, so a length mismatch is a setup error.
if len(cluster_files) != len(stat_files):
    raise ValueError('cluster_files and stat_files must have the same length')

# Keep the most recent table in `cluster_info` so it is defined when the
# notebook is run top to bottom; every table is also saved to CSV by the call.
for clusters_file, stat_file in zip(cluster_files, stat_files):
    cluster_info = get_cluster_peaks(clusters_file, stat_file)

In [48]:
# Display the cluster summary table held in `cluster_info`; the variable must
# already exist in the notebook namespace when this cell runs.
cluster_info

Unnamed: 0,clust_num,peak,num_voxels,X,Y,Z
0,4102.0,0.0273487,61,31,42,31
1,4202.0,0.0216526,25,22,56,25
2,5022.0,0.0369103,13,32,41,31
3,5202.0,0.0247387,38,19,23,37
4,5302.0,0.0241205,113,17,20,30
5,5402.0,0.0350459,412,19,36,28
6,6212.0,0.018394,73,11,32,47
7,6222.0,0.0343483,90,17,30,47
8,7102.0,0.0238231,81,30,44,34
9,8112.0,0.040559,1567,8,53,34
