In [1]:
###############################################################################
# Download Neurosynth
# -----------------------------------------------------------------------------
# Neurosynth's data files are stored at https://github.com/neurosynth/neurosynth-data.

from nimare import extract
import os
from nimare import io

# The overall procedure may take a while (~10 minutes).
# Single source of truth for the Neurosynth working directory, so the download
# location and the saved Dataset file cannot drift apart.
NEUROSYNTH_DIR = './neurosynth_data'

# Download the desired version of Neurosynth from GitHub.
# With overwrite=False, files that already exist locally are skipped.
files = extract.fetch_neurosynth(
    data_dir=NEUROSYNTH_DIR,
    version="7",
    source="abstract",
    vocab="terms",
    overwrite=False,
)
print(files)
# `files` is a list of dicts (keys used below: "coordinates", "metadata",
# "features"); use the first entry.
neurosynth_db = files[0]

# Convert the files to a Dataset.
neurosynth_dset = io.convert_neurosynth_to_dataset(
    coordinates_file=neurosynth_db["coordinates"],
    metadata_file=neurosynth_db["metadata"],
    annotations_files=neurosynth_db["features"],
)

# Save the Dataset for later use, next to the downloaded files.
neurosynth_dset.save(os.path.join(NEUROSYNTH_DIR, "neurosynth_dataset.pkl.gz"))

INFO:nimare.extract.utils:Dataset found in ./neurosynth_data/neurosynth

INFO:nimare.extract.extract:Searching for any feature files matching the following criteria: [('source-abstract', 'vocab-terms', 'data-neurosynth', 'version-7')]


Downloading data-neurosynth_version-7_coordinates.tsv.gz
File exists and overwrite is False. Skipping.
Downloading data-neurosynth_version-7_metadata.tsv.gz
File exists and overwrite is False. Skipping.
Downloading data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz
File exists and overwrite is False. Skipping.
Downloading data-neurosynth_version-7_vocab-terms_vocabulary.txt
File exists and overwrite is False. Skipping.
[{'coordinates': '/home/users/ysong30/Documents/Projects/Functional_Decoding_Tutorial/neurosynth_data/neurosynth/data-neurosynth_version-7_coordinates.tsv.gz', 'metadata': '/home/users/ysong30/Documents/Projects/Functional_Decoding_Tutorial/neurosynth_data/neurosynth/data-neurosynth_version-7_metadata.tsv.gz', 'features': [{'features': '/home/users/ysong30/Documents/Projects/Functional_Decoding_Tutorial/neurosynth_data/neurosynth/data-neurosynth_version-7_vocab-terms_source-abstract_type-tfidf_features.npz', 'vocabulary': '/home/users/ysong30/D



In [2]:
# Print the Dataset summary to check how many experiments it contains and
# which coordinate space it uses.
# All coordinates are represented in MNI152 space.
print(neurosynth_dset)

Dataset(14371 experiments, space='mni152_2mm')


In [3]:
# Check which studies are included: preview the first rows of the metadata
# table (ids plus bibliographic fields such as authors, journal, year, title).
neurosynth_dset.metadata.head()

Unnamed: 0,id,study_id,contrast_id,authors,journal,year,title
0,10022492-1,10022492,1,"Callicott JH, Mattay VS, Bertolino A, Finn K, ...","Cerebral cortex (New York, N.Y. : 1991)",1999,Physiological characteristics of capacity cons...
1,10022494-1,10022494,1,"Toni I, Schluter ND, Josephs O, Friston K, Pas...","Cerebral cortex (New York, N.Y. : 1991)",1999,"Signal-, set- and movement-related activity in..."
2,10022496-1,10022496,1,"Lockwood AH, Salvi RJ, Coad ML, Arnold SA, Wac...","Cerebral cortex (New York, N.Y. : 1991)",1999,The functional anatomy of the normal human aud...
3,10051677-1,10051677,1,"Denton D, Shade R, Zamarippa F, Egan G, Blair-...",Proceedings of the National Academy of Science...,1999,Correlation of regional cerebral blood flow an...
4,10191322-1,10191322,1,"Chee MW, Tan EW, Thiel T",The Journal of neuroscience : the official jou...,1999,Mandarin and English single word processing st...


In [4]:
# The coordinates of activation peaks are extracted for each study.
# Preview the first rows: each row holds one (x, y, z) coordinate together
# with the id it belongs to and its coordinate space.
neurosynth_dset.coordinates.head()

Unnamed: 0,id,study_id,contrast_id,x,y,z,space
1483,10022492-1,10022492,1,36.0,-58.0,52.0,mni152_2mm
1499,10022492-1,10022492,1,48.0,24.0,20.0,mni152_2mm
1498,10022492-1,10022492,1,-42.0,26.0,20.0,mni152_2mm
1497,10022492-1,10022492,1,-36.0,30.0,16.0,mni152_2mm
1496,10022492-1,10022492,1,-30.0,32.0,0.0,mni152_2mm


In [5]:
# This dataset includes annotations for each study using TF-IDF
# (Term Frequency–Inverse Document Frequency) weights.
# Preview the first rows: after the id columns there is one
# `terms_abstract_tfidf__<term>` column per term.
neurosynth_dset.annotations.head()

Unnamed: 0,id,study_id,contrast_id,terms_abstract_tfidf__001,terms_abstract_tfidf__01,terms_abstract_tfidf__05,terms_abstract_tfidf__10,terms_abstract_tfidf__100,terms_abstract_tfidf__11,terms_abstract_tfidf__12,...,terms_abstract_tfidf__yield,terms_abstract_tfidf__yielded,terms_abstract_tfidf__young,terms_abstract_tfidf__young adults,terms_abstract_tfidf__young healthy,terms_abstract_tfidf__young older,terms_abstract_tfidf__younger,terms_abstract_tfidf__younger adults,terms_abstract_tfidf__youth,terms_abstract_tfidf__zone
0,10022492-1,10022492,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10022494-1,10022494,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10022496-1,10022496,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10051677-1,10051677,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10191322-1,10191322,1,0.0,0.0,0.0,0.0,0.0,0.0,0.079103,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Zoom in on a narrow band of term columns (positions 3210–3219) for the first
# rows, so that non-zero TF-IDF weights are visible instead of the mostly-zero
# truncated preview.
neurosynth_dset.annotations.head().iloc[:, 3210:3220]

Unnamed: 0,terms_abstract_tfidf__words,terms_abstract_tfidf__work,terms_abstract_tfidf__working,terms_abstract_tfidf__working memory,terms_abstract_tfidf__world,terms_abstract_tfidf__worse,terms_abstract_tfidf__written,terms_abstract_tfidf__year,terms_abstract_tfidf__year old,terms_abstract_tfidf__years
0,0.0,0.0,0.317592,0.320417,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.151279


In [7]:
# Show a sample of the annotation column names, i.e. the term labels available
# for decoding (positions 100–109 shown as an example).
list(neurosynth_dset.annotations.columns[100:110])

['terms_abstract_tfidf__actively',
 'terms_abstract_tfidf__activities',
 'terms_abstract_tfidf__acts',
 'terms_abstract_tfidf__actual',
 'terms_abstract_tfidf__actually',
 'terms_abstract_tfidf__acute',
 'terms_abstract_tfidf__ad',
 'terms_abstract_tfidf__adaptation',
 'terms_abstract_tfidf__adapted',
 'terms_abstract_tfidf__adaptive']

In [8]:
###############################################################################
# Download NeuroQuery dataset
# -----------------------------------------------------------------------------
# NeuroQuery's data files are stored at https://github.com/neuroquery/neuroquery_data.

from nimare import extract
import os
from nimare import io

# Single source of truth for the NeuroQuery working directory, so the download
# location and the saved Dataset file cannot drift apart.
NEUROQUERY_DIR = "./neuroquery_data"

# Download the desired version of NeuroQuery from GitHub.
# With overwrite=False, files that already exist locally are skipped.
files = extract.fetch_neuroquery(
    data_dir=NEUROQUERY_DIR,
    version="1",
    overwrite=False,
    source="combined",
    vocab="neuroquery6308",
    type="tfidf",
)
# Note that the files are saved to a new folder within `data_dir` named "neuroquery".
print(files)
# `files` is a list of dicts (keys used below: "coordinates", "metadata",
# "features"); use the first entry.
neuroquery_db = files[0]

# Note that the conversion function says "neurosynth".
# This is just for backwards compatibility.
neuroquery_dset = io.convert_neurosynth_to_dataset(
    coordinates_file=neuroquery_db["coordinates"],
    metadata_file=neuroquery_db["metadata"],
    annotations_files=neuroquery_db["features"],
)

# Save the Dataset for later use, next to the downloaded files.
neuroquery_dset.save(os.path.join(NEUROQUERY_DIR, "neuroquery_dataset.pkl.gz"))

INFO:nimare.extract.utils:Dataset found in ./neuroquery_data/neuroquery

INFO:nimare.extract.extract:Searching for any feature files matching the following criteria: [('source-combined', 'vocab-neuroquery6308', 'type-tfidf', 'data-neuroquery', 'version-1')]


Downloading data-neuroquery_version-1_coordinates.tsv.gz
File exists and overwrite is False. Skipping.
Downloading data-neuroquery_version-1_metadata.tsv.gz
File exists and overwrite is False. Skipping.
Downloading data-neuroquery_version-1_vocab-neuroquery6308_source-combined_type-tfidf_features.npz
File exists and overwrite is False. Skipping.
Downloading data-neuroquery_version-1_vocab-neuroquery6308_vocabulary.txt
File exists and overwrite is False. Skipping.
[{'coordinates': '/home/users/ysong30/Documents/Projects/Functional_Decoding_Tutorial/neuroquery_data/neuroquery/data-neuroquery_version-1_coordinates.tsv.gz', 'metadata': '/home/users/ysong30/Documents/Projects/Functional_Decoding_Tutorial/neuroquery_data/neuroquery/data-neuroquery_version-1_metadata.tsv.gz', 'features': [{'features': '/home/users/ysong30/Documents/Projects/Functional_Decoding_Tutorial/neuroquery_data/neuroquery/data-neuroquery_version-1_vocab-neuroquery6308_source-combined_type-tfidf_features.npz', 'vocabula



In [9]:
# Print the Dataset summary to check how many experiments it contains and
# which coordinate space it uses.
# All coordinates are represented in MNI152 space.
print(neuroquery_dset)

Dataset(13459 experiments, space='mni152_2mm')


In [10]:
# Show a sample of the annotation column names, i.e. the term labels available
# for decoding (positions 100–109 shown as an example).
list(neuroquery_dset.annotations.columns[100:110])

['neuroquery6308_combined_tfidf__add',
 'neuroquery6308_combined_tfidf__addiction',
 'neuroquery6308_combined_tfidf__addictive',
 'neuroquery6308_combined_tfidf__addictive behavior',
 'neuroquery6308_combined_tfidf__addition',
 'neuroquery6308_combined_tfidf__adenoma',
 'neuroquery6308_combined_tfidf__adhd',
 'neuroquery6308_combined_tfidf__adherence',
 'neuroquery6308_combined_tfidf__adherence medication',
 'neuroquery6308_combined_tfidf__adherence treatment']