<a href="https://colab.research.google.com/github/faizankshaikh/evaluating-deeplight-transfer/blob/master/experiments/data_preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

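# Download and preprocess HCP `WM` task-fMRI data for DeepLight

This notebook runs on Google Colab and, in order:

1. mounts Google Drive,
2. clones `evaluating-deeplight-transfer` and installs it with Poetry (TensorFlow pinned to 1.15.3),
3. creates an AWS config and asks for the AWS credentials,
4. downloads the `WM` task / `LR` run for a number of subjects and converts each to a TFRecord file,
5. zips the resulting files and moves the archive to Google Drive.

**TODO:** add a linked table of contents and a short explanation for each section.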

In [1]:
#@title Connect drive to load dataset

# Mount Google Drive at /content/drive; the final cell moves the zipped
# dataset into a folder below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#@title Install data preprocessing library

!git clone https://github.com/athms/evaluating-deeplight-transfer.git

%cd evaluating-deeplight-transfer/

!pip install -q poetry

Cloning into 'evaluating-deeplight-transfer'...
remote: Enumerating objects: 526, done.[K
remote: Total 526 (delta 0), reused 0 (delta 0), pack-reused 526[K
Receiving objects: 100% (526/526), 12.07 MiB | 16.09 MiB/s, done.
Resolving deltas: 100% (304/304), done.
/content/evaluating-deeplight-transfer
[K     |████████████████████████████████| 175 kB 9.8 MB/s 
[K     |████████████████████████████████| 91 kB 10.4 MB/s 
[K     |████████████████████████████████| 425 kB 19.8 MB/s 
[K     |████████████████████████████████| 54 kB 2.9 MB/s 
[K     |████████████████████████████████| 8.8 MB 48.9 MB/s 
[K     |████████████████████████████████| 40 kB 6.3 MB/s 
[K     |████████████████████████████████| 58 kB 6.1 MB/s 
[K     |████████████████████████████████| 58 kB 6.1 MB/s 
[K     |████████████████████████████████| 48 kB 4.7 MB/s 
[K     |████████████████████████████████| 4.0 MB 37.5 MB/s 
[K     |████████████████████████████████| 461 kB 45.9 MB/s 
[31mERROR: pip's dependency resolver

In [3]:
#@title Install data preprocessing library (contd)

# Overwrite the cloned pyproject.toml so that tensorflow-gpu is pinned to 1.15.3

%%writefile /content/evaluating-deeplight-transfer/pyproject.toml

[tool.poetry]
name = "evaluating-deeplight-transfer"
version = "0.1.0"
description = "Transfer learning with DeepLight"
authors = ["Armin W. Thomas <athms@stanford.edu>"]

[tool.poetry.dependencies]
python = ">=3.6.1,<4.0"
boto3 = "1.12.34"
botocore = "1.15.39"
einops = "0.3.2"
Keras = "2.2.4"
matplotlib = "3.3.4"
nilearn = "0.7.0"
numpy = "1.19.5"
tensorflow-gpu = "1.15.3"
tqdm = "4.60.0"
pandas = "1.1.5"
innvestigate = "1.0.8"
h5py = "2.10.0"

[tool.poetry.dev-dependencies]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

Overwriting /content/evaluating-deeplight-transfer/pyproject.toml


In [4]:
#@title Install data preprocessing library (contd)

# Resolve and install the dependencies declared in pyproject.toml.
# Output is redirected to log.txt to keep the cell output short.
!poetry update -q > log.txt

In [5]:
#@title Install data preprocessing library (contd)

%cd src/
!poetry run pip3 install -e .

/content/evaluating-deeplight-transfer/src
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Obtaining file:///content/evaluating-deeplight-transfer/src
  Preparing metadata (setup.py) ... [?25l[?25hdone
Installing collected packages: deeplight
  Running setup.py develop for deeplight
Successfully installed deeplight-0.1.0


In [6]:
#@title Create AWS config

!mkdir ~/.aws

In [7]:
#@title Create AWS config (contd)

%%writefile ~/.aws/config

[profile hcp]
region=eu-central-1

Writing /root/.aws/config


In [8]:
#@title Get AWS credentials

from getpass import getpass

# Read both keys with getpass so the typed values are not echoed into the
# notebook output. They are kept in kernel variables and passed to
# download_preprocess.py on its command line in a later cell.
print('Input AWS access key ID:')
aws_access_key_id = getpass()
print('Input AWS secret access key:')
aws_secret_access_key = getpass()

Input AWS access key ID:
··········
Input AWS secret access key:
··········


In [9]:
#@title Download data and preprocess

%cd ../scripts/

/content/evaluating-deeplight-transfer/scripts


In [10]:
#@title Download data and preprocess (contd)

# Write one script that merges the repository's download and preprocess scripts,
# restricted to task "WM" and run 'LR'.

%%writefile /content/evaluating-deeplight-transfer/scripts/download_preprocess.py

#!/usr/bin/python
import os
import argparse
import numpy as np
import tensorflow as tf
import hcprep
import deeplight
from glob import glob


def main():
  """Download HCP data for task 'WM' / run 'LR' and convert it to TFRecords.

  For every selected subject the function
    1. downloads the subject's data through ``hcprep.download``,
    2. checks that the three BIDS input files it needs are present,
    3. loads and preprocesses the data through ``hcprep``,
    4. writes the result to a GZIP-compressed TFRecord file, and
    5. deletes the intermediate ``*.txt``, ``*.gz`` and ``*.csv`` files found
       two directory levels below ``--path``.

  Command-line arguments:
    --ACCESS-KEY  AWS S3 access key (required).
    --SECRET-KEY  AWS S3 secret key (required).
    --path        Directory to store data in (default: ../data/).
    -n            Number of subjects to process (default: 3).
    -c            Index of the first subject to process, used to continue an
                  earlier download (default: 0).
  """
  np.random.seed(13089)

  ap = argparse.ArgumentParser()
  ap.add_argument(
    "--ACCESS-KEY",
    required=True,
    help="AWS S3 access key"
  )
  ap.add_argument(
    "--SECRET-KEY",
    required=True,
    help="AWS S3 secret key"
  )
  ap.add_argument(
    "--path",
    required=False,
    default='../data/',
    help="path to store data (default: ../data/)"
  )
  ap.add_argument(
    "-n",
    required=False,
    default=3,
    type=int,  # reject non-integer input at parse time with a usage message
    help="number of subjects to download per HCP task (1-500) (default: 3)"
  )
  ap.add_argument(
    "-c",
    required=False,
    default=0,
    type=int,
    help="continue download from which subject (default: 0)"
  )

  args = ap.parse_args()
  ACCESS_KEY = str(args.ACCESS_KEY)
  SECRET_KEY = str(args.SECRET_KEY)
  n_subjects = int(args.n)
  first = int(args.c)
  path = str(args.path)
  hcprep.paths.make_sure_path_exists(path)

  hcp_info = hcprep.info.basics()

  # Exclusive end index into the subject list when continuing from `first`.
  stop = first + n_subjects

  # Report the number of subjects handled in this run (not the end index).
  print(
    'Download and preprocess data of {} subjects to {}'.format(
      n_subjects, path
    )
  )

  # Only a single task/run combination is processed by this script.
  # NOTE(review): task_id=6 and run_id=0 are presumably the positions of 'WM'
  # and 'LR' in hcp_info.tasks / hcp_info.runs -- confirm against hcprep.
  task_id = 6
  task = 'WM'
  run_id = 0
  run = 'LR'

  # The three input files that must exist before preprocessing can start.
  required_path_fns = (
    hcprep.paths.path_bids_func_mni,
    hcprep.paths.path_bids_func_mask_mni,
    hcprep.paths.path_bids_EV,
  )

  # start=first keeps subject_id equal to the subject's index in the full
  # subject list, so ids written to the TFRecords stay unique across runs
  # that are resumed with -c instead of restarting at 0 each time.
  selected = hcp_info.subjects[task][first:stop]
  for subject_id, subject in enumerate(selected, start=first):
    print('\n\nWorking on subject {} of {}'.format(subject_id + 1, stop))

    hcprep.download.download_subject_data(
      ACCESS_KEY=ACCESS_KEY,
      SECRET_KEY=SECRET_KEY,
      subject=subject,
      task=task,
      run=run,
      output_path=path
    )

    print('Download done!')

    required_files = [
      path_fn(subject=subject, task=task, run=run, path=path)
      for path_fn in required_path_fns
    ]
    if not all(os.path.isfile(f) for f in required_files):
      print(
        'Skipping subject {} task {} run {}, because BIDS data not fully present.'.format(
          subject, task, run
        )
      )
      continue

    subject_data = hcprep.data.load_subject_data(
      task=task,
      subject=subject,
      runs=[run],
      path=path,
      t_r=hcp_info.t_r
    )

    # NOTE(review): units of high_pass / smoothing_fwhm are defined by hcprep
    # (presumably Hz and mm) -- confirm.
    func_imgs, states, trs = hcprep.preprocess.preprocess_subject_data(
      subject_data=subject_data,
      runs=[run],
      high_pass=1./128.,
      smoothing_fwhm=3
    )

    tf_record_options = tf.io.TFRecordOptions(
      compression_type="GZIP"
    )
    tfr_writer = tf.io.TFRecordWriter(
      hcprep.paths.path_bids_tfr(
        subject=subject,
        task=task,
        run=run,
        path=path
      ),
      options=tf_record_options
    )
    # Close the writer even if serialisation fails, so the file handle is not
    # leaked and whatever was written is flushed.
    try:
      deeplight.data.io.write_func_to_tfr(
        tfr_writers=[tfr_writer],
        func_data=func_imgs.get_fdata(),
        states=states,
        trs=trs,
        subject_id=subject_id,
        task_id=task_id,
        run_id=run_id,
        n_onehot=hcp_info.n_states_total, # total number of cognitive states across tasks (for one-hot encoding)
        onehot_task_idx=hcp_info.onehot_idx_per_task[task], # indices of current task in onehot encoding
        randomize_volumes=True
      )
    finally:
      tfr_writer.close()

    print('Preprocessing done! Deleting extra files')

    # Remove the downloaded/intermediate files below the configured output
    # directory (not a hard-coded '../data/'), keeping only the TFRecords.
    for pattern in ('*.txt', '*.gz', '*.csv'):
      for leftover in glob(os.path.join(path, '*', '*', pattern)):
        os.remove(leftover)
    

if __name__ == '__main__':
  # Run the pipeline only when the file is executed as a script.
  main()

Writing /content/evaluating-deeplight-transfer/scripts/download_preprocess.py


In [11]:
#@title Download data and preprocess (contd)

%%time
# Run the merged script for 20 subjects inside the poetry environment, using
# the credentials entered in the "Get AWS credentials" cell.
# NOTE(review): the keys are interpolated into the shell command line, so they
# may be visible to other processes on the VM while the script runs -- consider
# passing them through environment variables instead.
!poetry run python download_preprocess.py -n 20 --ACCESS-KEY $aws_access_key_id --SECRET-KEY $aws_secret_access_key

Using TensorFlow backend.
Download and preprocess data of 20 subjects to ../data/


Working on subject 1 of 20
downloading HCP/100307/MNINonLinear/Results/tfMRI_WM_LR/tfMRI_WM_LR.nii.gz  to  ../data/sub-100307/func/sub-100307_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-preproc_bold.nii.gz
downloading HCP/100307/MNINonLinear/Results/tfMRI_WM_LR/brainmask_fs.2.nii.gz  to  ../data/sub-100307/func/sub-100307_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-preproc_brain_mask.nii.gz
downloading HCP/100307/MNINonLinear/T1w.nii.gz  to  ../data/sub-100307/anat/sub-100307_space-MNI152NLin6Asym_res-2_desc-preproc_T1w.nii.gz
downloading HCP/100307/MNINonLinear/Results/tfMRI_WM_LR/EVs/0bk_body.txt  to  ../data/sub-100307/func/sub-100307_task-WM_run-LR_desc-EV_0bk_body.txt
downloading HCP/100307/MNINonLinear/Results/tfMRI_WM_LR/EVs/0bk_faces.txt  to  ../data/sub-100307/func/sub-100307_task-WM_run-LR_desc-EV_0bk_faces.txt
downloading HCP/100307/MNINonLinear/Results/tfMRI_WM_LR/EVs/0bk_places.txt 

In [12]:
#@title Check data size

!du -sh ../data

5.1G	../data


In [13]:
#@title Compress data and save to drive

%%time
# Bundle every file found two levels below ../data into a single archive.
# The TFRecords are already GZIP-compressed by download_preprocess.py, which
# is why zip reports "deflated 0%"; the archive is for bundling, not for size.
!zip data.zip ../data/*/*/*

# Move the archive into the mounted Google Drive folder.
# NOTE(review): assumes /content/drive/MyDrive/MAID/CV/misc already exists as
# a directory; otherwise mv would rename data.zip to "misc" -- confirm.
!mv data.zip /content/drive/MyDrive/MAID/CV/misc

  adding: ../data/sub-100307/func/sub-100307_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-100408/func/sub-100408_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-101006/func/sub-101006_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-101107/func/sub-101107_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-101309/func/sub-101309_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-101410/func/sub-101410_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-101915/func/sub-101915_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-102008/func/sub-102008_task-WM_run-LR_space-MNI152NLin6Asym_res-2_desc-tfr.tfrecords (deflated 0%)
  adding: ../data/sub-102311/fun