# File moving and rearranging (04-12-2020)
In this notebook, we do a few things in order to get the DICOMS rearranged into the structure that we need to run `heudiconv` on the cluster so that we can have the `.nii` files we need to run `fMRIPrep`.

1. We create a folder using the `mkdir` command (the `!` at the beginning of the cell signifies that we will be running the contents of the cell in bash instead of python)
2. Create a list of the folders that correspond to each subject (`subs`), then use a string operation on this list (`sub.split()`) to create a `sublist` containing just the subject number for each subject. 
3. Use this `sublist` to create more directories (using `os.mkdir()` since we have to embed the command in python code)
4. Create variables for each file category containing each of the files that we want to move. We acquire these lists using `glob`.
5. Move and rearrange the files (detailed below).

In [1]:
import glob
import os
import tarfile
import numpy as np

In [6]:
# ! pwd

/lustre/scratch/mzielins/collab_files/code


In [8]:
os.chdir("../")

In [8]:
# !mkdir "dicomdir"

In [7]:
subs = glob.glob("../../[0-3][0-9]")

In [8]:
sublist=[]
for sub in subs:
    s = sub.split("/")[-1]
    sublist.append(s)

In [9]:
sublist

['18',
 '13',
 '21',
 '17',
 '20',
 '06',
 '14',
 '09',
 '23',
 '15',
 '05',
 '28',
 '19',
 '01',
 '07',
 '04',
 '02',
 '10',
 '26',
 '12',
 '29',
 '11',
 '03',
 '27',
 '25',
 '22',
 '30',
 '08',
 '16',
 '24']

Making directories for each subject in the `rearranged_files` directory

In [5]:
for sub in sublist:
#     os.mkdir("dicomdir/sub-"+sub)
    print("making directory "+"dicomdir/sub-"+sub)

In [9]:
bold_files = glob.glob("../[0-3][0-9]/*/EP2D_PACE_MOCO_RUN*/*")
t1s = glob.glob("../[0-3][0-9]/*/T1_MPRAGE_SAG_P2_ISO_0_9_0005/*")
DTIs1 = glob.glob("../[0-3][0-9]/*/EP2D_DIFF_MDDW_64_P2_TENSOR_0025/*")
DTIs2 = glob.glob("../[0-3][0-9]/*/EP2D_DIFF_MDDW_64_P2_TRACEW_0022/*")

## Doing the file moving and rearranging
In the below three cells, we do the following three things `for` each subject in our `sublist`:
1. Create a list of the `bold_files` that we have for the subject
2. `for` each of these bold files, we create a new name for the file that leaves out anything before the filename itself (`new_name`), then add the file to a `tar` file that is located in the correct directory.
3. Once all of the files are added, we `untar` the files through the `tar.extractall()` function
4. Delete the old tarfile since we don't need it any more. 
5. `print()` that we are done with that particular subject so we can keep an eye on the progress of the mover.

In [None]:
for sub in sublist:
    if sub == "01":
        bold_files = glob.glob("../01/*/EP2D_PACE_MOCO_RUN*/*")
        bold_dupes = [f for f in bold_files if "2019.08.02" in f]
        bold_files = [f for f in bold_files if f not in bold_dupes]
    elif sub == "18":
        bold_files = glob.glob("../18/*/EP2D_PACE_MOCO_RUN*/*")
        run1_dupes = [f for f in bold_files if "RUN1" in f and "2019.09.23.14.44" in f]
        run2_dupes = [f for f in bold_files if "RUN2" in f and "2019.09.23.14.35" in f]
        bold_files = [f for f in bold_files if f not in run1_dupes and f not in run2_dupes]
    elif sub == "20":
        run1_files = glob.glob("../20/*/EP2D_PACE_MOCO_RUN1_2*/*")
        run2_files = glob.glob("../20/*/EP2D_PACE_MOCO_RUN2*/*")
        bold_files = run1_files + run2_files
    else:
        bold_files = glob.glob(f"../{sub}/*/EP2D_PACE_MOCO_RUN*/*")
    with tarfile.open(f"./dicomdir/sub-{sub}/bold_files.tar.gz", "w:gz") as tar:
        for f in bold_files:
            new_name = f.split("/")[-1]
            tar.add(f, arcname = new_name)
    nfiles = len(bold_files)
    tf = tarfile.open(f"./dicomdir/sub-{sub}/bold_files.tar.gz")
    tf.extractall(f"./dicomdir/sub-{sub}/")
    os.remove(f"./dicomdir/sub-{sub}/bold_files.tar.gz")
    print(f"Finished moving {nfiles} files for sub-{sub}!")

In [13]:
for sub in sublist:
    if sub == "01":
        T1s = glob.glob("../01/*/T1_MPRAGE*/*")
        T1_dupes = [f for f in T1s if "2019.08.02" in f]
        T1s = [f for f in T1s if f not in T1_dupes]
    elif sub == "18":
        T1s = glob.glob("../18/*/T1_MPRAGE*/*")
        T1_dupes = [f for f in T1s if "2019.09.23.14.35" in f]
        T1s = [f for f in T1s if f not in T1_dupes]
    else:
        T1s = glob.glob(f"../{sub}/*/T1_MPRAGE*/*")
    with tarfile.open(f"./dicomdir/sub-{sub}/t1s.tar.gz", "w:gz") as tar:
        for f in T1s:
            new_name = f.split("/")[-1]
            tar.add(f, arcname = new_name)
    nfiles = len(T1s)
    tf = tarfile.open(f"./dicomdir/sub-{sub}/t1s.tar.gz")
    tf.extractall(f"./dicomdir/sub-{sub}/")
    os.remove(f"./dicomdir/sub-{sub}/t1s.tar.gz")
    print(f"Finished moving {nfiles} T1 files for sub-{sub}!")

Finished moving 192 T1 files for sub-18!
Finished moving 192 T1 files for sub-13!
Finished moving 192 T1 files for sub-21!
Finished moving 192 T1 files for sub-17!
Finished moving 192 T1 files for sub-20!
Finished moving 192 T1 files for sub-06!
Finished moving 192 T1 files for sub-14!
Finished moving 192 T1 files for sub-09!
Finished moving 192 T1 files for sub-23!
Finished moving 192 T1 files for sub-15!
Finished moving 192 T1 files for sub-05!
Finished moving 192 T1 files for sub-28!
Finished moving 192 T1 files for sub-19!
Finished moving 192 T1 files for sub-01!
Finished moving 192 T1 files for sub-07!
Finished moving 192 T1 files for sub-04!
Finished moving 192 T1 files for sub-02!
Finished moving 192 T1 files for sub-10!
Finished moving 192 T1 files for sub-26!
Finished moving 192 T1 files for sub-12!
Finished moving 192 T1 files for sub-29!
Finished moving 192 T1 files for sub-11!
Finished moving 192 T1 files for sub-03!
Finished moving 192 T1 files for sub-27!
Finished moving 

In [14]:
for sub in sublist:
    DTIs1 = glob.glob(f"../{sub}/*/EP2D_DIFF_MDDW_64_P2_TENSOR*/*")
    DTIs2 = glob.glob(f"../{sub}/*/EP2D_DIFF_MDDW_64_P2_TRACEW*/*")
    DTIs = DTIs1 + DTIs2
    nfiles = len(DTIs)
    with tarfile.open(f"./dicomdir/sub-{sub}/DTIs.tar.gz", "w:gz") as tar:
        for f in DTIs:
            new_name = f.split("/")[-1]
            tar.add(f, arcname = new_name)
    tf = tarfile.open(f"./dicomdir/sub-{sub}/DTIs.tar.gz")
    tf.extractall(f"./dicomdir/sub-{sub}/")
    os.remove(f"./dicomdir/sub-{sub}/DTIs.tar.gz")
    print(f"Finished moving {nfiles} DTI files for sub-{sub}!")

Finished moving 38 DTI files for sub-18!
Finished moving 38 DTI files for sub-13!
Finished moving 38 DTI files for sub-21!
Finished moving 38 DTI files for sub-17!
Finished moving 38 DTI files for sub-20!
Finished moving 38 DTI files for sub-06!
Finished moving 38 DTI files for sub-14!
Finished moving 38 DTI files for sub-09!
Finished moving 38 DTI files for sub-23!
Finished moving 38 DTI files for sub-15!
Finished moving 38 DTI files for sub-05!
Finished moving 38 DTI files for sub-28!
Finished moving 38 DTI files for sub-19!
Finished moving 38 DTI files for sub-01!
Finished moving 38 DTI files for sub-07!
Finished moving 38 DTI files for sub-04!
Finished moving 38 DTI files for sub-02!
Finished moving 38 DTI files for sub-10!
Finished moving 0 DTI files for sub-26!
Finished moving 38 DTI files for sub-12!
Finished moving 38 DTI files for sub-29!
Finished moving 38 DTI files for sub-11!
Finished moving 38 DTI files for sub-03!
Finished moving 38 DTI files for sub-27!
Finished moving 3

## Catching subject 20's non-standard file structure
There was a typo in the code for moving subject 20's data around so I'm just going to re-do subject 20 here. If we re-run all of the file moving code above it should be fixed and we don't have to run this again.

In [19]:
data_dir = "/lustre/scratch/mzielins"
out_dir = data_dir + "/collab_files/dicomdir"

In [21]:
T1s = glob.glob(data_dir + "/20/*/T1_MPRAGE*/*")
nfiles = len(T1s)

run1_files = glob.glob(data_dir + "/20/*/EP2D_PACE_MOCO_RUN1_2*/*")
run2_files = glob.glob(data_dir + "/20/*/EP2D_PACE_MOCO_RUN2*/*")
bold_files = run1_files + run2_files

DTIs1 = glob.glob(data_dir + "/20/*/EP2D_DIFF_MDDW_64_P2_TENSOR*/*")
DTIs2 = glob.glob(data_dir + "/20/*/EP2D_DIFF_MDDW_64_P2_TRACEW*/*")
DTIs = DTIs1 + DTIs2

# Processing T1s
with tarfile.open(out_dir + "/sub-20/t1s.tar.gz", "w:gz") as tar:
        for f in T1s:
            new_name = f.split("/")[-1]
            tar.add(f, arcname = new_name)
            

tf = tarfile.open(out_dir + "/sub-20/t1s.tar.gz")
tf.extractall(out_dir + "/sub-20/")
os.remove(out_dir + "/sub-20/t1s.tar.gz")
print(f"Finished moving {nfiles} T1 files for sub-20!")

# Processing BOLD Files

with tarfile.open(out_dir + "/sub-20/bold_files.tar.gz", "w:gz") as tar:
    for f in bold_files:
        new_name = f.split("/")[-1]
        tar.add(f, arcname = new_name)
nfiles = len(bold_files)
tf = tarfile.open(out_dir + "/sub-20/bold_files.tar.gz")
tf.extractall(out_dir + "/sub-20/")
os.remove(out_dir + "/sub-20/bold_files.tar.gz")
print(f"Finished moving {nfiles} BOLD files for sub-20!")

# Processing DTI Files
with tarfile.open(out_dir + "/sub-20/DTIs.tar.gz", "w:gz") as tar:
    for f in DTIs:
        new_name = f.split("/")[-1]
        tar.add(f, arcname = new_name)
        
tf = tarfile.open(out_dir + "/sub-20/DTIs.tar.gz")
tf.extractall(out_dir + "/sub-20/")
os.remove(out_dir + "/sub-20/DTIs.tar.gz")
print(f"Finished moving {nfiles} DTI files for sub-20!")

Finished moving 192 T1 files for sub-20!
Finished moving 392 files for sub-20!
Finished moving 392 DTI files for sub-20!
