forked from NeuroanatomyAndConnectivity/LeiCA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_09_nki_delete_duplicates_and_build_subjects_list_r6_onward_for_LeiCA.py
104 lines (80 loc) · 3.45 KB
/
run_09_nki_delete_duplicates_and_build_subjects_list_r6_onward_for_LeiCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os, glob, datetime, shutil
import json
from variables import dicom_dir, freesurfer_dir, subjects_dir
# Dry-run switch: shutil.rmtree is only called when this is True. With False the
# script still prints its 'deleted ...' messages but leaves the directories in place.
doDelete = False
def get_max_n_elements_in_dir_index(dir_list):
    """Return the index of the directory in dir_list that contains the most entries.

    Parameters
    ----------
    dir_list : list of str
        Directory paths to compare.

    Returns
    -------
    numpy integer
        Position in dir_list of the directory with the largest number of
        (non-hidden) entries. On ties the first such position is returned.
        A path that does not exist counts as having zero entries.

    Raises
    ------
    ValueError
        If dir_list is empty (raised by numpy.argmax).
    """
    import numpy as np
    # glob.glob(d) on a plain directory path only yields [d] itself, which made
    # every count 1 and the result always 0. Count what is *inside* d instead.
    n_elements = [len(glob.glob(os.path.join(d, '*'))) for d in dir_list]
    return np.argmax(n_elements)
# Glob patterns of the image series (inside a visit-2 directory) that are
# checked for duplicate acquisitions.
check_img_list = ['REST_645_*', 'REST_1400_*', 'MPRAGE_SIEMENS_DEFACED*', 'REST_CAP_*', 'DIFF_137_AP_*']

#dicom_dir='/scr/adenauer2/Franz/r7_test/nki/dicom/triotim/mmilham/discoverysci_30001'
#dicom_dir='/scr/adenauer2/Franz/r7_test/nki/dicom/triotim/mmilham/'

# All relative globs below are resolved against dicom_dir.
os.chdir(dicom_dir)

# Subject directories as '<parent>/<subject>' relative paths.
ugly_subjects_list = glob.glob('*/A*')
no_v2 = []  # subjects without visit2

# ---------------------------------------------------------------------------
# Step 1: remove duplicates (visit-2 directories and image series)
# ---------------------------------------------------------------------------
for s in ugly_subjects_list:
    v2 = glob.glob(os.path.join(s, '*_V2'))
    if not v2:
        no_v2.append(s)

    if len(v2) > 1:
        # Keep only the visit-2 directory picked by
        # get_max_n_elements_in_dir_index (intended: the one with the most
        # elements) and delete all others.
        keep_dir = v2[get_max_n_elements_in_dir_index(v2)]
        dirs_to_del = [d for d in v2 if d != keep_dir]
        for dir_del in dirs_to_del:
            # The message is printed even in dry-run mode (doDelete False).
            print('deleted %s' % dir_del)
            if doDelete:
                shutil.rmtree(dir_del)
        # update v2 (still > 1 entries when nothing was actually deleted)
        v2 = glob.glob(os.path.join(s, '*_V2'))

    if len(v2) == 1:  # only check for duplicates if visit2 is there
        # check sequence duplicates
        for sequence in check_img_list:
            # glob returns matches in arbitrary order; sort so that "the last
            # one" is well defined and the same on every run.
            se = sorted(glob.glob(os.path.join(s, '*_V2', sequence)))
            if len(se) > 1:  # duplicates: remove all but the last one
                dirs_to_del = se[:-1]
                print('%s %s' % (se, dirs_to_del))
                for dir_del in dirs_to_del:
                    print('deleted %s' % dir_del)
                    if doDelete:
                        shutil.rmtree(dir_del)

# remove no v2 subjects from subject_list
ugly_subjects_list = [s for s in ugly_subjects_list if s not in no_v2]
subjects_list = [os.path.basename(s) for s in ugly_subjects_list]

# ---------------------------------------------------------------------------
# Step 2: check that every remaining subject has all required data
# ---------------------------------------------------------------------------
# Required image series: short label -> glob pattern inside the visit-2 directory.
dicom_dict = {'t1w': 'MPRAGE_SIEMENS_DEFACED*',
              'rs': 'REST_645_*',
              'diffusion': 'DIFF_137_AP_*'}
# Per label, the subjects for which that item is missing ('fs' = freesurfer dir).
dicom_missing = {'t1w': [], 'rs': [], 'diffusion': [], 'fs': []}
bad_subjects_list = []

for subject in subjects_list:
    for img, img_file in dicom_dict.items():
        check_file = glob.glob(os.path.join(dicom_dir, '*', subject, '*_V2', img_file))
        if not check_file:
            print('NOOOK: %s %s does not exist' % (subject, img))
            dicom_missing[img].append(subject)
            if subject not in bad_subjects_list:
                bad_subjects_list.append(subject)

    # check if fs dir exists
    check_file = os.path.join(freesurfer_dir, subject)
    if not os.path.exists(check_file):
        print('NOOOK: %s %s does not exist' % (subject, 'fs'))
        dicom_missing['fs'].append(subject)
        if subject not in bad_subjects_list:
            bad_subjects_list.append(subject)

print(bad_subjects_list)

for bad_subject in bad_subjects_list:
    subjects_list.remove(bad_subject)

print(len(subjects_list))

# ---------------------------------------------------------------------------
# Step 3: write the result files into subjects_dir
# ---------------------------------------------------------------------------
# File name tag: today's date (YYYY-MM-DD) plus a fixed release suffix.
td_str = datetime.date.today().isoformat() + '_r6_r_7'

# Subjects that passed all checks, one ID per line.
filename = os.path.join(subjects_dir, 'subjects_%s.txt' % td_str)
with open(filename, 'w') as out_file:
    for subject in subjects_list:
        out_file.write('%s\n' % subject)

# Subjects excluded because at least one required item is missing.
filename = os.path.join(subjects_dir, 'subjects_%s_excluded.txt' % td_str)
with open(filename, 'w') as out_file:
    for subject in bad_subjects_list:
        out_file.write('%s\n' % subject)

# Which item is missing for which subject, as JSON.
filename = os.path.join(subjects_dir, 'subjects_%s_missing_files.txt' % td_str)
with open(filename, 'w') as out_file:
    json.dump(dicom_missing, out_file)