-
Notifications
You must be signed in to change notification settings - Fork 0
/
dcmanon.py
237 lines (188 loc) · 8.37 KB
/
dcmanon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
###################################################################################################
#
# Anonymizing a DICOM folder
#
# -The purpose of this script is to go through all folders and encrypt all patient names within said
# folders.
#
# -To run, Anaconda Python must be installed (https://www.continuum.io/downloads)
# -After installation, open up the Anaconda Command Prompt (http://i.imgur.com/xeWBABj.png)
# -Type in "pip install pydicom" (http://i.imgur.com/NgZvgJM.png)
# -Move the dcmanon.py file to the current directory (C:\User\USERNAME\Anaconda)
# -To test out, type in:
# python dcmanon.py -d "Z:\Images\Databases\SamplePatient"
# -d = directory
#
# Created by: Adrian Lam (ayplam@gmail.com)
# Date: 2015-11-30
#
###################################################################################################
import re
import os
import dicom
import numpy as np
import sys
from dicom.filereader import InvalidDicomError
from optparse import OptionParser
from multiprocessing import Pool
def create_ascii_encrypt_key():
    """Build the substitution table used to scramble printable ASCII text.

    The table is a deterministic shuffle of the character codes 32..127.
    A fixed string of decimal digits (the leading digits of pi) drives the
    shuffle: at each step the next two digits, or failing that the next
    single digit, are used as an index into the list of codes that have not
    been placed yet. Because the digit string is constant, every run of the
    script produces the same table.

    Returns:
        A float numpy array; entry ``i`` is the code that replaces the
        character with code ``32 + i``.
    """
    digits = (
        '314159265358979323846264338327'
        '950288419716939937510582097494'
        '459230781640628620899862803482'
        '534211706798214808651328230664'
        '709384460955058223172535940812'
        '848111745028410270193852110555'
        '964462294895493038196442881097'
        '566593344612847564823378678316'
        '527120190914564856692346034861'
        '045432664821339360726024914127'
    )
    remaining = list(range(32, 128))  # codes not yet placed in the table
    shuffled = []
    while remaining and len(digits) > 1:
        if int(digits[:2]) < len(remaining):
            # The two-digit number is a valid index: use it, consume both digits.
            shuffled.append(remaining.pop(int(digits[:2])))
            digits = digits[2:]
        elif int(digits[:1]) < len(remaining):
            # Fall back to the single leading digit.
            shuffled.append(remaining.pop(int(digits[:1])))
            digits = digits[1:]
        else:
            # Neither form indexes a remaining code: skip this digit.
            digits = digits[1:]
    return np.array(shuffled, dtype=float)
# Global variables
# KEY: character substitution table, computed once when the module is loaded.
KEY = create_ascii_encrypt_key()
# Names of the DICOM attributes whose string values are encrypted/decrypted.
fields_to_anon = [
    'PatientsName',
    'MedicalAlerts',
    'PatientsAddress',
    'SpecialNeeds',
]
# For a single dicom file.
def encrypt_dicom_name(dcm):
    """Encrypt or decrypt the fields in `fields_to_anon` of one DICOM file, in place.

    Args:
        dcm: a sequence ``(dicomname, (bool_encrypt, bool_digitcheck))``
            packed into one argument so it can be passed through a
            single-argument map call.
            dicomname       -- path of the file to process.
            bool_encrypt    -- True to encrypt the fields, False to decrypt.
            bool_digitcheck -- when encrypting, leave any field that contains
                               a digit untouched (it is assumed to have been
                               anonymized already).

    Encrypted values carry a "_JNO" suffix. The suffix prevents a value from
    being encrypted twice and identifies the values to restore on decryption.
    Fields containing "anonymous" or "volunteer" (any case) are never
    encrypted. The file is rewritten only if at least one field changed.

    Returns:
        None. Files that cannot be read as DICOM are skipped silently.
    """
    dcmname = dcm[0]
    opts = dcm[1]
    bool_encrypt = opts[0]
    bool_digitcheck = opts[1]

    try:
        dcminf = dicom.read_file(dcmname)
    except Exception:
        # Not readable as DICOM: simply skip the file. Catching Exception
        # (rather than a bare except) still lets KeyboardInterrupt and
        # SystemExit stop the run.
        return

    bool_write = False
    for field in fields_to_anon:
        # Skip fields that are absent from this file.
        if not hasattr(dcminf, field):
            continue
        name = getattr(dcminf, field)
        # Only string values are handled (basestring: Python 2 str/unicode).
        if not isinstance(name, basestring):
            continue
        # Non-ASCII characters are dropped before the substitution.
        name = name.encode('ascii', 'ignore')

        if bool_encrypt:
            lowered = name.lower()
            # Already-anonymous placeholders are left as they are.
            if "anonymous" in lowered or "volunteer" in lowered:
                continue
            # "_JNO" marks a value that has already been encrypted.
            if name.endswith("_JNO"):
                continue
            # Optional safety: a value containing digits is assumed to be
            # anonymized already.
            if bool_digitcheck and re.search(r'\d', name):
                continue
            setattr(dcminf, field, encrypt_string(name, KEY) + "_JNO")
            bool_write = True
        elif name.endswith("_JNO"):
            # Decrypting: only values carrying the marker were encrypted.
            setattr(dcminf, field, unencrypt_string(name[:-4], KEY))
            bool_write = True

    if bool_write:
        dicom.write_file(dcmname, dcminf)
def encrypt_string(string, randomizer):
    """Return `string` with every character replaced through `randomizer`.

    Args:
        string: text to scramble.
        randomizer: indexable table of character codes; the character with
            code ``c`` is replaced by ``chr(int(randomizer[c - 32]))``.

    NOTE: the index ``c - 32`` is not range-checked. A character with a code
    below 32 produces a negative index, which counts from the end of the
    table; a code beyond the end of the table raises IndexError.
    """
    return ''.join(chr(int(randomizer[ord(ch) - 32])) for ch in string)
def unencrypt_string(string, randomizer):
    """Reverse `encrypt_string`: map each character back through `randomizer`.

    Args:
        string: text previously produced with the same `randomizer`
            (without any suffix appended afterwards).
        randomizer: the substitution table used for encryption; it must be a
            permutation of the codes 32..(32 + len(randomizer) - 1) for the
            inversion to be exact.

    Returns:
        The original text.

    The inverse table is derived from the `randomizer` argument itself, so
    the function works with any table and not only the module-level KEY.
    """
    # argsort of a permutation yields its inverse: position j holds the
    # index i for which randomizer[i] is the j-th smallest code (32 + j).
    inverse = np.argsort(randomizer) + 32
    return ''.join(chr(int(inverse[ord(ch) - 32])) for ch in string)
def _read_directory_list(list_path):
    """Return the non-blank lines of the text file `list_path`, one directory per line.

    Surrounding whitespace is stripped so that a stray carriage return from
    a file saved with Windows line endings does not end up in the path.
    """
    with open(list_path, 'r') as f:
        return [line.strip() for line in f if line.strip()]


def main():
    """Command-line entry point: encrypt or decrypt every file below the chosen directories.

    Options:
        -d/--dir       directory to process (default: current directory).
        -f/--filename  text file listing one directory per line; when given
                       it takes precedence over -d.
        -v/--verbose   print every folder as it is visited.
        -u/--anon      decrypt instead of encrypt.
        -n/--numbers   when encrypting, skip fields that contain digits.

    Every file found by walking the directories is handed to
    encrypt_dicom_name through a pool of worker processes.
    """
    # Example:
    #   python dcmanon.py -d "Z:\Images\Databases\SamplePatient"
    parser = OptionParser(usage="usage: %prog [options] filename")
    parser.add_option("-d", "--dir",
                      action="store",  # optional because action defaults to "store"
                      type="string",
                      dest="directory",
                      default=os.getcwd(),
                      help="Directory file to search through")
    parser.add_option("-f", "--filename",
                      action="store",  # optional because action defaults to "store"
                      type="string",
                      dest="filename",
                      default="",
                      help="Text file listing the directories to search through, one per line")
    parser.add_option("-v", "--verbose",
                      action="store_true",
                      dest="verbose",  # show ALL directories the script goes through
                      help="Show current folder")
    parser.add_option("-u", "--anon",
                      action="store_false",  # giving the flag switches to decryption
                      dest="anon",
                      default=True,  # default option is to encrypt
                      help="Flag to decrypt dicom fields. Default is to encrypt fields")
    parser.add_option("-n", "--numbers",
                      action="store_true",
                      dest="numbers",
                      default=False,
                      help="Check for numbers (0-9) in the field. If numbers exist in field, do not encrypt")
    options, _ = parser.parse_args()

    # Allow a text file to be read to automatically anonymize multiple folders.
    if options.filename:
        directories_to_anonymize = _read_directory_list(options.filename)
    else:
        # If no text file is specified, use the -d directory (default: cwd).
        directories_to_anonymize = [options.directory]

    # Options forwarded to encrypt_dicom_name together with every file name.
    opt_tuple = (options.anon, options.numbers)

    max_attempts = 5
    p = Pool(20)  # 20 worker processes share the per-file work
    try:
        print(directories_to_anonymize)

        # base is the base directory to search in and get ALL subfolders.
        for base in directories_to_anonymize:
            print("Current main directory: %s" % base)
            if options.verbose:
                if options.anon:
                    print("Encrypting the following folders...")
                else:
                    print("Decrypting the following folders...")

            # os.walk visits every subdirectory; "filenames" lists the files
            # directly inside the current "dirname".
            for dirname, dirnames, filenames in os.walk(base):
                if options.verbose:
                    print(dirname)
                if not filenames:
                    continue

                # Attach the options to each file so one argument carries both.
                jobs = [[os.path.join(dirname, filename), opt_tuple]
                        for filename in filenames]

                # The parallel map occasionally fails for reasons that are not
                # understood, so each directory gets several attempts.
                for _attempt in range(max_attempts):
                    try:
                        p.map(encrypt_dicom_name, jobs)
                    except Exception:
                        continue
                    break
                else:
                    # Every attempt failed: report the directory and move on.
                    print("DIRECTORY FAILED TO ANONYMIZE:  %s" % dirname)
    except BaseException:
        # Abort outstanding work (including on Ctrl-C) instead of waiting for it.
        p.terminate()
        raise
    else:
        p.close()
    finally:
        p.join()
# Run main() only when this file is executed as a script, not when it is
# imported. main() starts a multiprocessing Pool; on platforms where worker
# processes re-import this module (e.g. Windows), this guard keeps them from
# starting the whole job again.
if __name__ == "__main__":
    main()