-
Notifications
You must be signed in to change notification settings - Fork 229
/
ixi.py
250 lines (216 loc) · 8.94 KB
/
ixi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
"""
The `Information eXtraction from Images (IXI)`_
dataset contains "nearly 600 MR images from normal, healthy subjects",
including "T1, T2 and PD-weighted images,
MRA images and Diffusion-weighted images (15 directions)".
.. note ::
This data is made available under the
Creative Commons CC BY-SA 3.0 license.
If you use it please acknowledge the source of the IXI data, e.g.
`the IXI website <https://brain-development.org/ixi-dataset/>`_.
.. _Information eXtraction from Images (IXI): https://brain-development.org/ixi-dataset/
""" # noqa: E501
# Adapted from
# https://pytorch.org/docs/stable/_modules/torchvision/datasets/mnist.html#MNIST
import shutil
from pathlib import Path
from typing import Optional, Sequence
from tempfile import NamedTemporaryFile
from ..typing import TypePath
from ..transforms import Transform
from ..download import download_and_extract_archive
from .. import SubjectsDataset, Subject, ScalarImage, LabelMap
class IXI(SubjectsDataset):
    """Full IXI dataset.

    Args:
        root: Root directory to which the dataset will be downloaded.
        transform: An instance of
            :class:`~torchio.transforms.transform.Transform`.
        download: If set to ``True``, will download the data into :attr:`root`.
        modalities: List of modalities to be downloaded. They must be in
            ``('T1', 'T2', 'PD', 'MRA', 'DTI')``.
        **kwargs: Forwarded to the parent class constructor.

    Raises:
        ValueError: If ``modalities`` is empty or contains a modality that is
            not a key of :attr:`md5_dict`.
        RuntimeError: If, after the optional download, the directory of any
            requested modality is missing under ``root``.

    .. warning:: The size of this dataset is multiple GB.
        If you set :attr:`download` to ``True``, it will take some time
        to be downloaded if it is not already present.

    Example::

        >>> import torchio as tio
        >>> transforms = [
        ...     tio.ToCanonical(),  # to RAS
        ...     tio.Resample((1, 1, 1)),  # to 1 mm iso
        ... ]
        >>> ixi_dataset = tio.datasets.IXI(
        ...     'path/to/ixi_root/',
        ...     modalities=('T1', 'T2'),
        ...     transform=tio.Compose(transforms),
        ...     download=True,
        ... )
        >>> print('Number of subjects in dataset:', len(ixi_dataset))  # 577
        >>> sample_subject = ixi_dataset[0]
        >>> print('Keys in subject:', tuple(sample_subject.keys()))  # ('T1', 'T2')
        >>> print('Shape of T1 data:', sample_subject['T1'].shape)  # [1, 180, 268, 268]
        >>> print('Shape of T2 data:', sample_subject['T2'].shape)  # [1, 241, 257, 188]
    """  # noqa: E501

    # URL template of the per-modality archives; ``{modality}`` is filled in
    # by ``_download``.
    base_url = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-{modality}.tar'  # noqa: FS003,E501
    # Supported modalities mapped to the MD5 checksum passed to the downloader.
    md5_dict = {
        'T1': '34901a0593b41dd19c1a1f746eac2d58',
        'T2': 'e3140d78730ecdd32ba92da48c0a9aaa',
        'PD': '88ecd9d1fa33cb4a2278183b42ffd749',
        'MRA': '29be7d2fee3998f978a55a9bdaf3407e',
        'DTI': '636573825b1c8b9e8c78f1877df3ee66',
    }

    def __init__(
        self,
        root: TypePath,
        transform: Optional[Transform] = None,
        download: bool = False,
        modalities: Sequence[str] = ('T1', 'T2'),
        **kwargs,
    ):
        root = Path(root)
        # The first modality is used as the reference when listing subjects,
        # so fail early with a clear message instead of an IndexError later.
        if not modalities:
            raise ValueError('At least one modality must be specified')
        for modality in modalities:
            if modality not in self.md5_dict:
                message = (
                    f'Modality "{modality}" must be'
                    f' one of {tuple(self.md5_dict.keys())}'
                )
                raise ValueError(message)
        if download:
            self._download(root, modalities)
        if not self._check_exists(root, modalities):
            message = (
                'Dataset not found.'
                ' You can use download=True to download it'
            )
            raise RuntimeError(message)
        subjects_list = self._get_subjects_list(root, modalities)
        super().__init__(subjects_list, transform=transform, **kwargs)

    @staticmethod
    def _check_exists(root, modalities):
        """Return ``True`` if ``root`` has a directory for every modality."""
        return all((root / modality).is_dir() for modality in modalities)

    @staticmethod
    def _get_subjects_list(root, modalities):
        """Build one subject per file of the first modality.

        Subjects for which any of the remaining modalities has no matching
        ``{subject_id}-{modality}.nii.gz`` file are left out.
        """
        # The number of files for each modality is not the same
        # E.g. 581 for T1, 578 for T2
        # Let's just use the first modality as reference for now
        # I.e. only subjects with all modalities will be included
        one_modality = modalities[0]
        subjects = []
        for filepath in sglob(root / one_modality, '*.nii.gz'):
            subject_id = get_subject_id(filepath)
            images_dict = {
                'subject_id': subject_id,
                one_modality: ScalarImage(filepath),
            }
            for modality in modalities[1:]:
                globbed = sglob(
                    root / modality,
                    f'{subject_id}-{modality}.nii.gz',
                )
                if not globbed:
                    break  # modality missing: skip this subject
                assert len(globbed) == 1
                images_dict[modality] = ScalarImage(globbed[0])
            else:
                # Reached only when no modality was missing
                subjects.append(Subject(**images_dict))
        return subjects

    def _download(self, root, modalities):
        """Download the IXI data if it does not exist already.

        A modality whose directory already exists under ``root`` is assumed
        to be present and is skipped. If fetching a modality fails, the
        directory created for it is removed so that a later call retries the
        download instead of skipping it. The temporary archive file is
        deleted in every case.
        """
        for modality in modalities:
            modality_dir = root / modality
            if modality_dir.is_dir():
                continue
            modality_dir.mkdir(exist_ok=True, parents=True)

            # download files
            url = self.base_url.format(modality=modality)
            md5 = self.md5_dict[modality]
            archive_path = None
            succeeded = False
            try:
                # delete=False is kept from the original code; the file is
                # removed explicitly below, once the handle has been closed.
                with NamedTemporaryFile(suffix='.tar', delete=False) as f:
                    archive_path = Path(f.name)
                    download_and_extract_archive(
                        url,
                        download_root=modality_dir,
                        filename=f.name,
                        md5=md5,
                    )
                succeeded = True
            finally:
                if archive_path is not None:
                    try:
                        archive_path.unlink()
                    except OSError:
                        # Best-effort cleanup: never mask the original error
                        pass
                if not succeeded:
                    # The directory did not exist before this call, so it
                    # only holds leftovers of the failed attempt.
                    shutil.rmtree(modality_dir, ignore_errors=True)
class IXITiny(SubjectsDataset):
    r"""
    This is the dataset used in the main `notebook`_.
    It is a tiny version of IXI, containing 566 :math:`T_1`-weighted brain MR
    images and their corresponding brain segmentations,
    all with size :math:`83 \times 44 \times 55`.

    It can be used as a medical image MNIST.

    Args:
        root: Root directory to which the dataset will be downloaded.
        transform: An instance of
            :class:`~torchio.transforms.transform.Transform`.
        download: If set to ``True``, will download the data into :attr:`root`.
        **kwargs: Forwarded to the parent class constructor.

    Raises:
        RuntimeError: If ``root`` is not a directory (after the optional
            download), or if the number of images and labels differ.
        FileNotFoundError: If no images or no labels are found under ``root``.

    .. _notebook: https://github.com/fepegar/torchio/blob/master/tutorials/README.md
    """  # noqa: E501

    # Location and MD5 checksum of the archive fetched by ``_download``.
    url = 'https://www.dropbox.com/s/ogxjwjxdv5mieah/ixi_tiny.zip?dl=1'
    md5 = 'bfb60f4074283d78622760230bfa1f98'

    def __init__(
        self,
        root: TypePath,
        transform: Optional[Transform] = None,
        download: bool = False,
        **kwargs,
    ):
        root = Path(root)
        if download:
            self._download(root)
        if not root.is_dir():
            message = (
                'Dataset not found.'
                ' You can use download=True to download it'
            )
            raise RuntimeError(message)
        subjects_list = self._get_subjects_list(root)
        super().__init__(subjects_list, transform=transform, **kwargs)

    @staticmethod
    def _get_subjects_list(root):
        """Pair the files in ``root/image`` and ``root/label`` into subjects.

        Images and labels are matched by their position in the two sorted
        file lists.
        """
        image_paths = sglob(root / 'image', '*.nii.gz')
        label_paths = sglob(root / 'label', '*.nii.gz')
        if not (image_paths and label_paths):
            message = (
                f'Images not found. Remove the root directory ({root})'
                ' and try again'
            )
            raise FileNotFoundError(message)
        if len(image_paths) != len(label_paths):
            # Pairing is positional, so a count mismatch would silently drop
            # files and could associate an image with the wrong label.
            message = (
                f'Found {len(image_paths)} images but {len(label_paths)}'
                f' labels. Remove the root directory ({root})'
                ' and try again'
            )
            raise RuntimeError(message)

        subjects = []
        for image_path, label_path in zip(image_paths, label_paths):
            subject = Subject(
                image=ScalarImage(image_path),
                label=LabelMap(label_path),
                subject_id=get_subject_id(image_path),
            )
            subjects.append(subject)
        return subjects

    def _download(self, root):
        """Download the tiny IXI data if it doesn't exist already.

        After extraction, the ``image`` and ``label`` directories are moved
        from ``root/ixi_tiny`` up into ``root``. If any step fails, ``root``
        is removed so that a later call retries the download instead of
        assuming it already happened. The temporary archive file is deleted
        in every case.
        """
        if root.is_dir():  # assume it's been downloaded
            print('Root directory for IXITiny found:', root)  # noqa: T001
            return
        print('Root directory for IXITiny not found:', root)  # noqa: T001
        print('Downloading...')  # noqa: T001

        archive_path = None
        succeeded = False
        try:
            # delete=False is kept from the original code; the file is
            # removed explicitly below, once the handle has been closed.
            with NamedTemporaryFile(suffix='.zip', delete=False) as f:
                archive_path = Path(f.name)
                download_and_extract_archive(
                    self.url,
                    download_root=root,
                    filename=f.name,
                    md5=self.md5,
                )
            ixi_tiny_dir = root / 'ixi_tiny'
            (ixi_tiny_dir / 'image').rename(root / 'image')
            (ixi_tiny_dir / 'label').rename(root / 'label')
            shutil.rmtree(ixi_tiny_dir)
            succeeded = True
        finally:
            if archive_path is not None:
                try:
                    archive_path.unlink()
                except OSError:
                    # Best-effort cleanup: never mask the original error
                    pass
            if not succeeded and root.is_dir():
                # root was not a directory before this call, so whatever is
                # there now is a leftover of the failed attempt.
                shutil.rmtree(root, ignore_errors=True)
def sglob(directory, pattern):
    """Return the paths in ``directory`` that match ``pattern``, sorted."""
    matches = list(Path(directory).glob(pattern))
    matches.sort()
    return matches
def get_subject_id(path):
    """Return the file name of ``path`` up to, excluding, its last ``-``.

    The result is an empty string when the file name contains no ``-``.
    """
    subject_id, _, _ = path.name.rpartition('-')
    return subject_id