Skip to content

Commit

Permalink
add parallel documentation and info on filepatterns
Browse files Browse the repository at this point in the history
  • Loading branch information
jjjermiah committed Jan 7, 2024
1 parent b697aa9 commit 48174bd
Showing 1 changed file with 117 additions and 49 deletions.
166 changes: 117 additions & 49 deletions docs/Tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 11:27 | NBIAClient | DEBUG | Setting up OAuth2 client... with username nbia_guest\n"
"24-01-07 11:57 | NBIAClient | DEBUG | Setting up OAuth2 client... with username nbia_guest\n"
]
}
],
Expand All @@ -41,15 +41,15 @@
"from pprint import pprint\n",
"\n",
"# Instantiate the client. \n",
"client = NBIAClient(log_level='debug')\n",
"client = NBIAClient(log_level='info')\n",
"\n",
"# NOTE::READTHEDOCS \n",
"# THIS WILL FAIL UNTIL WE UPLOAD PACKAGE TO PYPI AND INSTALL VIA PIP"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -74,14 +74,14 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 11:27 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getCollectionValues\n"
"24-01-07 11:57 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getCollectionValues\n"
]
},
{
Expand Down Expand Up @@ -111,14 +111,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 11:27 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/getCollectionValuesAndCounts\n"
"24-01-07 11:57 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/getCollectionValuesAndCounts\n"
]
},
{
Expand Down Expand Up @@ -148,15 +148,15 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 11:27 | NBIAClient | DEBUG | Parsing params: {'self': <nbiatoolkit.nbia.NBIAClient object at 0x105a7afc0>, 'Collection': '', 'Modality': ''}\n",
"24-01-07 11:27 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/getBodyPartValuesAndCounts\n"
"24-01-07 11:57 | NBIAClient | DEBUG | Parsing params: {'self': <nbiatoolkit.nbia.NBIAClient object at 0x116aff560>, 'Collection': '', 'Modality': ''}\n",
"24-01-07 11:57 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/getBodyPartValuesAndCounts\n"
]
},
{
Expand Down Expand Up @@ -198,15 +198,15 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 11:27 | NBIAClient | DEBUG | Parsing params: {'self': <nbiatoolkit.nbia.NBIAClient object at 0x105a7afc0>, 'Collection': '4D-Lung', 'Modality': 'CT'}\n",
"24-01-07 11:27 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getPatientByCollectionAndModality\n"
"24-01-07 11:58 | NBIAClient | DEBUG | Parsing params: {'self': <nbiatoolkit.nbia.NBIAClient object at 0x116aff560>, 'Collection': '4D-Lung', 'Modality': 'CT'}\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getPatientByCollectionAndModality\n"
]
},
{
Expand Down Expand Up @@ -250,14 +250,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 11:27 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries\n"
"24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries\n"
]
},
{
Expand Down Expand Up @@ -295,14 +295,14 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 11:27 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries\n"
"24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries\n"
]
},
{
Expand Down Expand Up @@ -345,14 +345,17 @@
"# download Series Data\n",
"``` python\n",
"downloadSeries(\n",
" SeriesInstanceUID: str,\n",
" downloadDir: str)\n",
" SeriesInstanceUID: Union[str, list],\n",
" downloadDir: str,\n",
" filePattern: str,\n",
" overwrite: bool,\n",
" nParallel: int)\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [
{
Expand All @@ -362,69 +365,67 @@
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.336250251691987239290048605884',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.227929163446067537882961857921',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.925990093742075237571072608963',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.139116724721865252687455544825',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.364787732307640672278270360328',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.384197169742944248273003912317',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.149750833495190982103087204448',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.300347070051003027185063750283',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.317831614083862743715273480521',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.736089011729021729851027177073',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.133381852562664457904201355429',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.909088026336573109170906532418',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.953079890279542310843831057254',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.427052348021168186336245283790',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.295010883410722294053941635303',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.263257070197787007872578860295',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.672179203515231442641005032212',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.184961274239908956209701869504',\n",
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.797307942821711099898506950104']"
" '1.3.6.1.4.1.14519.5.2.1.6834.5010.139116724721865252687455544825']"
]
},
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# first get a list of the SeriesInstanceUIDs\n",
"seriesUIDS = [series['SeriesInstanceUID'] for series in seriesJSON]\n",
"seriesUIDS[0:20]"
"seriesUIDS[0:5]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-11-20 16:55:28,455 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n"
"24-01-07 11:58 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.336250251691987239290048605884\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.227929163446067537882961857921\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.925990093742075237571072608963\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading to: /home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/docs/data\n"
"Downloading to: /Users/bhklab/Documents/GitHub/NBIA-toolkit/docs/data\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-11-20 16:55:33,722 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"2023-11-20 16:55:39,813 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"2023-11-20 16:55:44,858 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"2023-11-20 16:55:49,399 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n"
"Downloading 5 series: 0%| | 0/5 [00:00<?, ?it/s]24-01-07 11:58 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.139116724721865252687455544825\n",
"Downloading 5 series: 20%|██ | 1/5 [00:02<00:10, 2.61s/it]24-01-07 11:58 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"Downloading 5 series: 100%|██████████| 5/5 [00:04<00:00, 1.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1.3.6.1.4.1.14519.5.2.1.6834.5010.227929163446067537882961857921', '1.3.6.1.4.1.14519.5.2.1.6834.5010.336250251691987239290048605884', '1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695', '1.3.6.1.4.1.14519.5.2.1.6834.5010.925990093742075237571072608963', '1.3.6.1.4.1.14519.5.2.1.6834.5010.139116724721865252687455544825']\n"
"['P100']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
Expand All @@ -434,13 +435,80 @@
"downloadDir = \"./data\"\n",
"os.makedirs(downloadDir, exist_ok=True)\n",
"print(\"Downloading to: \" + os.path.abspath(downloadDir))\n",
"for seriesUID in seriesUIDS[0:5]:\n",
" client.downloadSeries(seriesUID, downloadDir)\n",
"\n",
"cores = 4 # number of parallel downloads\n",
"client.downloadSeries(\n",
" seriesUIDS[0:5], downloadDir, overwrite=True, nParallel=cores)\n",
" \n",
"pprint(os.listdir(downloadDir))\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure File names during download\n",
"\n",
"Due to the unique nature of the data in NBIA, the file names are not always consistent.\n",
"\n",
"To configure the file names during download you can pass in a parameter called `filePattern` to the `downloadSeries` method which is used by the `DICOMSorter`. For more information on how to configure the `filePattern` see the `nbiatoolkit.DICOMSorter()` class.\n",
"\n",
"The `filePattern` is a string containing DICOM tag names, each prefixed with `%`. The tag values are read from each DICOM file's metadata and substituted into the pattern to build the file name:\n",
"- e.g. `%PatientName_%SeriesInstanceUID.dcm` will create a file name from the PatientName and SeriesInstanceUID.\n",
" - note: the UIDs will be shortened to the final 5 characters to avoid long file names.\n",
"\n",
"The default `filePattern` is: `%PatientName/%StudyDescription-%StudyDate/%SeriesNumber-%SeriesDescription-%SeriesInstanceUID/%InstanceNumber.dcm`. This will create the following tree structure:\n",
"\n",
"``` text\n",
"PatientName\n",
"└── StudyDescription-StudyDate\n",
"    └── SeriesNumber-SeriesDescription-SeriesInstanceUID\n",
"        └── InstanceNumber.dcm\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"24-01-07 12:05 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695\n",
"24-01-07 12:05 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.336250251691987239290048605884\n",
"24-01-07 12:05 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.227929163446067537882961857921\n",
"Downloading 5 series: 0%| | 0/5 [00:00<?, ?it/s]24-01-07 12:05 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"24-01-07 12:05 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.925990093742075237571072608963\n",
"24-01-07 12:05 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"24-01-07 12:05 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"24-01-07 12:05 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"24-01-07 12:05 | NBIAClient | DEBUG | Downloading series: 1.3.6.1.4.1.14519.5.2.1.6834.5010.139116724721865252687455544825\n",
"Downloading 5 series: 20%|██ | 1/5 [00:02<00:09, 2.29s/it]24-01-07 12:05 | NBIAClient | DEBUG | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getImageWithMD5Hash\n",
"Downloading 5 series: 100%|██████████| 5/5 [00:05<00:00, 1.04s/it]\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"client.downloadSeries(\n",
" seriesUIDS[0:5], \n",
" downloadDir, \n",
" filePattern=\"%PatientName/%SeriesNumber-%SeriesInstanceUID/%InstanceNumber-%SOPInstanceUID.dcm\",\n",
" overwrite=True, nParallel=4)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit 48174bd

Please sign in to comment.