Skip to content

Commit

Permalink
Merge pull request #57 from jjjermiah/example_notebook
Browse files Browse the repository at this point in the history
documentation for notebook and readme
  • Loading branch information
jjjermiah committed Jan 28, 2024
2 parents 1bdd456 + 3dc3eae commit f477c4a
Show file tree
Hide file tree
Showing 3 changed files with 362 additions and 17 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
[![PyPI - Downloads](https://img.shields.io/pypi/dm/nbiatoolkit.svg?label=pypi%20downloads)](https://pypi.org/project/nbiatoolkit/)
![GitHub repo size](https://img.shields.io/github/repo-size/jjjermiah/nbia-toolkit)
[![Docker Pulls](https://img.shields.io/docker/pulls/jjjermiah/nbiatoolkit)](https://hub.docker.com/r/jjjermiah/nbiatoolkit)
<img alt="GitHub milestone details" src="https://img.shields.io/github/milestones/progress-percent/jjjermiah/nbia-toolkit/1?style=flat-square&label=1.0.0%20Stable%20Release%20Milestone&link=https%3A%2F%2Fgithub.com%2Fjjjermiah%2Fnbia-toolkit%2Fmilestone%2F1"><img alt="GitHub milestone details" src="https://img.shields.io/github/milestones/progress/jjjermiah/nbia-toolkit/1?style=flat-square&label=%20&link=https%3A%2F%2Fgithub.com%2Fjjjermiah%2Fnbia-toolkit%2Fmilestone%2F1">


# *NBIA Toolkit*
`nbiatoolkit` is a Python package for programmatically querying and downloading images from the National Biomedical Imaging Archive (**NBIA**) and The Cancer Imaging Archive (**TCIA**) databases.
Expand Down
326 changes: 326 additions & 0 deletions docs/Example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,326 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"# Use %pip rather than !pip so the package is installed into the\n",
"# environment of the running kernel, not whichever pip is first on PATH.\n",
"%pip install nbiatoolkit"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from nbiatoolkit import NBIAClient\n",
"from pprint import pprint"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"client = NBIAClient()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['TCGA-BLCA', 'TCGA-BRCA', 'TCGA-CESC', 'TCGA-COAD', 'TCGA-ESCA', 'TCGA-KICH', 'TCGA-KIRC', 'TCGA-KIRP', 'TCGA-LIHC', 'TCGA-LUAD', 'TCGA-LUSC', 'TCGA-OV', 'TCGA-PRAD', 'TCGA-READ', 'TCGA-SARC', 'TCGA-STAD', 'TCGA-THCA', 'TCGA-UCEC']\n"
]
}
],
"source": [
"collections = client.getCollections(prefix = \"TCGA\")\n",
"print(collections)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Number of Series: 1688\n",
"{'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.3344.4008.114273558613268508155402014536', 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.3344.4008.130981350480025856864319518013', 'Modality': 'MR', 'SeriesDate': '1996-06-12 00:00:00.0', 'SeriesDescription': 'Ax Flair', 'BodyPartExamined': 'LIVER', 'SeriesNumber': 3, 'Collection': 'TCGA-LIHC', 'PatientID': 'TCGA-DD-A3A0', 'Manufacturer': 'GE MEDICAL SYSTEMS', 'ManufacturerModelName': 'GENESIS_SIGNA', 'SoftwareVersions': '09', 'ImageCount': 30, 'TimeStamp': '2013-10-07 22:54:58.0', 'LicenseName': 'Creative Commons Attribution 3.0 Unported License', 'LicenseURI': 'http://creativecommons.org/licenses/by/3.0/', 'CollectionURI': 'https://doi.org/10.7937/K9/TCIA.2016.IMMQW8UQ', 'FileSize': 4117194}\n"
]
}
],
"source": [
"# Liver hepatocellular carcinoma\n",
"seriesList = client.getSeries(Collection='TCGA-LIHC')\n",
"\n",
"print(\"Total Number of Series: \" + str(len(seriesList)))\n",
"pprint(seriesList[0])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Number of CT-only Series: 777\n",
"{'BodyPartExamined': 'LIVER',\n",
" 'Collection': 'TCGA-LIHC',\n",
" 'CollectionURI': 'https://doi.org/10.7937/K9/TCIA.2016.IMMQW8UQ',\n",
" 'FileSize': 48626832,\n",
" 'ImageCount': 92,\n",
" 'LicenseName': 'Creative Commons Attribution 3.0 Unported License',\n",
" 'LicenseURI': 'http://creativecommons.org/licenses/by/3.0/',\n",
" 'Manufacturer': 'GE MEDICAL SYSTEMS',\n",
" 'ManufacturerModelName': 'LightSpeed QX/i',\n",
" 'Modality': 'CT',\n",
" 'PatientID': 'TCGA-DD-A3A9',\n",
" 'ProtocolName': '6.3 BI-PHASE LIVER ()',\n",
" 'SeriesDate': '1995-06-25 00:00:00.0',\n",
" 'SeriesDescription': 'AXIAL',\n",
" 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.3344.4008.164450824675944451114070841306',\n",
" 'SeriesNumber': 3,\n",
" 'SoftwareVersions': 'LightSpeedApps10.5_2.8.2I_H1.3M4',\n",
" 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.3344.4008.260454144139081692373465010706',\n",
" 'TimeStamp': '2013-10-07 22:53:09.0'}\n"
]
}
],
"source": [
"seriesList = client.getSeries(Collection='TCGA-LIHC', Modality='CT')\n",
"print(\"Total Number of CT-only Series: \" + str(len(seriesList)))\n",
"pprint(seriesList[0])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading to: /Users/bhklab/Documents/GitHub/NBIA-toolkit/docs/data\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading 100 series: 0%| | 0/100 [00:00<?, ?it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading 100 series: 100%|██████████| 100/100 [01:29<00:00, 1.12it/s]\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# call client.downloadSeries() on each SeriesInstanceUID\n",
"import os\n",
"downloadDir = \"./data\"\n",
"os.makedirs(downloadDir, exist_ok=True)\n",
"print(\"Downloading to: \" + os.path.abspath(downloadDir))\n",
"\n",
"seriesUIDS = [s['SeriesInstanceUID'] for s in seriesList]\n",
"\n",
"client.downloadSeries(\n",
" seriesUIDS[0:100], \n",
" downloadDir, \n",
" filePattern=\"%PatientName/%SeriesNumber-%SeriesInstanceUID/%InstanceNumber-%SOPInstanceUID.dcm\",\n",
" overwrite=True, nParallel=8)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[01;34m./data\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A113\u001b[0m\n",
"│   ├── \u001b[01;34m1-70243\u001b[0m\n",
"│   ├── \u001b[01;34m1-75734\u001b[0m\n",
"│   ├── \u001b[01;34m2-23267\u001b[0m\n",
"│   ├── \u001b[01;34m2-86495\u001b[0m\n",
"│   ├── \u001b[01;34m3-36497\u001b[0m\n",
"│   ├── \u001b[01;34m3-55949\u001b[0m\n",
"│   ├── \u001b[01;34m4-83272\u001b[0m\n",
"│   ├── \u001b[01;34m5-32629\u001b[0m\n",
"│   └── \u001b[01;34m6-37190\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A11C\u001b[0m\n",
"│   ├── \u001b[01;34m1-01534\u001b[0m\n",
"│   ├── \u001b[01;34m1-70467\u001b[0m\n",
"│   ├── \u001b[01;34m2-24983\u001b[0m\n",
"│   ├── \u001b[01;34m2-58971\u001b[0m\n",
"│   ├── \u001b[01;34m3-12708\u001b[0m\n",
"│   ├── \u001b[01;34m3-68242\u001b[0m\n",
"│   ├── \u001b[01;34m4-43276\u001b[0m\n",
"│   ├── \u001b[01;34m4-87471\u001b[0m\n",
"│   ├── \u001b[01;34m5-11205\u001b[0m\n",
"│   ├── \u001b[01;34m5-40174\u001b[0m\n",
"│   ├── \u001b[01;34m6-31809\u001b[0m\n",
"│   ├── \u001b[01;34m6-77340\u001b[0m\n",
"│   ├── \u001b[01;34m7-69740\u001b[0m\n",
"│   └── \u001b[01;34m8-96817\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A11D\u001b[0m\n",
"│   ├── \u001b[01;34m1-18437\u001b[0m\n",
"│   ├── \u001b[01;34m101-16024\u001b[0m\n",
"│   ├── \u001b[01;34m102-04127\u001b[0m\n",
"│   ├── \u001b[01;34m2-57551\u001b[0m\n",
"│   ├── \u001b[01;34m3-79698\u001b[0m\n",
"│   └── \u001b[01;34m4-73368\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A1EE\u001b[0m\n",
"│   ├── \u001b[01;34m1-36537\u001b[0m\n",
"│   ├── \u001b[01;34m1-84927\u001b[0m\n",
"│   ├── \u001b[01;34m2-03522\u001b[0m\n",
"│   ├── \u001b[01;34m2-22115\u001b[0m\n",
"│   ├── \u001b[01;34m2-44020\u001b[0m\n",
"│   ├── \u001b[01;34m3-21563\u001b[0m\n",
"│   ├── \u001b[01;34m3-30320\u001b[0m\n",
"│   ├── \u001b[01;34m4-17731\u001b[0m\n",
"│   ├── \u001b[01;34m4-76883\u001b[0m\n",
"│   ├── \u001b[01;34m5-67388\u001b[0m\n",
"│   ├── \u001b[01;34m5-93966\u001b[0m\n",
"│   ├── \u001b[01;34m6-29405\u001b[0m\n",
"│   └── \u001b[01;34m6-91131\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A39V\u001b[0m\n",
"│   ├── \u001b[01;34m1-48712\u001b[0m\n",
"│   ├── \u001b[01;34m1-65145\u001b[0m\n",
"│   ├── \u001b[01;34m2-91799\u001b[0m\n",
"│   └── \u001b[01;34m2-99457\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A39X\u001b[0m\n",
"│   ├── \u001b[01;34m1-94864\u001b[0m\n",
"│   ├── \u001b[01;34m2-47484\u001b[0m\n",
"│   └── \u001b[01;34m3-64527\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A39Y\u001b[0m\n",
"│   ├── \u001b[01;34m1-16594\u001b[0m\n",
"│   ├── \u001b[01;34m1-53399\u001b[0m\n",
"│   ├── \u001b[01;34m2-20075\u001b[0m\n",
"│   └── \u001b[01;34m2-28229\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A3A0\u001b[0m\n",
"│   ├── \u001b[01;34m1-06368\u001b[0m\n",
"│   ├── \u001b[01;34m1-40120\u001b[0m\n",
"│   ├── \u001b[01;34m1-98172\u001b[0m\n",
"│   ├── \u001b[01;34m2-27146\u001b[0m\n",
"│   ├── \u001b[01;34m2-34395\u001b[0m\n",
"│   ├── \u001b[01;34m2-68690\u001b[0m\n",
"│   ├── \u001b[01;34m3-29588\u001b[0m\n",
"│   ├── \u001b[01;34m4-88607\u001b[0m\n",
"│   ├── \u001b[01;34m5-33425\u001b[0m\n",
"│   └── \u001b[01;34m7-30967\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A3A1\u001b[0m\n",
"│   ├── \u001b[01;34m1-42245\u001b[0m\n",
"│   ├── \u001b[01;34m1-92117\u001b[0m\n",
"│   ├── \u001b[01;34m2-10916\u001b[0m\n",
"│   ├── \u001b[01;34m2-17869\u001b[0m\n",
"│   ├── \u001b[01;34m3-43861\u001b[0m\n",
"│   └── \u001b[01;34m3-45654\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A3A4\u001b[0m\n",
"│   ├── \u001b[01;34m1-06677\u001b[0m\n",
"│   ├── \u001b[01;34m1-08320\u001b[0m\n",
"│   ├── \u001b[01;34m1-30551\u001b[0m\n",
"│   ├── \u001b[01;34m1-33518\u001b[0m\n",
"│   ├── \u001b[01;34m1-69911\u001b[0m\n",
"│   ├── \u001b[01;34m1-76003\u001b[0m\n",
"│   ├── \u001b[01;34m1-82190\u001b[0m\n",
"│   ├── \u001b[01;34m1-98020\u001b[0m\n",
"│   ├── \u001b[01;34m2-21357\u001b[0m\n",
"│   ├── \u001b[01;34m2-30939\u001b[0m\n",
"│   ├── \u001b[01;34m2-40760\u001b[0m\n",
"│   ├── \u001b[01;34m2-71527\u001b[0m\n",
"│   ├── \u001b[01;34m2-74041\u001b[0m\n",
"│   ├── \u001b[01;34m2-97891\u001b[0m\n",
"│   └── \u001b[01;34m3-98547\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A3A6\u001b[0m\n",
"│   ├── \u001b[01;34m1-873.8\u001b[0m\n",
"│   └── \u001b[01;34m1-87348\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A3A7\u001b[0m\n",
"│   ├── \u001b[01;34m1-20117\u001b[0m\n",
"│   ├── \u001b[01;34m1-24262\u001b[0m\n",
"│   ├── \u001b[01;34m1-40860\u001b[0m\n",
"│   ├── \u001b[01;34m2-38868\u001b[0m\n",
"│   ├── \u001b[01;34m2-60477\u001b[0m\n",
"│   └── \u001b[01;34m2-78991\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A3A8\u001b[0m\n",
"│   ├── \u001b[01;34m1-47925\u001b[0m\n",
"│   └── \u001b[01;34m2-71947\u001b[0m\n",
"├── \u001b[01;34mTCGA-DD-A3A9\u001b[0m\n",
"│   ├── \u001b[01;34m1-28475\u001b[0m\n",
"│   ├── \u001b[01;34m2-67719\u001b[0m\n",
"│   └── \u001b[01;34m3-41306\u001b[0m\n",
"└── \u001b[01;34mTCGA-DD-A4NK\u001b[0m\n",
" ├── \u001b[01;34m1-75495\u001b[0m\n",
" ├── \u001b[01;34m2-32574\u001b[0m\n",
" └── \u001b[01;34m3-15155\u001b[0m\n",
"\n",
"116 directories, 0 files\n"
]
}
],
"source": [
"!tree -L 2 ./data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "nbia",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit f477c4a

Please sign in to comment.