Skip to content

Commit

Permalink
add download Image
Browse files Browse the repository at this point in the history
  • Loading branch information
Jermiah committed Nov 20, 2023
1 parent fd36a2d commit f6860b8
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 56 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,7 @@ dmypy.json
# Any Data
tciaDownload*
sandbox*
old_src
old_src
resources/*
dicomsort.py
NBIA-toolkit.code-workspace
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
[![PyTests](https://github.com/jjjermiah/NBIA-toolkit/actions/workflows/main.yml/badge.svg)](https://github.com/jjjermiah/NBIA-toolkit/actions/workflows/main.yml)

# none of this works yet lol but it will soon
# NBIA Toolkit
- Packaged code to access the NBIA REST API

See the [Wiki](https://github.com/jjjermiah/NBIA-toolkit/wiki) for more information.



### none of this works yet lol but it will soon
# nbiatoolkit

A python package to query the National Biomedical Imaging Archive (NBIA) database.
Expand Down
110 changes: 85 additions & 25 deletions docs/example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-11-19 17:45:24,124 | NBIAClient | INFO | Setting up OAuth2 client... with username nbia_guest\n",
"2023-11-19 17:45:24,124 | NBIAClient | INFO | Setting up OAuth2 client... with username nbia_guest\n"
]
}
],
"outputs": [],
"source": [
"from nbiatoolkit import NBIAClient\n",
"\n",
Expand All @@ -46,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -71,37 +62,106 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pprint import pprint\n",
"bodypart_count = client.getBodyPartCounts()\n",
"print(\"Total Number of Collections:\" + str(len(bodypart_count)))\n",
"\n",
"print(\"First 5 body parts:\")\n",
"pprint(bodypart_count[0:5])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# add /home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/src to path\n",
"import sys, os\n",
"import requests\n",
"sys.path.append(os.path.abspath(\"/home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/src\"))\n",
"\n",
"from nbiatoolkit.utils import NBIA_ENDPOINTS\n",
"import pandas as pd\n",
"from pprint import pprint"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response = client.getSeries(Collection=\"4D-Lung\")\n",
"pprint(response[0:5])\n",
"# df = pd.DataFrame(response)\n",
"# df.head()"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-11-19 17:42:42,892 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/getBodyPartValuesAndCounts\n"
"2023-11-20 16:08:28,847 | NBIAClient | INFO | Setting up OAuth2 client... with username nbia_guest\n",
"2023-11-20 16:08:29,136 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries\n",
"2023-11-20 16:08:29,136 | NBIAClient | DEBUG | API headers: 'Authorization:Bearer eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJkZGFhMGY3YS1kZTBmLTRkYWQtYjM1ZS05MjljYjBiMTY3YjgifQ.eyJleHAiOjE3MDA1MDM3MDksImlhdCI6MTcwMDQ5NjUwOSwianRpIjoiNGIwNzQ2ZGYtN2VkNS00ZTllLTg5NGQtZDgzZWIxMTViOWNlIiwiaXNzIjoiaHR0cHM6Ly9rZXljbG9hay5kYm1pLmNsb3VkL2F1dGgvcmVhbG1zL1RDSUEiLCJhdWQiOiJhY2NvdW50Iiwic3ViIjoiZjowMTliNTYzNC1kYWJkLTQyMTEtYTQxZC03MjNjNDRhZmNmZmQ6bmJpYV9ndWVzdCIsInR5cCI6IkJlYXJlciIsImF6cCI6Im5iaWEiLCJzZXNzaW9uX3N0YXRlIjoiNWZjNjZlMzMtMjM0ZS00OTNiLWI1NGMtOWVkYjMzYjMwYmRmIiwiYWNyIjoiMSIsImFsbG93ZWQtb3JpZ2lucyI6WyJodHRwczovL3NlcnZpY2VzLmNhbmNlcmltYWdpbmdlYXJjaGl2ZS5uZXQiLCJodHRwczovL25iaWEuY2FuY2VyaW1hZ2luZ2VhcmNoaXZlLm5ldCIsImh0dHBzOi8vd3d3LmNhbmNlcmltYWdpbmdlYXJjaGl2ZS5uZXQiLCIqIiwiaHR0cDovL3RjaWEtbmJpYS0yLmFkLnVhbXMuZWR1OjQ1MjEwIiwiaHR0cHM6Ly9jYW5jZXJpbWFnaW5nZWFyY2hpdmUubmV0IiwiaHR0cDovL3RjaWEtbmJpYS0xLmFkLnVhbXMuZWR1OjQ1MjEwIiwiaHR0cHM6Ly9wdWJsaWMuY2FuY2VyaW1hZ2luZ2VhcmNoaXZlLm5ldCJdLCJyZWFsbV9hY2Nlc3MiOnsicm9sZXMiOlsib2ZmbGluZV9hY2Nlc3MiLCJkZWZhdWx0LXJvbGVzLXRjaWEiLCJ1bWFfYXV0aG9yaXphdGlvbiJdfSwicmVzb3VyY2VfYWNjZXNzIjp7ImFjY291bnQiOnsicm9sZXMiOlsibWFuYWdlLWFjY291bnQiLCJtYW5hZ2UtYWNjb3VudC1saW5rcyIsInZpZXctcHJvZmlsZSJdfX0sInNjb3BlIjoib3BlbmlkIHByb2ZpbGUgZW1haWwiLCJzaWQiOiI1ZmM2NmUzMy0yMzRlLTQ5M2ItYjU0Yy05ZWRiMzNiMzBiZGYiLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwibmFtZSI6Ik5CSUEgR3Vlc3QiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJuYmlhX2d1ZXN0IiwiZ2l2ZW5fbmFtZSI6Ik5CSUEiLCJmYW1pbHlfbmFtZSI6Ikd1ZXN0IiwiZW1haWwiOiJuYmlhX2d1ZXN0QGNhbmNlcmltYWdpbmdhcmNoaXZlLm5ldCJ9.7nI2jqCgAu646UGEp0uQWG5vxu_K_9dGGHHEcrgGMnE' -k 'https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries?Collection=4D-Lung&params={'Collection': '4D-Lung', 'params': {...}}'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Number of Collections:98\n",
"First 5 body parts:\n",
"[{'BodyPartExamined': 'NOT SPECIFIED', 'Count': '8490'},\n",
" {'BodyPartExamined': 'ABDOMEN', 'Count': '2968'},\n",
" {'BodyPartExamined': 'ABDOMEN CAVIT', 'Count': '2'},\n",
" {'BodyPartExamined': 'ABDOMENPELVIC', 'Count': '2'},\n",
" {'BodyPartExamined': 'ABDOMENPELVIS', 'Count': '51'}]\n"
"{'BodyPartExamined': 'LUNG',\n",
" 'Collection': '4D-Lung',\n",
" 'CollectionURI': 'https://doi.org/10.7937/K9/TCIA.2016.ELN8YGLE',\n",
" 'FileSize': 26405988,\n",
" 'ImageCount': 50,\n",
" 'LicenseName': 'Creative Commons Attribution 3.0 Unported License',\n",
" 'LicenseURI': 'http://creativecommons.org/licenses/by/3.0/',\n",
" 'Manufacturer': 'Varian Imaging Laboratories, Switzerland',\n",
" 'ManufacturerModelName': 'Trilogy Cone Beam CT',\n",
" 'Modality': 'CT',\n",
" 'PatientID': '100_HM10395',\n",
" 'ProtocolName': '5.1 4DCT & ITV FB + 4D + INSP/EXP',\n",
" 'SeriesDate': '1997-10-03 00:00:00.0',\n",
" 'SeriesDescription': 'P4^P100^S113^I0, Gated, 70.0%',\n",
" 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695',\n",
" 'SeriesNumber': 507,\n",
" 'SoftwareVersions': 'LightSpeedverrel',\n",
" 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.6834.5010.552215730027211807644647167706',\n",
" 'TimeStamp': '2015-07-20 17:58:54.0'}\n",
"<class 'dict'>\n",
"\n",
"v2/getImageWithMD5Hash\n",
"<Response [200]>\n"
]
}
],
"source": [
"from nbiatoolkit import NBIAClient\n",
"import requests\n",
"from pprint import pprint\n",
"bodypart_count = client.getBodyPartCounts()\n",
"print(\"Total Number of Collections:\" + str(len(bodypart_count)))\n",
"import io, zipfile\n",
"\n",
"print(\"First 5 body parts:\")\n",
"pprint(bodypart_count[0:5])"
"client = NBIAClient(log_level=\"DEBUG\")\n",
"series = client.getSeries(Collection=\"4D-Lung\")\n",
"pprint(series[0])\n",
"print(type(series[0]))\n",
"print()\n",
"response = client.downloadSeries(series[0][\"SeriesInstanceUID\"])\n",
"pprint(response)\n",
"\n",
"file = zipfile.ZipFile(io.BytesIO(response.content))\n",
"file.extractall(path = \".\")"
]
},
{
Expand Down
20 changes: 11 additions & 9 deletions driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
import requests
from pprint import pprint

client = NBIAClient(log_level="INFO")
client = NBIAClient(log_level="DEBUG")
# series = client.getSeries(Collection="4D-Lung")
# pprint(series[0])
# print(type(series[0]))
# print()

# response = client.getPatients(collection="LIDC-IDRI", modality="CT")

# pprint(response[0:5])


response = client.getPatients(collection="LIDC-IDRI", modality="MRI")

pprint(response[0:5])
# series0 = series[0]['SeriesInstanceUID']
series0 = '1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695'
response = client.downloadSeries(
SeriesInstanceUID = series0,
downloadDir = "/home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/resources")
# pprint(response)
13 changes: 2 additions & 11 deletions src/nbiatoolkit/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,7 @@ class OAuth2:
>>> oauth = OAuth2()
To use a custom account:
>>> oauth = OAuth2(username="my_username", password="my_password")
From the REST API documentation, you need the Authentication
headers to access the API. You can get the headers by calling:
>>> api_headers = oauth.getToken()
You can then use these headers with the `requests` library to
access the API:
>>> requests.get(url=query_url, headers=api_headers)
"""

def __init__(self, username: str = "nbia_guest", password: str = "", client_id: str = "NBIA"):
Expand Down Expand Up @@ -123,8 +115,7 @@ def getToken(self):
self.access_token = token_data.get('access_token')

self.api_headers = {
'Authorization': f'Bearer {self.access_token}',
'Accept': 'application/json'
'Authorization':f'Bearer {self.access_token}'
}

self.expiry_time = time.ctime(time.time() + token_data.get('expires_in'))
Expand Down
101 changes: 96 additions & 5 deletions src/nbiatoolkit/nbia.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ class NBIAClient:
TODO:: Add docstring
FIXME:: logger prints duplicate logs if you instantiate the class more than once
"""



def __init__(self,
username: str = "nbia_guest",
password: str = "",
Expand Down Expand Up @@ -38,7 +37,12 @@ def query_api(self, endpoint: NBIA_ENDPOINTS, params: dict = {}) -> dict:
headers=self.api_headers,
params=params
)
response = response.json()
# Check if response is likely to be JSON
if response.headers.get('Content-Type') == 'application/json':
response_data = response.json()
else:
# If response is binary data, return raw response
response_data = response.content
except JSONDecodeError as j:
if (response.text==""):
self.logger.error("Response text is empty.")
Expand All @@ -49,8 +53,7 @@ def query_api(self, endpoint: NBIA_ENDPOINTS, params: dict = {}) -> dict:
self.logger.error("Error querying API: %s", e)
raise e


return response
return response_data

def _createDebugURL(self, endpoint, params):
auth = "'Authorization:" + self.api_headers["Authorization"] + "' -k "
Expand Down Expand Up @@ -107,3 +110,91 @@ def getPatients(self, collection: str, modality: str) -> list:
patientList = [_["PatientId"] for _ in response]
return patientList

def getSeries(self,
              Collection: str = "",
              PatientID: str = "",
              StudyInstanceUID: str = "",
              Modality: str = "",
              SeriesInstanceUID: str = "",
              BodyPartExamined: str = "",
              ManufacturerModelName: str = "",
              Manufacturer: str = "",
              ) -> list:
    """Query the ``GET_SERIES`` endpoint, filtered by the supplied fields.

    Every argument is optional. An argument left at its default of ``""``
    is omitted from the request; any other value is sent as a query
    parameter under the argument's own name.

    Returns:
        Whatever ``query_api`` returns for ``NBIA_ENDPOINTS.GET_SERIES``.
        The example notebook indexes the result like a list of dicts, one
        per series; this method does not check that shape.
    """
    # Spell the filters out instead of iterating over locals(): locals()
    # also contains the request dict being built, which used to be inserted
    # into itself and sent to the API as a bogus `params` query parameter.
    filters = {
        "Collection": Collection,
        "PatientID": PatientID,
        "StudyInstanceUID": StudyInstanceUID,
        "Modality": Modality,
        "SeriesInstanceUID": SeriesInstanceUID,
        "BodyPartExamined": BodyPartExamined,
        "ManufacturerModelName": ManufacturerModelName,
        "Manufacturer": Manufacturer,
    }
    params = {key: value for key, value in filters.items() if value != ""}

    return self.query_api(
        endpoint=NBIA_ENDPOINTS.GET_SERIES,
        params=params)

def downloadSeries(self,
                   SeriesInstanceUID: str,
                   downloadDir: str,
                   ) -> bytes:
    """Download one series as a zip archive, extract it and verify it.

    The archive is requested from the ``DOWNLOAD_SERIES`` endpoint and
    unpacked into ``<downloadDir>/<SeriesInstanceUID>``; the extracted files
    are then checked with ``validateMD5``.

    Args:
        SeriesInstanceUID: UID of the series to download.
        downloadDir: Directory under which the per-series folder is created.

    Returns:
        The payload returned by ``query_api``. This is the raw zip content
        (``bytes``) on success. If the API answered with something other
        than bytes, that payload is returned unchanged after an error has
        been logged, and nothing is extracted.

    Raises:
        AssertionError: Propagated from ``validateMD5`` when the hash file
            is missing or a checksum does not match.
    """
    import io, zipfile, os

    params = {"SeriesInstanceUID": SeriesInstanceUID}

    response = self.query_api(
        endpoint=NBIA_ENDPOINTS.DOWNLOAD_SERIES,
        params=params)

    if not isinstance(response, bytes):
        # Previously this case was silently ignored; surface it so a failed
        # download is not mistaken for a successful one.
        self.logger.error(
            "Expected a zip archive for series %s but received %s; nothing was extracted.",
            SeriesInstanceUID, type(response).__name__)
        return response

    seriesDir = os.path.join(downloadDir, SeriesInstanceUID)
    # Context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(io.BytesIO(response)) as archive:
        archive.extractall(path=seriesDir)

    self.validateMD5(seriesDir=seriesDir)

    return response


def _calculateMD5(self,
filepath: str
) -> str:

import hashlib
hash_md5 = hashlib.md5()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()

def validateMD5(self,
                seriesDir: str
                ) -> bool:
    """Verify the files in ``seriesDir`` against its ``md5hashes.csv``.

    The first line of the CSV is skipped as a header. On every following
    non-blank line, the first comma-separated field is taken as a file name
    relative to ``seriesDir`` and the second as its expected MD5 digest
    (compared case-insensitively). When every listed file matches, the
    hash file is deleted.

    Args:
        seriesDir: Directory containing the extracted series and
            ``md5hashes.csv``.

    Returns:
        ``True`` if all listed files exist and match; ``False`` if a listed
        file is missing (the hash file is then left in place).

    Raises:
        AssertionError: If ``md5hashes.csv`` is missing or a digest differs.
            Raised explicitly rather than via ``assert`` so the check still
            runs when Python is started with ``-O``.
    """
    import os

    md5File = os.path.join(seriesDir, "md5hashes.csv")
    if not os.path.isfile(md5File):
        raise AssertionError("MD5 hash file not found in download directory.")

    with open(md5File, "r") as f:
        lines = f.readlines()

    for line in lines[1:]:
        # Blank or trailing empty lines carry no entry; without this guard
        # they would be reported as a missing file.
        if not line.strip():
            continue

        fields = line.split(",")
        filepath = os.path.join(seriesDir, fields[0])
        if not os.path.isfile(filepath):
            self.logger.error("File not found in seriesDir: %s", filepath)
            return False

        md5hash = fields[1].strip().lower()
        md5 = self._calculateMD5(filepath)

        if md5 != md5hash:
            raise AssertionError(f"MD5 hash mismatch for file: {filepath}")

    # delete the md5 file if all hashes match
    os.remove(md5File)
    return True

22 changes: 20 additions & 2 deletions src/nbiatoolkit/utils/nbia_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,27 @@ class NBIA_ENDPOINTS(Enum):
GET_COLLECTION_PATIENT_COUNT = 'getCollectionValuesAndCounts'
GET_COLLECTIONS = 'v2/getCollectionValues'
GET_BODY_PART_PATIENT_COUNT = 'getBodyPartValuesAndCounts'
GET_PATIENT_BY_COLLECTION_AND_MODALITY = 'v2/getPatientByCollectionAndModality'
GET_PATIENT_BY_COLLECTION_AND_MODALITY = 'v2/getPatientByCollectionAndModality'
GET_SERIES = 'v2/getSeries'

DOWNLOAD_SERIES = 'v2/getImageWithMD5Hash'

# TIMES OUT???
GET_SERIES_SIZE = 'v2/getSeriesSize'
GET_UPDATED_SERIES = 'v2/getUpdatedSeries'

# curl -H 'Authorization:Bearer YOUR_ACCESS_TOKEN' -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992"

# https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992

# https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesSize?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992

# curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries"
# curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData"
# curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesSize"

# curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getUpdatedSeries"


# Helper functions
def __str__(self):
return self.value
Expand Down

0 comments on commit f6860b8

Please sign in to comment.