add download Image

jjjermiah · Nov 20, 2023 · f6860b8 · f6860b8
1 parent fd36a2d
commit f6860b8
Show file tree

Hide file tree

Showing 7 changed files with 219 additions and 56 deletions.
diff --git a/.gitignore b/.gitignore
@@ -133,4 +133,7 @@ dmypy.json
 # Any Data
 tciaDownload*
 sandbox*
-old_src
+old_src
+resources/*
+dicomsort.py
+NBIA-toolkit.code-workspace
diff --git a/README.md b/README.md
@@ -1,13 +1,11 @@
 [![PyTests](https://github.com/jjjermiah/NBIA-toolkit/actions/workflows/main.yml/badge.svg)](https://github.com/jjjermiah/NBIA-toolkit/actions/workflows/main.yml)
 
+> **Work in progress:** none of this works yet, but it will soon.
 # NBIA Toolkit 
 - Packaged code to access the NBIA REST API 
 
 See the [Wiki](https://github.com/jjjermiah/NBIA-toolkit/wiki) for more information.
 
-
-
-### none of this works yet lol but it will soon
 # nbiatoolkit
 
 A python package to query the National Biomedical Imaging Archive (NBIA) database.

diff --git a/docs/example.ipynb b/docs/example.ipynb
@@ -25,18 +25,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023-11-19 17:45:24,124 | NBIAClient | INFO | Setting up OAuth2 client... with username nbia_guest\n",
-      "2023-11-19 17:45:24,124 | NBIAClient | INFO | Setting up OAuth2 client... with username nbia_guest\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from nbiatoolkit import NBIAClient\n",
     "\n",
@@ -46,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -71,37 +62,106 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pprint import pprint\n",
+    "bodypart_count = client.getBodyPartCounts()\n",
+    "print(\"Total Number of Collections:\" + str(len(bodypart_count)))\n",
+    "\n",
+    "print(\"First 5 body parts:\")\n",
+    "pprint(bodypart_count[0:5])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add /home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/src to path\n",
+    "import sys, os\n",
+    "import requests\n",
+    "sys.path.append(os.path.abspath(\"/home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/src\"))\n",
+    "\n",
+    "from nbiatoolkit.utils import NBIA_ENDPOINTS\n",
+    "import pandas as pd\n",
+    "from pprint import pprint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = client.getSeries(Collection=\"4D-Lung\")\n",
+    "pprint(response[0:5])\n",
+    "# df = pd.DataFrame(response)\n",
+    "# df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2023-11-19 17:42:42,892 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/getBodyPartValuesAndCounts\n"
+      "2023-11-20 16:08:28,847 | NBIAClient | INFO | Setting up OAuth2 client... with username nbia_guest\n",
+      "2023-11-20 16:08:29,136 | NBIAClient | INFO | Querying API endpoint: https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries\n",
+      "2023-11-20 16:08:29,136 | NBIAClient | DEBUG | API headers: 'Authorization:Bearer eyJhbGciOiJIUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJkZGFhMGY3YS1kZTBmLTRkYWQtYjM1ZS05MjljYjBiMTY3YjgifQ.eyJleHAiOjE3MDA1MDM3MDksImlhdCI6MTcwMDQ5NjUwOSwianRpIjoiNGIwNzQ2ZGYtN2VkNS00ZTllLTg5NGQtZDgzZWIxMTViOWNlIiwiaXNzIjoiaHR0cHM6Ly9rZXljbG9hay5kYm1pLmNsb3VkL2F1dGgvcmVhbG1zL1RDSUEiLCJhdWQiOiJhY2NvdW50Iiwic3ViIjoiZjowMTliNTYzNC1kYWJkLTQyMTEtYTQxZC03MjNjNDRhZmNmZmQ6bmJpYV9ndWVzdCIsInR5cCI6IkJlYXJlciIsImF6cCI6Im5iaWEiLCJzZXNzaW9uX3N0YXRlIjoiNWZjNjZlMzMtMjM0ZS00OTNiLWI1NGMtOWVkYjMzYjMwYmRmIiwiYWNyIjoiMSIsImFsbG93ZWQtb3JpZ2lucyI6WyJodHRwczovL3NlcnZpY2VzLmNhbmNlcmltYWdpbmdlYXJjaGl2ZS5uZXQiLCJodHRwczovL25iaWEuY2FuY2VyaW1hZ2luZ2VhcmNoaXZlLm5ldCIsImh0dHBzOi8vd3d3LmNhbmNlcmltYWdpbmdlYXJjaGl2ZS5uZXQiLCIqIiwiaHR0cDovL3RjaWEtbmJpYS0yLmFkLnVhbXMuZWR1OjQ1MjEwIiwiaHR0cHM6Ly9jYW5jZXJpbWFnaW5nZWFyY2hpdmUubmV0IiwiaHR0cDovL3RjaWEtbmJpYS0xLmFkLnVhbXMuZWR1OjQ1MjEwIiwiaHR0cHM6Ly9wdWJsaWMuY2FuY2VyaW1hZ2luZ2VhcmNoaXZlLm5ldCJdLCJyZWFsbV9hY2Nlc3MiOnsicm9sZXMiOlsib2ZmbGluZV9hY2Nlc3MiLCJkZWZhdWx0LXJvbGVzLXRjaWEiLCJ1bWFfYXV0aG9yaXphdGlvbiJdfSwicmVzb3VyY2VfYWNjZXNzIjp7ImFjY291bnQiOnsicm9sZXMiOlsibWFuYWdlLWFjY291bnQiLCJtYW5hZ2UtYWNjb3VudC1saW5rcyIsInZpZXctcHJvZmlsZSJdfX0sInNjb3BlIjoib3BlbmlkIHByb2ZpbGUgZW1haWwiLCJzaWQiOiI1ZmM2NmUzMy0yMzRlLTQ5M2ItYjU0Yy05ZWRiMzNiMzBiZGYiLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwibmFtZSI6Ik5CSUEgR3Vlc3QiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJuYmlhX2d1ZXN0IiwiZ2l2ZW5fbmFtZSI6Ik5CSUEiLCJmYW1pbHlfbmFtZSI6Ikd1ZXN0IiwiZW1haWwiOiJuYmlhX2d1ZXN0QGNhbmNlcmltYWdpbmdhcmNoaXZlLm5ldCJ9.7nI2jqCgAu646UGEp0uQWG5vxu_K_9dGGHHEcrgGMnE' -k 'https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries?Collection=4D-Lung&params={'Collection': '4D-Lung', 'params': {...}}'\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Total Number of Collections:98\n",
-      "First 5 body parts:\n",
-      "[{'BodyPartExamined': 'NOT SPECIFIED', 'Count': '8490'},\n",
-      " {'BodyPartExamined': 'ABDOMEN', 'Count': '2968'},\n",
-      " {'BodyPartExamined': 'ABDOMEN CAVIT', 'Count': '2'},\n",
-      " {'BodyPartExamined': 'ABDOMENPELVIC', 'Count': '2'},\n",
-      " {'BodyPartExamined': 'ABDOMENPELVIS', 'Count': '51'}]\n"
+      "{'BodyPartExamined': 'LUNG',\n",
+      " 'Collection': '4D-Lung',\n",
+      " 'CollectionURI': 'https://doi.org/10.7937/K9/TCIA.2016.ELN8YGLE',\n",
+      " 'FileSize': 26405988,\n",
+      " 'ImageCount': 50,\n",
+      " 'LicenseName': 'Creative Commons Attribution 3.0 Unported License',\n",
+      " 'LicenseURI': 'http://creativecommons.org/licenses/by/3.0/',\n",
+      " 'Manufacturer': 'Varian Imaging Laboratories, Switzerland',\n",
+      " 'ManufacturerModelName': 'Trilogy Cone Beam CT',\n",
+      " 'Modality': 'CT',\n",
+      " 'PatientID': '100_HM10395',\n",
+      " 'ProtocolName': '5.1 4DCT & ITV FB + 4D + INSP/EXP',\n",
+      " 'SeriesDate': '1997-10-03 00:00:00.0',\n",
+      " 'SeriesDescription': 'P4^P100^S113^I0, Gated, 70.0%',\n",
+      " 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695',\n",
+      " 'SeriesNumber': 507,\n",
+      " 'SoftwareVersions': 'LightSpeedverrel',\n",
+      " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.6834.5010.552215730027211807644647167706',\n",
+      " 'TimeStamp': '2015-07-20 17:58:54.0'}\n",
+      "<class 'dict'>\n",
+      "\n",
+      "v2/getImageWithMD5Hash\n",
+      "<Response [200]>\n"
      ]
     }
    ],
    "source": [
+    "from nbiatoolkit import NBIAClient\n",
+    "import requests\n",
     "from pprint import pprint\n",
-    "bodypart_count = client.getBodyPartCounts()\n",
-    "print(\"Total Number of Collections:\" + str(len(bodypart_count)))\n",
+    "import io, zipfile\n",
     "\n",
-    "print(\"First 5 body parts:\")\n",
-    "pprint(bodypart_count[0:5])"
+    "client = NBIAClient(log_level=\"DEBUG\")\n",
+    "series = client.getSeries(Collection=\"4D-Lung\")\n",
+    "pprint(series[0])\n",
+    "print(type(series[0]))\n",
+    "print()\n",
+    "response = client.downloadSeries(series[0][\"SeriesInstanceUID\"])\n",
+    "pprint(response)\n",
+    "\n",
+    "file = zipfile.ZipFile(io.BytesIO(response.content))\n",
+    "file.extractall(path = \".\")"
    ]
   },
   {

diff --git a/driver.py b/driver.py
@@ -2,13 +2,15 @@
 import requests
 from pprint import pprint
 
-client = NBIAClient(log_level="INFO")
+client = NBIAClient(log_level="DEBUG")
+# series = client.getSeries(Collection="4D-Lung")
+# pprint(series[0])
+# print(type(series[0]))
+# print()
 
-# response = client.getPatients(collection="LIDC-IDRI", modality="CT")
-
-# pprint(response[0:5])
-
-
-response = client.getPatients(collection="LIDC-IDRI", modality="MRI")
-
-pprint(response[0:5])
+# series0 = series[0]['SeriesInstanceUID']
+series0 = '1.3.6.1.4.1.14519.5.2.1.6834.5010.189721824525842725510380467695'
+response = client.downloadSeries(
+    SeriesInstanceUID = series0,
+    downloadDir = "/home/bioinf/bhklab/jermiah/projects/NBIA-toolkit/resources")
+# pprint(response)
diff --git a/src/nbiatoolkit/auth.py b/src/nbiatoolkit/auth.py
@@ -48,15 +48,7 @@ class OAuth2:
     >>> oauth = OAuth2()
     To use a custom account:
     >>> oauth = OAuth2(username="my_username", password="my_password")
-    
-    From the REST API documentation, you need the Authentication
-    headers to access the API. You can get the headers by calling:
-    >>> api_headers = oauth.getToken()
-    
-    You can then use these headers with the `requests` library to 
-    access the API:
-    >>> requests.get(url=query_url, headers=api_headers)
-       
+
     """
 
     def __init__(self, username: str = "nbia_guest", password: str = "", client_id: str = "NBIA"):
@@ -123,8 +115,7 @@ def getToken(self):
         self.access_token = token_data.get('access_token')
 
         self.api_headers = {
-            'Authorization': f'Bearer {self.access_token}',
-            'Accept': 'application/json'
+            'Authorization':f'Bearer {self.access_token}'
         }
 
         self.expiry_time = time.ctime(time.time() + token_data.get('expires_in'))

diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py
@@ -8,8 +8,7 @@ class NBIAClient:
     TODO:: Add docstring
     FIXME:: logger prints duplicate logs if you instantiate the class more than once
     """
-
-
+
     def __init__(self, 
                  username: str = "nbia_guest", 
                  password: str = "",
@@ -38,7 +37,12 @@ def query_api(self, endpoint: NBIA_ENDPOINTS, params: dict = {}) -> dict:
                 headers=self.api_headers,
                 params=params
                 )
-            response = response.json()
+             # Check if response is likely to be JSON
+            if response.headers.get('Content-Type') == 'application/json':
+                response_data = response.json()
+            else:
+                # If response is binary data, return raw response
+                response_data = response.content
         except JSONDecodeError as j:
             if (response.text==""):
                 self.logger.error("Response text is empty.")
@@ -49,8 +53,7 @@ def query_api(self, endpoint: NBIA_ENDPOINTS, params: dict = {}) -> dict:
             self.logger.error("Error querying API: %s", e)
             raise e
 
-
-        return response
+        return response_data
 
     def _createDebugURL(self, endpoint, params):
         auth = "'Authorization:" + self.api_headers["Authorization"] + "' -k "
@@ -107,3 +110,91 @@ def getPatients(self, collection: str, modality: str) -> list:
         patientList = [_["PatientId"] for _ in response]
         return patientList
 
+    def getSeries(self,
+        Collection: str = "",
+        PatientID: str = "",
+        StudyInstanceUID: str = "",
+        Modality: str = "",
+        SeriesInstanceUID: str = "",
+        BodyPartExamined: str = "",
+        ManufacturerModelName: str = "",
+        Manufacturer: str = "",
+        ) -> list:
+        """Query the getSeries endpoint, sending only the non-empty filters.
+
+        Each keyword argument is forwarded under its own name as a query
+        parameter; whatever `query_api` returns is handed back unchanged.
+        """
+        # Snapshot the arguments before binding any other local: iterating
+        # locals() once `params` existed stored `params` inside itself.
+        filters = dict(locals())
+        del filters["self"]
+        params = {key: value for key, value in filters.items() if value != ""}
+        return self.query_api(
+            endpoint = NBIA_ENDPOINTS.GET_SERIES,
+            params = params)
+
+    def downloadSeries(self,
+        SeriesInstanceUID: str,
+        downloadDir: str,
+        ) -> list:
+        """Download one series and unpack it into downloadDir/SeriesInstanceUID.
+
+        The raw value returned by `query_api` is returned unchanged. Only a
+        `bytes` response is treated as a zip archive; it is extracted and then
+        checked with `validateMD5`. Anything else is logged and left alone.
+        """
+        import io, zipfile, os
+        response = self.query_api(
+            endpoint = NBIA_ENDPOINTS.DOWNLOAD_SERIES,
+            params = {"SeriesInstanceUID": SeriesInstanceUID})
+
+        if not isinstance(response, bytes):
+            # Nothing to unpack; surface the problem instead of passing silently.
+            self.logger.error(
+                "Expected binary data for series %s but received %s.",
+                SeriesInstanceUID, type(response).__name__)
+            return response
+
+        seriesDir = os.path.join(downloadDir, SeriesInstanceUID)
+        with zipfile.ZipFile(io.BytesIO(response)) as archive:  # closed on exit
+            archive.extractall(path=seriesDir)
+        self.validateMD5(seriesDir=seriesDir)
+        return response
+
+
+    def _calculateMD5(self, filepath: str) -> str:
+        """Return the hexadecimal MD5 digest of the file at `filepath`."""
+        import hashlib
+        digest = hashlib.md5()
+        with open(filepath, "rb") as stream:
+            block = stream.read(4096)
+            while block:  # an empty read marks end of file
+                digest.update(block)
+                block = stream.read(4096)
+        return digest.hexdigest()
+
+    def validateMD5(self,
+        seriesDir: str
+        ) -> bool:
+        """Verify the files listed in seriesDir/md5hashes.csv against their MD5s.
+
+        Returns False if a listed file is missing, else deletes the CSV and returns
+        True. Raises AssertionError (explicitly; `assert` is stripped by -O).
+        """
+        import os
+        md5File = os.path.join(seriesDir, "md5hashes.csv")
+        if not os.path.isfile(md5File):
+            raise AssertionError("MD5 hash file not found in download directory.")
+        with open(md5File, "r") as f:  # first line is a header; blank lines skipped
+            rows = [line.split(",") for line in f.readlines()[1:] if line.strip()]
+        for row in rows:
+            filepath = os.path.join(seriesDir, row[0])
+            if not os.path.isfile(filepath):
+                print(f"File not found in seriesDir: {filepath}")
+                return False
+            if self._calculateMD5(filepath) != row[1].strip().lower():
+                raise AssertionError(f"MD5 hash mismatch for file: {filepath}")
+        os.remove(md5File)
+        return True
+
diff --git a/src/nbiatoolkit/utils/nbia_endpoints.py b/src/nbiatoolkit/utils/nbia_endpoints.py
@@ -9,9 +9,27 @@ class NBIA_ENDPOINTS(Enum):
     GET_COLLECTION_PATIENT_COUNT = 'getCollectionValuesAndCounts'
     GET_COLLECTIONS = 'v2/getCollectionValues'
     GET_BODY_PART_PATIENT_COUNT = 'getBodyPartValuesAndCounts'
-    GET_PATIENT_BY_COLLECTION_AND_MODALITY = 'v2/getPatientByCollectionAndModality'
+    GET_PATIENT_BY_COLLECTION_AND_MODALITY = 'v2/getPatientByCollectionAndModality'    
+    GET_SERIES = 'v2/getSeries'
+
+    DOWNLOAD_SERIES = 'v2/getImageWithMD5Hash'
+
+    # NOTE(review): requests to the endpoint(s) below appeared to time out -- TODO confirm
+    GET_SERIES_SIZE = 'v2/getSeriesSize'
+    GET_UPDATED_SERIES = 'v2/getUpdatedSeries'
+
+    # curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992"
+
+    # https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992
+
+    # https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesSize?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992
+
+    # curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries"
+    # curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData"
+    # curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesSize"
+
+    # curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getUpdatedSeries"
 
-
     # Helper functions
     def __str__(self):
         return self.value