Skip to content

Commit

Permalink
feat: retrieve new series after a given date, +docs +tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jjjermiah committed Feb 4, 2024
1 parent 67fb6e4 commit c6a506d
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 12 deletions.
51 changes: 49 additions & 2 deletions docs/Tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -44,7 +44,7 @@
{
"data": {
"text/plain": [
"'0.18.1'"
"'0.22.1'"
]
},
"execution_count": 2,
Expand Down Expand Up @@ -668,6 +668,53 @@
"pprint(seriesbyPatientJSON[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### get New Series after a given date\n",
"\n",
"``` python\n",
"getNewSeries(\n",
" Date: Union[str, datetime], # (required) accepted formats:\n",
" # \"%Y-%m-%d\", \"%Y/%m/%d\", \"%Y%m%d\", \n",
" # \"%m/%d/%Y\", \"%d/%m/%Y\", \"%d-%m-%Y\"\n",
")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"01/01/2024\n",
"Total new series after 2024-01-01: 4751\n",
"{'BodyPartExamined': 'BREAST',\n",
" 'Collection': 'Advanced-MRI-Breast-Lesions',\n",
" 'ImageCount': 580,\n",
" 'Manufacturer': 'GE MEDICAL SYSTEMS',\n",
" 'ManufacturerModelName': 'Signa HDxt',\n",
" 'Modality': 'MR',\n",
" 'PatientID': 'AMBL-376',\n",
" 'SeriesDate': '2005-04-14 00:00:00.0',\n",
" 'SeriesDescription': 'AX Sen Vibrant MultiPhase',\n",
" 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.196168555404542578475976858220037429361',\n",
" 'SeriesNumber': 5,\n",
" 'SoftwareVersions': '24',\n",
" 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.201692485188138093977458202425301357349'}\n"
]
}
],
"source": [
"newSeries = client.getNewSeries(Date=\"2024/01/01\")\n",
"print(f\"Total new series after 2024-01-01: {len(newSeries)}\")\n",
"pprint(newSeries[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
28 changes: 26 additions & 2 deletions src/nbiatoolkit/nbia.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from tqdm import tqdm
from pyfiglet import Figlet
import os

from datetime import datetime
# set __version__ variable
__version__ = "0.22.1"

Expand Down Expand Up @@ -51,12 +51,14 @@ def __init__(
def headers(self):
return self._api_headers


def query_api(
self, endpoint: NBIA_ENDPOINTS, params: dict = {}
) -> Union[list, dict, bytes]:
query_url = NBIA_ENDPOINTS.BASE_URL.value + endpoint.value

self.log.debug("Querying API endpoint: %s", query_url)
self.log.debug("Query parameters: %s", params)
response: requests.Response
try:
response = requests.get(url=query_url, headers=self.headers, params=params)
Expand Down Expand Up @@ -172,7 +174,10 @@ def getPatients(self, Collection: str = "") -> Union[list[dict[str, str]], None]

return patientList

def getNewPatients(self, Collection: str, Date: str) -> Union[list[dict[str, str]], None]:
def getNewPatients(self,
Collection: str,
Date: Union[str, datetime],
) -> Union[list[dict[str, str]], None]:
assert Collection is not None
assert Date is not None

Expand Down Expand Up @@ -298,6 +303,25 @@ def getSeries(

return response

def getNewSeries(
    self,
    Date: Union[str, datetime],
) -> Union[list[dict], None]:
    """Query the GET_UPDATED_SERIES endpoint for series from a given date.

    Args:
        Date: The cut-off date, either as a string or a ``datetime``.
            It is reformatted by ``convertDateFormat`` before being sent.

    Returns:
        The list returned by the API, or ``None`` (after logging an
        error) when the response is not a list.

    Raises:
        AssertionError: If ``Date`` is neither a ``str`` nor a ``datetime``.
    """
    # None is never an instance of str/datetime, so this single check also
    # rejects a missing date.
    assert isinstance(Date, (str, datetime)), \
        "Date must be a string or datetime object"

    # The date is sent as "fromDate" in %d/%m/%Y form.
    # NOTE(review): the endpoint appears to require this layout - confirm
    # against the NBIA API documentation.
    query_params = self.parsePARAMS(
        {"fromDate": convertDateFormat(input_date=Date, format="%d/%m/%Y")}
    )

    result = self.query_api(
        endpoint=NBIA_ENDPOINTS.GET_UPDATED_SERIES, params=query_params
    )

    if isinstance(result, list):
        return result

    self.log.error("Expected list, but received: %s", type(result))
    return None

def downloadSeries(
self,
SeriesInstanceUID: Union[str, list],
Expand Down
5 changes: 1 addition & 4 deletions src/nbiatoolkit/utils/nbia_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,12 @@ class NBIA_ENDPOINTS(Enum):
GET_STUDIES = "v2/getPatientStudy"

GET_SERIES = "v2/getSeries"
GET_UPDATED_SERIES = "v2/getUpdatedSeries" # ?fromDate=01/01/2024
DOWNLOAD_SERIES = "v2/getImageWithMD5Hash"

GET_UPDATED_SERIES = "v2/getUpdatedSeries" # ?fromDate=01/01/2024

# https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992

# https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesSize?SeriesInstanceUID=1.3.6.1.4.1.9590.100.1.2.374115997511889073021386151921807063992

# curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeries"
# curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesMetaData"
# curl -H "Authorization:Bearer YOUR_ACCESS_TOKEN" -k "https://services.cancerimagingarchive.net/nbia-api/services/v2/getSeriesSize"

Expand Down
10 changes: 8 additions & 2 deletions src/nbiatoolkit/utils/parsers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup
from datetime import datetime

from typing import Union

def clean_html(html_string: str) -> str:
"""
Expand Down Expand Up @@ -39,7 +39,10 @@ def convertMillis(millis: int) -> str:
return datetime.fromtimestamp(millis / 1000.0).strftime('%Y-%m-%d')


def convertDateFormat(input_date: str, format: str = "%Y/%m/%d") -> str:
def convertDateFormat(
input_date: Union[str, datetime],
format: str = "%Y/%m/%d"
) -> str:
"""
Converts the input date to the desired format.
Expand All @@ -56,6 +59,9 @@ def convertDateFormat(input_date: str, format: str = "%Y/%m/%d") -> str:
possible_formats = [
"%Y-%m-%d", "%Y/%m/%d", "%Y%m%d", "%m/%d/%Y", "%d/%m/%Y", "%d-%m-%Y"
]
if isinstance(input_date, datetime):
return input_date.strftime(format)

# Try parsing the input date with each possible format
for date_format in possible_formats:
try:
Expand Down
11 changes: 9 additions & 2 deletions tests/test_nbia.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test_getPatients(nbia_patients):
assert "PatientSex" in nbia_patients[0]

def test_getNewPatients(nbia_client):
patients = nbia_client.getNewPatients('TCGA-BLCA', Date = "2019-01-01")
patients = nbia_client.getNewPatients('CMB-LCA', Date = "2022/12/06")
assert isinstance(patients, list)
assert len(patients) > 0
assert isinstance(patients[0], dict)
Expand Down Expand Up @@ -144,7 +144,14 @@ def test_fail_getSeries(nbia_client, nbia_collections, nbia_patients):
assert isinstance(seriesList, list)
assert len(seriesList) > 0
assert isinstance(seriesList[0], dict)


def test_getNewSeries(nbia_client):
    """getNewSeries returns either None or a list made up only of dicts."""
    series = nbia_client.getNewSeries("01/01/2024")

    # None is an accepted outcome; nothing further to verify in that case.
    if series is None:
        return

    assert isinstance(series, list)
    assert all(isinstance(s, dict) for s in series)

def test_downloadSeries(nbia_client, nbia_collections, nbia_patients):
seriesList = nbia_client.getSeries(
Collection=nbia_collections[0],
Expand Down

0 comments on commit c6a506d

Please sign in to comment.