Skip to content

Commit

Permalink
Merge pull request #75 from jjjermiah/46-feature-get_collection_descr…
Browse files Browse the repository at this point in the history
…iptions

feat: add getCollectionDescriptions method, with tests, and updated d…
  • Loading branch information
jjjermiah committed Feb 3, 2024
2 parents 0edfd89 + 8617395 commit 89133ac
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 10 deletions.
54 changes: 48 additions & 6 deletions docs/Tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -38,16 +38,16 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0.15.1'"
"'0.18.1'"
]
},
"execution_count": 23,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -74,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -110,6 +110,48 @@
"print(collections)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### get Collection Description\n",
"\n",
"``` python\n",
"getCollectionDescriptions(\n",
" collectionName: str # (required)\n",
")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'collectionName': 'TCGA-BLCA',\n",
" 'description': 'The Cancer Genome Atlas-Bladder Endothelial Carcinoma '\n",
" '(TCGA-BLCA) data collection is part of a larger effort to '\n",
" 'enhance the TCGA http://cancergenome.nih.gov/ data set with '\n",
" 'characterized radiological images. The Cancer Imaging '\n",
" 'Program (CIP), with the cooperation of several of the TCGA '\n",
" 'tissue-contributing institutions, has archived a large '\n",
" 'portion of the radiological images of the '\n",
" 'genetically-analyzed BLCA cases. Please see the TCGA-BLCA '\n",
" 'page to learn more about the images and to obtain any '\n",
" 'supporting metadata for this collection.',\n",
" 'descriptionURI': 'https://doi.org/10.7937/K9/TCIA.2016.8LNG8XDR',\n",
" 'lastUpdated': '2023-03-16'}]\n"
]
}
],
"source": [
"pprint(client.getCollectionDescriptions(\"TCGA-BLCA\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
24 changes: 22 additions & 2 deletions src/nbiatoolkit/nbia.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from .auth import OAuth2
from .utils.nbia_endpoints import NBIA_ENDPOINTS
from .logger.logger import setup_logger
from .utils.md5 import validateMD5
from .utils import NBIA_ENDPOINTS, validateMD5, clean_html, convertMillis
from .dicomsort import DICOMSorter

import requests
Expand Down Expand Up @@ -108,6 +107,27 @@ def getCollections(self, prefix: str = "") -> Union[list[str], None]:
collections.append(name)
return collections

def getCollectionDescriptions(self, collectionName: str) -> Union[list[dict[str, str]], None]:
    """Fetch the description record of a single collection.

    Queries the ``GET_COLLECTION_DESCRIPTIONS`` endpoint and reshapes the
    first entry of the response.

    Args:
        collectionName: Name of the collection to look up.

    Returns:
        A one-element list holding a dict with the keys ``collectionName``,
        ``description`` (passed through ``clean_html``), ``descriptionURI``
        and ``lastUpdated`` (the ``collectionDescTimestamp`` value formatted
        by ``convertMillis``).

    Raises:
        ValueError: If the API returned nothing for the collection, or if
            the first entry of the response is not a dictionary.
    """
    # locals() is evaluated first, while it only holds the call arguments.
    PARAMS = self.parsePARAMS(locals())

    response = self.query_api(NBIA_ENDPOINTS.GET_COLLECTION_DESCRIPTIONS, PARAMS)

    # `not response` covers both an empty payload and a missing (None) one,
    # so the caller always sees the same ValueError instead of a TypeError
    # coming from len(None).
    if not response:
        raise ValueError("The response from the API is empty. Please check the collection name.")

    api_response = response[0]
    if not isinstance(api_response, dict):
        raise ValueError("The response from the API is not a dictionary")

    returnVal: dict[str, str] = {
        "collectionName": api_response['collectionName'],
        "description": clean_html(api_response['description']),
        "descriptionURI": api_response['descriptionURI'],
        "lastUpdated": convertMillis(millis=int(api_response['collectionDescTimestamp'])),
    }

    return [returnVal]

def getModalityValues(
self, Collection: str = "", BodyPartExamined: str = ""
) -> Union[list[str], None]:
Expand Down
4 changes: 2 additions & 2 deletions src/nbiatoolkit/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .nbia_endpoints import NBIA_ENDPOINTS
from .md5 import validateMD5

__all__ = ["NBIA_ENDPOINTS", "validateMD5"]
from .parsers import convertMillis, clean_html
__all__ = ["NBIA_ENDPOINTS", "validateMD5", "convertMillis", "clean_html"]
39 changes: 39 additions & 0 deletions src/nbiatoolkit/utils/parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from bs4 import BeautifulSoup
from datetime import datetime


def clean_html(html_string: str) -> str:
    """
    Strip HTML markup from a string and return its plain text.

    Args:
        html_string (str): The non-empty HTML (or plain text) string to clean.
    Returns:
        str: The text content with tags removed, text fragments joined by a
            single space, and non-breaking spaces replaced by regular spaces.
    Raises:
        AssertionError: If the input is not a string or is an empty string.
    """
    # Raise explicitly instead of using `assert` statements: asserts are
    # removed when Python runs with -O, which would silently skip validation.
    # AssertionError is kept so existing callers see the same exception.
    if not isinstance(html_string, str):
        raise AssertionError("The input must be a string")
    if html_string == "":
        raise AssertionError("The input string cannot be empty")

    soup = BeautifulSoup(html_string, 'html.parser')
    text_content = soup.get_text(separator=' ', strip=True)
    return text_content.replace('\xa0', ' ')


from datetime import datetime

def convertMillis(millis: int) -> str:
    """
    Convert a millisecond timestamp to a formatted date string.

    The timestamp is interpreted by ``datetime.fromtimestamp`` without a
    timezone argument, so the resulting date is expressed in the local
    timezone of the machine running the code.

    Args:
        millis (int): The number of milliseconds since the epoch.
    Returns:
        str: The formatted date string in the format 'YYYY-MM-DD'.
    Raises:
        AssertionError: If the input is not an integer.
    """
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O, which would silently skip validation.
    if not isinstance(millis, int):
        raise AssertionError("The input must be an integer")
    return datetime.fromtimestamp(millis / 1000.0).strftime('%Y-%m-%d')
16 changes: 16 additions & 0 deletions tests/test_nbia.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,19 @@ def test_downloadSeries(nbia_client, nbia_collections, nbia_patients):
assert file.endswith(".dcm")
assert file[:-4].isdigit()

def test_getCollectionDescriptions(nbia_client):
    """A known collection yields exactly one fully populated description dict."""
    collectionName = "4D-Lung"
    descriptions = nbia_client.getCollectionDescriptions(collectionName)

    # The client wraps the single record in a list.
    assert isinstance(descriptions, list)
    assert len(descriptions) == 1

    entry = descriptions[0]
    assert isinstance(entry, dict)
    assert "collectionName" in entry
    assert entry["collectionName"] == collectionName
    for expected_key in ("description", "descriptionURI", "lastUpdated"):
        assert expected_key in entry

def test_failed_getCollectionDescriptions(nbia_client):
    """Asking for a collection name that is not expected to exist raises ValueError."""
    with pytest.raises(ValueError):
        nbia_client.getCollectionDescriptions("bad_collection")
46 changes: 46 additions & 0 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

from numpy import exp
from src.nbiatoolkit.utils.parsers import clean_html, convertMillis
from datetime import datetime
import pytest
def test_clean_html_valid_input():
    """Tags are stripped and HTML entities are decoded to their characters."""
    cleaned = clean_html(
        "<p>This is <b>bold</b> text with special characters: &amp; &lt; &gt;</p>"
    )
    assert cleaned == "This is bold text with special characters: & < >"

def test_clean_html_empty_input():
    """An empty string is rejected with an AssertionError."""
    # Nothing is returned on this path, so there is no expected output to
    # compare against; only the raised exception matters.
    html_string = ""
    with pytest.raises(AssertionError):
        clean_html(html_string)

def test_clean_html_no_html_tags():
    """Plain text without markup passes through unchanged."""
    plain_text = "This is a plain text without any HTML tags"
    assert clean_html(plain_text) == plain_text

def test_clean_html_special_characters_only():
    """A string made only of HTML entities is decoded to the raw characters."""
    cleaned = clean_html("&amp; &lt; &gt;")
    assert cleaned == "& < >"


def test_convertMillis_valid_input():
    """A millisecond timestamp built from a local date formats back to that date."""
    # Build the timestamp from a naive (local) datetime so the check does not
    # depend on the timezone of the machine running the test.
    epoch_millis = int(datetime(2021, 9, 1).timestamp() * 1000)
    formatted = convertMillis(epoch_millis)
    assert formatted == "2021-09-01"

def test_convertMillis_invalid_input():
    """A string timestamp is rejected with the documented AssertionError."""
    millis = "1630444800000"  # Invalid input: string instead of integer
    # pytest.raises fails the test with a clear "DID NOT RAISE" report when no
    # exception occurs, instead of relying on an `assert False` that the
    # surrounding `except AssertionError` would itself catch.
    with pytest.raises(AssertionError) as excinfo:
        convertMillis(millis)  # type: ignore
    assert str(excinfo.value) == "The input must be an integer"

0 comments on commit 89133ac

Please sign in to comment.