Merge pull request #13 from jjjermiah/development

refactor dicomsorter class and add some tests
jjjermiah · Nov 25, 2023 · 7e51e20 · 7e51e20
2 parents 931eb4a + b5cf783
commit 7e51e20
Show file tree

Hide file tree

Showing 4 changed files with 164 additions and 2 deletions.
diff --git a/src/nbiatoolkit/dicomsort/helper_functions.py b/src/nbiatoolkit/dicomsort/helper_functions.py
@@ -0,0 +1,59 @@
+import re
+from typing import Tuple, List
+# Matches a '%<DICOMKey>' placeholder. Compiled once at import time so the
+# pattern object is shared by every call instead of being rebuilt per call.
+_DICOM_KEY_PATTERN = re.compile(r'%([A-Za-z]+)')
+
+
+def parseDICOMKeysFromFormat(targetPattern: str) -> Tuple[str, List[str]]:
+    """
+    Parse the target pattern to create a format string with named placeholders
+    and extract a list of DICOM keys.
+
+    The target pattern is a string with placeholders matching '%<DICOMKey>',
+    where <DICOMKey> is one or more ASCII letters. Each placeholder is rewritten
+    as '%(<DICOMKey>)s' and its key is collected in order of appearance
+    (duplicates are kept).
+
+    Only letters belong to a key: a digit, underscore or any other character
+    ends it (e.g. '%Series2' yields the key 'Series' followed by a literal '2').
+    A '%' that is not followed by a letter is left untouched.
+
+    Args:
+        targetPattern (str): The pattern containing '%<DICOMKey>' placeholders.
+
+    Returns:
+        Tuple[str, List[str]]: A tuple containing the format string and a list of DICOM keys.
+
+    Example usage:
+        fmt, keys = parseDICOMKeysFromFormat("%PatientID-%StudyDate")
+        print(fmt)  # "%(PatientID)s-%(StudyDate)s"
+        print(keys)  # ['PatientID', 'StudyDate']
+    """
+    # Collect every key name, in order, including repeats
+    keys = _DICOM_KEY_PATTERN.findall(targetPattern)
+    # Rewrite each '%Key' as the named placeholder '%(Key)s'
+    formatted_pattern = _DICOM_KEY_PATTERN.sub(r'%(\1)s', targetPattern)
+
+    return formatted_pattern, keys
+
+def sanitizeFileName(fileName: str) -> str:
+    """
+    Sanitize the file name by replacing potentially dangerous characters.
+
+    Disallowed filename characters (< > : " / \\ | ? * and the ASCII control
+    characters 0x00-0x1f) and spaces are replaced with underscores, then every
+    run of consecutive underscores is collapsed into a single underscore.
+
+    Args:
+        fileName (str): The file name to sanitize.
+
+    Returns:
+        str: The sanitized file name.
+
+    Raises:
+        AssertionError: If fileName is not a string (this includes None).
+    """
+    # Raise explicitly rather than relying on `assert`, which is stripped when
+    # Python runs with -O. AssertionError is kept so existing callers that
+    # catch it keep working.
+    if not isinstance(fileName, str):
+        raise AssertionError(
+            f"fileName must be a str, got {type(fileName).__name__}"
+        )
+
+    # Replace disallowed filename characters and spaces with an underscore
+    sanitized_name = re.sub(r'[<>:"/\\|?*\x00-\x1f ]', '_', fileName)
+
+    # Collapse runs of consecutive underscores into one
+    sanitized_name = re.sub(r'_{2,}', '_', sanitized_name)
+
+    return sanitized_name
+
+
+def truncateUID(uid: str, lastDigits: int = 5) -> str:
+    """
+    Truncate the UID to the last n characters (includes periods & underscores).
+    If the UID is shorter than n characters, the entire UID is returned.
+
+    Args:
+        uid (str): The UID to truncate.
+        lastDigits (int): Number of trailing characters to keep. Defaults to 5.
+            Zero yields an empty string.
+
+    Returns:
+        str: The last `lastDigits` characters of `uid`.
+
+    Raises:
+        AssertionError: If uid is not a string, or lastDigits is not a
+            non-negative integer.
+    """
+    # Raise explicitly rather than relying on `assert`, which is stripped when
+    # Python runs with -O. AssertionError is kept so existing callers that
+    # catch it keep working.
+    if not isinstance(uid, str):
+        raise AssertionError(f"uid must be a str, got {type(uid).__name__}")
+    if not isinstance(lastDigits, int) or lastDigits < 0:
+        raise AssertionError(
+            f"lastDigits must be a non-negative int, got {lastDigits!r}"
+        )
+
+    # Compute the start index explicitly: `uid[-0:]` would return the whole
+    # UID instead of an empty string. Clamping at 0 returns the full UID when
+    # it is shorter than the requested length.
+    start = max(len(uid) - lastDigits, 0)
+    return uid[start:]
diff --git a/src/nbiatoolkit/dicomsort/test_helper_functions.py b/src/nbiatoolkit/dicomsort/test_helper_functions.py
diff --git a/tests/test_dicom_helpers.py b/tests/test_dicom_helpers.py
@@ -0,0 +1,103 @@
+from nbiatoolkit.dicomsort.helper_functions import parseDICOMKeysFromFormat,sanitizeFileName,truncateUID
+import pytest
+###############################################################################
+# parseDICOMKeysFromFormat
+
+def test_parseDICOMKeysFromFormat():
+    """A two-placeholder pattern yields the named format string and both keys."""
+    pattern = "%PatientID-%StudyDate"
+
+    fmt, dicom_keys = parseDICOMKeysFromFormat(pattern)
+
+    assert fmt == "%(PatientID)s-%(StudyDate)s"
+    assert dicom_keys == ['PatientID', 'StudyDate']
+
+def test_parseDICOMKeysFromFormat_no_keys():
+    """A pattern without placeholders is returned unchanged with no keys."""
+    pattern = "some string without keys"
+
+    fmt, dicom_keys = parseDICOMKeysFromFormat(pattern)
+
+    assert fmt == "some string without keys"
+    assert dicom_keys == []
+
+def test_parseDICOMKeysFromFormat_multiple_keys():
+    """Three placeholders are all converted and reported in order."""
+    pattern = "%PatientID-%StudyDate-%SeriesNumber"
+
+    fmt, dicom_keys = parseDICOMKeysFromFormat(pattern)
+
+    assert fmt == "%(PatientID)s-%(StudyDate)s-%(SeriesNumber)s"
+    assert dicom_keys == ['PatientID', 'StudyDate', 'SeriesNumber']
+
+def test_parseDICOMKeysFromFormat_duplicate_keys():
+    """A key used twice appears twice in the returned key list."""
+    pattern = "%PatientID-%StudyDate-%PatientID"
+
+    fmt, dicom_keys = parseDICOMKeysFromFormat(pattern)
+
+    assert fmt == "%(PatientID)s-%(StudyDate)s-%(PatientID)s"
+    assert dicom_keys == ['PatientID', 'StudyDate', 'PatientID']
+
+###############################################################################
+# sanitizeFileName
+
+def test_sanitizeFileName_no_special_characters():
+    """A name without disallowed characters or spaces comes back unchanged."""
+    clean_name = "test_file_name"
+    assert sanitizeFileName(clean_name) == clean_name
+
+def test_sanitizeFileName_with_special_characters():
+    """Disallowed characters become single underscores, even when adjacent."""
+    raw_name = "file<name>:with?special*characters"
+    assert sanitizeFileName(raw_name) == "file_name_with_special_characters"
+
+def test_sanitizeFileName_with_spaces():
+    """Spaces are replaced with underscores."""
+    raw_name = "file name with spaces"
+    assert sanitizeFileName(raw_name) == "file_name_with_spaces"
+
+def test_sanitizeFileName_empty_string():
+    """An empty name stays empty."""
+    result = sanitizeFileName("")
+    assert result == ""
+
+def test_sanitizeFileName_assertions():
+    """Non-string input (None, then an int) is rejected with AssertionError."""
+    for bad_value in (None, 123):
+        with pytest.raises(AssertionError):
+            sanitizeFileName(bad_value)
+
+###############################################################################
+# truncateUID
+
+@pytest.fixture(scope="session")
+def uid():
+    """Session-scoped sample UID string shared by the truncateUID tests."""
+    return "1.3.6.1.4.1.14519.5.2.1.215314536760363548451614931725770729635"
+
+
+def test_truncateUID_with_valid_inputs(uid):
+    """Keeping five characters returns the last five characters of the UID."""
+    tail = truncateUID(uid, 5)
+    assert tail == "29635"
+
+def test_truncateUID_with_lastDigits_greater_than_length_of_UID(uid):
+    """Asking for more characters than the UID has returns the whole UID."""
+    result = truncateUID(uid, 100)
+    assert result == uid
+
+def test_truncateUID_with_invalid_input_types(uid):
+    """A string passed as lastDigits is rejected with AssertionError."""
+    with pytest.raises(AssertionError):
+        truncateUID(uid, "5")
+
+def test_truncateUID_with_None_input(uid):
+    """None passed as lastDigits is rejected with AssertionError."""
+    with pytest.raises(AssertionError):
+        truncateUID(uid, None)
+
diff --git a/tests/test_nbia.py b/tests/test_nbia.py
@@ -65,7 +65,7 @@ def test_getPatients(nbia_patients):
     assert isinstance(nbia_patients[0], str)
     assert len(nbia_patients[0]) > 0
 
-@pytest.mark.getSeries
+
 def test_getSeries(nbia_client, nbia_collections, nbia_patients):
     seriesList = nbia_client.getSeries(
         Collection=nbia_collections[0],
@@ -77,7 +77,7 @@ def test_getSeries(nbia_client, nbia_collections, nbia_patients):
     assert len(seriesList) > 0
     assert isinstance(seriesList[0], dict)
 
-@pytest.mark.getSeries
+
 def test_fail_getSeries(nbia_client, nbia_collections, nbia_patients):
     with pytest.raises(Exception):
         seriesList = nbia_client.getSeries(