Skip to content

Commit

Permalink
Merge pull request #129 from jjjermiah/development
Browse files Browse the repository at this point in the history
feat: adding manipulation of tags
  • Loading branch information
jjjermiah committed Apr 1, 2024
2 parents 5944ed0 + ebbfc85 commit a826122
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
name: coverage-report
path: coverage-report

Codecov:
Code-Coverage:
needs: Unit-Tests
runs-on: ubuntu-latest
steps:
Expand Down
24 changes: 24 additions & 0 deletions src/nbiatoolkit/dicomtags/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from .tags import (
convert_element_to_int,
convert_int_to_element,
LOOKUP_TAG,
element_VR_lookup,
getSeriesModality,
)

from .tags import (
subsetSeriesTags,
getReferencedFrameOfReferenceSequence,
getReferencedSeriesUIDS,
)

# Public names re-exported from the .tags module by this package.
# NOTE: getSequenceElement and extract_ROI_info are not re-exported here;
# nbia.py imports them directly from .dicomtags.tags.
__all__ = [
"convert_element_to_int",
"convert_int_to_element",
"LOOKUP_TAG",
"element_VR_lookup",
"getSeriesModality",
"subsetSeriesTags",
"getReferencedFrameOfReferenceSequence",
"getReferencedSeriesUIDS",
]
144 changes: 141 additions & 3 deletions src/nbiatoolkit/dicomtags/tags.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from math import log
from pydicom.datadict import dictionary_VR
from pydicom.datadict import tag_for_keyword
from pydicom._dicom_dict import DicomDictionary
import pandas as pd
from typing import Any, Union, List
from typing import List


def convert_element_to_int(element_str: str) -> int:
Expand Down Expand Up @@ -154,6 +154,17 @@ def getSeriesModality(series_tags_df: pd.DataFrame) -> str:
def subsetSeriesTags(series_tags_df: pd.DataFrame, element: str) -> pd.DataFrame:
"""
Subsets a DataFrame containing DICOM series tags based on the start and end elements.
Args:
series_tags_df (pd.DataFrame): A DataFrame containing DICOM series tags.
element (str): The element to subset the DataFrame.
Returns:
pd.DataFrame: A DataFrame containing the subset of the series tags.
Raises:
ValueError: If the element is not found in the series tags.
ValueError: If more than two elements are found in the series tags.
"""

locs: pd.DataFrame
Expand All @@ -162,13 +173,31 @@ def subsetSeriesTags(series_tags_df: pd.DataFrame, element: str) -> pd.DataFrame
if len(locs) == 0:
raise ValueError("Element not found in the series tags.")

if len(locs) == 1:
raise ValueError(
"Only one element found in the series tags. Ensure element is a sequence"
)

if len(locs) > 2:
raise ValueError("More than two elements found in the series tags.")

return series_tags_df.iloc[locs.index[0] : locs.index[1]]
return series_tags_df.iloc[locs.index[0] : locs.index[1] + 1]


def getReferencedFrameOfReferenceSequence(series_tags_df: pd.DataFrame) -> pd.DataFrame:
"""
Given a DataFrame containing DICOM series tags, retrieves the ReferencedFrameOfReferenceSequence.
Args:
series_tags_df (pd.DataFrame): A DataFrame containing DICOM series tags.
Returns:
pd.DataFrame: A DataFrame containing the ReferencedFrameOfReferenceSequence.
Raises:
ValueError: If the series is not an RTSTRUCT.
"""
modality = getSeriesModality(series_tags_df=series_tags_df)
if modality != "RTSTRUCT":
raise ValueError("Series is not an RTSTRUCT.")
Expand Down Expand Up @@ -220,3 +249,112 @@ def getReferencedSeriesUIDS(series_tags_df: pd.DataFrame) -> List[str]:
UIDS: list[str] = value["data"].to_list()

return UIDS


def getSequenceElement(
    sequence_tags_df: pd.DataFrame, element_keyword: str
) -> pd.DataFrame:
    """
    Return the rows of a tags DataFrame that belong to the named sequence element.

    The keyword is resolved to its tag with ``LOOKUP_TAG``, rendered as an
    element string with ``convert_int_to_element``, and the actual subsetting
    is delegated to ``subsetSeriesTags``.

    Args:
        sequence_tags_df (pd.DataFrame): A DataFrame containing DICOM sequence tags.
        element_keyword (str): The DICOM keyword of the element to search for,
            e.g. ``"StructureSetROISequence"``.

    Returns:
        pd.DataFrame: The subset returned by ``subsetSeriesTags`` for that element.

    Raises:
        ValueError: Propagated from ``subsetSeriesTags`` when the element is
            not found, is found only once, or is found more than twice.
    """
    element_str: str = convert_int_to_element(
        combined_int=LOOKUP_TAG(keyword=element_keyword)
    )

    return subsetSeriesTags(series_tags_df=sequence_tags_df, element=element_str)


def camel_case_tag(string: str) -> str:
    """
    Join the whitespace-separated words of a tag name into a single token.

    Despite its name, this function does not change the case of any letter,
    so that names coming from the DICOM dictionary are not modified; it only
    removes the whitespace between (and around) the words.

    Args:
        string (str): The input string to be converted.

    Returns:
        str: The input with all whitespace removed.

    Example:
        >>> camel_case_tag("ROI Name")
        'ROIName'
        >>> camel_case_tag("hello world")
        'helloworld'
    """
    # str.split() with no argument splits on any run of whitespace and drops
    # leading/trailing whitespace, so joining the pieces strips all of it.
    return "".join(string.split())


def extract_ROI_info(StructureSetROISequence) -> dict[str, dict[str, str]]:
    """
    Extracts ROI information from the StructureSetROISequence.

    The frame is split at every row whose ``name`` is ``"ROI Number"``. The
    ``data`` value of such a row becomes the key of one ROI, and the rows that
    follow it (up to the next ``"ROI Number"`` row, or to the end of the frame
    for the last ROI) become that ROI's attributes.

    Args:
        StructureSetROISequence (pandas.DataFrame): A DataFrame representing
            the StructureSetROISequence; it must have ``name`` and ``data``
            columns.

    Returns:
        dict[str, dict[str, str]]: Maps each ROI number to a dictionary of
            ``{name with whitespace removed: data}`` for the rows of that ROI.

    Raises:
        ValueError: If ROI Number is not found in the StructureSetROISequence.
    """
    # Positional offsets of every "ROI Number" row. Positions (not index
    # labels) are used so the result does not depend on the frame having a
    # consecutive integer index.
    is_roi_number = StructureSetROISequence["name"] == "ROI Number"
    starts = [pos for pos, flag in enumerate(is_roi_number) if flag]

    if not starts:
        raise ValueError("ROI Number not found in the StructureSetROISequence.")

    # Each ROI ends where the next one starts; the last ROI runs to the end
    # of the frame so that it is not silently dropped.
    # NOTE(review): any rows that trail the last ROI's attributes (e.g. a
    # closing sequence row) are attributed to that last ROI - confirm against
    # real tag output.
    stops = starts[1:] + [len(StructureSetROISequence)]

    ROISet: dict[str, dict[str, str]] = {}
    for start, stop in zip(starts, stops):
        ROI_number: str = StructureSetROISequence["data"].iloc[start]

        # Rows strictly between this "ROI Number" row and the next one.
        ROI_info: pd.DataFrame = StructureSetROISequence.iloc[start + 1 : stop]

        ROISet[ROI_number] = {
            camel_case_tag(string=name): data
            for name, data in zip(ROI_info["name"], ROI_info["data"])
        }

    return ROISet


# def getRTSTRUCT_ROI_info(seriesUID: str) -> dict[str, dict[str, str]]:
# """
# Given a SeriesInstanceUID of an RTSTRUCT, retrieves the ROI information.

# Args:
# seriesUID (str): The SeriesInstanceUID of the RTSTRUCT.

# Returns:
# dict[str, dict[str, str]]: A dictionary containing the ROI information.
# """

# RTSTRUCT_Tags = client.getDICOMTags(seriesUID)

# StructureSetROISequence = getSequenceElement(sequence_tags_df=RTSTRUCT_Tags, element_keyword="StructureSetROISequence")

# return extract_ROI_info(StructureSetROISequence)
22 changes: 22 additions & 0 deletions src/nbiatoolkit/nbia.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
ReturnType,
conv_response_list,
)

from .dicomtags.tags import (
getReferencedSeriesUIDS,
extract_ROI_info,
getSequenceElement,
)

import pandas as pd
import requests
from requests.exceptions import JSONDecodeError as JSONDecodeError
Expand Down Expand Up @@ -615,6 +622,21 @@ def getDICOMTags(

return conv_response_list(response, returnType)

def getRefSeriesUIDs(
    self,
    SeriesInstanceUID: str,
) -> List[str]:
    """
    Return the series UIDs referenced by the given series.

    The DICOM tags of the series are requested as a DataFrame through
    ``self.getDICOMTags`` and handed to ``getReferencedSeriesUIDS``.

    Args:
        SeriesInstanceUID (str): The SeriesInstanceUID whose tags are queried.

    Returns:
        List[str]: The list produced by ``getReferencedSeriesUIDS``.

    Raises:
        ValueError: If ``getDICOMTags`` does not return a pandas DataFrame.
    """
    tags_df = self.getDICOMTags(
        SeriesInstanceUID=SeriesInstanceUID,
        return_type=ReturnType.DATAFRAME,
    )

    # isinstance rather than an exact type comparison: DataFrame subclasses
    # are valid input as well.
    if not isinstance(tags_df, pd.DataFrame):
        raise ValueError("DICOM Tags not df or not found in the response.")

    return getReferencedSeriesUIDS(series_tags_df=tags_df)

def downloadSeries(
self,
SeriesInstanceUID: Union[str, list],
Expand Down
37 changes: 37 additions & 0 deletions tests/test_tags.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pandas import DataFrame
import pytest
from src.nbiatoolkit import NBIAClient
from src.nbiatoolkit.dicomtags.tags import convert_int_to_element
Expand Down Expand Up @@ -154,3 +155,39 @@ def test_getSeriesModality(RTSTRUCT_Tags):
def test_failsubsetSeriesTags(RTSTRUCT_Series):
    """subsetSeriesTags must raise KeyError when given the RTSTRUCT_Series fixture."""
    # NOTE(review): presumably the series fixture lacks the column that
    # subsetSeriesTags filters on, hence KeyError - confirm against the fixture.
    with pytest.raises(KeyError):
        subsetSeriesTags(RTSTRUCT_Series, "(0008,0060)")


def test_extract_ROI_info(RTSTRUCT_Tags):
    """Exercise getSequenceElement and extract_ROI_info together on the RTSTRUCT tags."""
    StructureSetROISequence: DataFrame = getSequenceElement(
        sequence_tags_df=RTSTRUCT_Tags, element_keyword="StructureSetROISequence"
    )

    # make sure that the StructureSetROISequence is not empty
    assert (
        not StructureSetROISequence.empty
    ), "Expected StructureSetROISequence to not be empty, but got empty"

    ROI_info: dict[str, dict[str, str]] = extract_ROI_info(StructureSetROISequence)

    assert ROI_info is not None, "Expected ROI_info to not be None, but got None"

    # ROI_info should have at least 26 keys, each a string holding an integer
    assert len(ROI_info) >= 26, f"Expected atleast 26 keys, but got {len(ROI_info)}"
    keys = [int(key) for key in ROI_info]

    # every ROI number must lie in the inclusive range 1..29
    assert all(
        1 <= key <= 29 for key in keys
    ), "Expected all keys to be between 1 and 29"


def test_getReferencedSeriesUIDS(client, RTSTRUCT_Series):
    """client.getRefSeriesUIDs must return the single expected UID for the first RTSTRUCT series."""
    expected = ["1.3.6.1.4.1.14519.5.2.1.133742245714270925254982946723351496764"]

    # Query with the SeriesInstanceUID of the first row of the fixture.
    series_uid = RTSTRUCT_Series["SeriesInstanceUID"].values[0]
    result = client.getRefSeriesUIDs(series_uid)

    assert result == expected, f"Expected {expected}, but got {result}"

0 comments on commit a826122

Please sign in to comment.