Skip to content

Commit

Permalink
add todos and rearrange md5 function
Browse files Browse the repository at this point in the history
  • Loading branch information
Jermiah committed Nov 21, 2023
1 parent 4aac4e4 commit 2c70522
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 42 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@ See Documentation at [NBIA-Toolkit Read The Docs](https://nbia-toolkit.readthedo
TODO::nbia.py::implement better error handling
TODO::nbia.py::implement better logging & logger configuration
TODO::nbia.py::enforce type checking for all functions and add type hints
TODO::nbia.py::implement return formats for dict, and pandas.DataFrame
TODO::nbia.py::implement return formats for dict, and pandas.DataFrames
TODO::nbia.py::handle the error case where the response is not bytes
TODO::nbia.py::add tests for download Series
TODO::nbia.py::add functionality for downloadSeries to take in a list of seriesUIDs
TODO::md5.py::add tests
TODO::md5.py::add logging and error handling for non-existent files
TODO::dicomsort.py::come up with more efficient algorithm for sorting
TODO::dicomsort.py::implement better error handling
TODO::dicomsort.py::come up with solution to only use part of UIDs (last 5 digits)?
Expand Down
47 changes: 6 additions & 41 deletions src/nbiatoolkit/nbia.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from nbiatoolkit.auth import OAuth2
from nbiatoolkit.utils.nbia_endpoints import NBIA_ENDPOINTS
from nbiatoolkit.utils.logger import setup_logger
from nbiatoolkit.utils.md5 import validateMD5
import requests
from requests.exceptions import JSONDecodeError as JSONDecodeError
import io, zipfile, os
import hashlib


class NBIAClient:
"""
Expand Down Expand Up @@ -137,17 +138,13 @@ def getSeries(self,

return response

def downloadSeries(self,
SeriesInstanceUID: str,
downloadDir: str,
def downloadSeries(
self, SeriesInstanceUID: str, downloadDir: str,
) -> bool:



params = dict()
params["SeriesInstanceUID"] = SeriesInstanceUID


response = self.query_api(
endpoint = NBIA_ENDPOINTS.DOWNLOAD_SERIES,
params = params)
Expand All @@ -157,7 +154,7 @@ def downloadSeries(self,
seriesDir = os.path.join(downloadDir, SeriesInstanceUID)
file.extractall(path=seriesDir)

self.validateMD5(seriesDir=seriesDir)
validateMD5(seriesDir=seriesDir)
else:
# Handle the case where the expected binary data is not received
# Log error or raise an exception
Expand All @@ -166,38 +163,6 @@ def downloadSeries(self,
return True


def _calculateMD5(self,
filepath: str
) -> str:


hash_md5 = hashlib.md5()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()

def validateMD5(self, seriesDir: str) -> bool:
    """Verify the files in ``seriesDir`` against its ``md5hashes.csv`` manifest.

    The first line of the manifest is skipped (presumably a header row);
    every other non-blank line is read as ``<relative path>,<md5 hex digest>``.
    The recorded digest is compared case-insensitively with the digest
    computed by ``self._calculateMD5``.

    Args:
        seriesDir: Directory containing the extracted files and the manifest.

    Returns:
        True when every listed file exists and matches its recorded hash; the
        manifest is deleted in that case. False as soon as a listed file is
        missing (a message is printed and the manifest is left in place).

    Raises:
        AssertionError: If the manifest is missing or a file's hash differs
            from the recorded one.
    """
    md5File = os.path.join(seriesDir, "md5hashes.csv")
    # Raise explicitly instead of using ``assert``: assert statements are
    # stripped under ``python -O``, which would silently disable the check.
    if not os.path.isfile(md5File):
        raise AssertionError("MD5 hash file not found in download directory.")

    with open(md5File, "r") as f:
        lines = f.readlines()

    for line in lines[1:]:
        # A blank line (e.g. a trailing newline) is not a manifest entry and
        # must not be reported as a missing file.
        if not line.strip():
            continue

        fields = line.split(",")
        filepath = os.path.join(seriesDir, fields[0])
        if not os.path.isfile(filepath):
            print(f"File not found in seriesDir: {filepath}")
            return False

        md5hash = fields[1].strip().lower()
        md5 = self._calculateMD5(filepath)
        if md5 != md5hash:
            raise AssertionError(f"MD5 hash mismatch for file: {filepath}")

    # delete the md5 file if all hashes match
    os.remove(md5File)
    return True


30 changes: 30 additions & 0 deletions src/nbiatoolkit/utils/md5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import hashlib, os

def calculateMD5(filepath: str) -> str:
hash_md5 = hashlib.md5()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()

def validateMD5(seriesDir: str) -> bool:
    """Verify the files in ``seriesDir`` against its ``md5hashes.csv`` manifest.

    The first line of the manifest is skipped (presumably a header row);
    every other non-blank line is read as ``<relative path>,<md5 hex digest>``.
    The recorded digest is compared case-insensitively with the digest
    computed by ``calculateMD5``.

    Args:
        seriesDir: Directory containing the extracted files and the manifest.

    Returns:
        True when every listed file exists and matches its recorded hash; the
        manifest is deleted in that case. False as soon as a listed file is
        missing (a message is printed and the manifest is left in place).

    Raises:
        AssertionError: If the manifest is missing or a file's hash differs
            from the recorded one.
    """
    md5File = os.path.join(seriesDir, "md5hashes.csv")
    # Raise explicitly instead of using ``assert``: assert statements are
    # stripped under ``python -O``, which would silently disable the check.
    if not os.path.isfile(md5File):
        raise AssertionError("MD5 hash file not found in download directory.")

    with open(md5File, "r") as f:
        lines = f.readlines()

    for line in lines[1:]:
        # A blank line (e.g. a trailing newline) is not a manifest entry and
        # must not be reported as a missing file.
        if not line.strip():
            continue

        fields = line.split(",")
        filepath = os.path.join(seriesDir, fields[0])
        if not os.path.isfile(filepath):
            print(f"File not found in seriesDir: {filepath}")
            return False

        md5hash = fields[1].strip().lower()
        md5 = calculateMD5(filepath)
        if md5 != md5hash:
            raise AssertionError(f"MD5 hash mismatch for file: {filepath}")

    # delete the md5 file if all hashes match
    os.remove(md5File)
    return True

0 comments on commit 2c70522

Please sign in to comment.