In [1]:
import os
import sys
import pathlib

In [2]:
testPaths = pathlib.Path(".").absolute()
str(testPaths.parent)

'c:\\Users\\Max\\Documents\\GitHub\\mds_python\\mds\\src\\fairscape_mds'

In [3]:
sys.path.append(str(testPaths.parent.parent))

In [168]:
from fairscape_mds.core.config import appConfig
from fairscape_mds.models.rocrate import ROCrateUploadRequest
from fairscape_mds.models.dataset import DatasetDistribution, DistributionTypeEnum
from fairscape_mds.models.user import UserWriteModel, checkPermissions
from fairscape_mds.crud.rocrate import userPath, setDatasetObjectKey, FairscapeROCrateRequest
from fairscape_mds.crud.fairscape_request import FairscapeRequest
from fairscape_mds.crud.fairscape_response import FairscapeResponse
import uuid

In [None]:
from fairscape_models.rocrate import ROCrateV1_2
from fairscape_models.dataset import Dataset

import botocore

ImportError: cannot import name 'DatasetDistribution' from 'fairscape_models.dataset' (c:\Users\Max\anaconda3\Lib\site-packages\fairscape_models\dataset.py)

In [5]:
appConfig.identifierCollection.find_one({})

{'_id': ObjectId('68bf6290cf50efdf310b8e12'),
 '@id': 'ark:59853/dataset-paclitaxel-manifest',
 '@type': 'https://w3id.org/EVI#Dataset',
 'metadata': {'@id': 'ark:59853/dataset-paclitaxel-manifest',
  'name': 'Paclitaxel Manifest',
  'metadataType': 'https://w3id.org/EVI#Dataset',
  'additionalType': 'Dataset',
  'author': ['Hansen JN'],
  'datePublished': '02/28/2025',
  'version': '0.1.0',
  'description': 'CSV file containing the metadata for downloads for all paclitaxel treated images',
  'keywords': ['if images', 'metadata'],
  'associatedPublication': None,
  'additionalDocumentation': None,
  'format': 'csv',
  'evi:Schema': None,
  'generatedBy': [],
  'derivedFrom': [],
  'usedByComputation': [],
  'contentUrl': 'http://localhost:8080/api/dataset/download/ark:59853/dataset-paclitaxel-manifest',
  '@type': 'Dataset',
  'size': 297997,
  'isPartOf': {'@id': 'ark:59853/rocrate-paclitaxel-if-data-release',
   '@type': ['https://w3id.org/EVI#Dataset', 'https://w3id.org/EVI#ROCrate'

In [6]:
userQuery = {
	"email": "test@example.org"
}

In [51]:
# get test user from database to 
userMetadata = appConfig.userCollection.find_one(
	userQuery, 
	projection={"_id": False}
	)

currentUser = UserWriteModel.model_validate(userMetadata)

In [52]:
# create a fairscape upload request
from pydantic import BaseModel, Field
from fairscape_mds.models.user import Permissions
from typing import Optional, Any, Dict
import datetime

class FairscapeUploadJob(BaseModel):
	guid: str = Field(alias="@id")
	presigned: bool 
	uploadPath: str
	fileSize: Optional[int] = Field(default=None)
	presignedUploadLink: Optional[Dict[str, Any]] = Field(default=None)
	permissions: Permissions
	timeStarted: Optional[datetime.datetime] = Field(default_factory=datetime.datetime.now)
	timeFinished: Optional[datetime.datetime] = Field(default=None)
	status: Optional[str] = Field(default=None)
	complete: bool 
	success: bool
	rocrateGUID: Optional[str] = Field(default=None)
	identifiersMinted: Optional[int] = Field(default=None)




In [53]:
def createUploadPresignedLink(s3_client, bucket_name, object_name, expiration=7200):

	presignedUploadLink = s3_client.generate_presigned_post(
			bucket_name, 
			object_name,
			Fields=None,
			ExpiresIn=expiration,
	)

	return presignedUploadLink
		


In [54]:
# upload request modification that gives presigned minio put url

class FairscapePresignedROCrateRequest(FairscapeRequest):

	def __init_(self, config):
		super().__init__(config)
		self.config = config

	def get(
			self,
			currentUser: UserWriteModel,
			jobGUID: str
	):
		jobMetadata = self.config.asyncCollection.find_one(
			{"@id": jobGUID},
			projection={"_id": False}
		)

		if not jobMetadata:
			return FairscapeResponse(
				success=False,
				statusCode=404,
				error={
					"message": f"upload job {jobGUID} not found",
					"error": "job not found",
					"@id": jobGUID 
				}
			)

		try:
			jobInstance = FairscapeUploadJob.model_validate(jobMetadata)
		except Exception as exc:
			return FairscapeResponse(
				success=False,
				statusCode=500,
				error= {
					"message": "error unmarshaling job metadata",
					"error": str(exc),
					"@id": jobGUID
				}
			)

		# check permissions on metadata
		if checkPermissions(jobInstance.permissions, currentUser):
			return FairscapeResponse(
				success=True,
				statusCode=200,
				model=jobInstance,
			)
		else:
			return FairscapeResponse(
				success=False,
				statusCode=401,
				error={
					"message": "user unauthorized to view upload request"
				}
			)


	def execute(
		self, 
		currentUser, 
		jobGUID
	):

		# if no minio object is available then return 
		processROCrate.apply_async(args=(uploadJob.guid,), )

		return FairscapeResponse(
			success=200
		)
		

	def upload(
			self,
			currentUser: UserWriteModel, 
			filename: str, 
			filesize: int
			):

		userEmailPath = userPath(currentUser.email)
		uploadPath = f"{appConfig.minioDefaultPath}/{userEmailPath}/rocrates/{filename}"

		try:
			presignedUploadLink = createUploadPresignedLink(
				appConfig.minioClient,
				appConfig.minioBucket,
				uploadPath
			)
		except Exception as exc:
			return FairscapeResponse(
				success=False,
				statusCode=500,
				error={
					"message": "unable to grant presigned upload link",
					"error": str(exc)
					}
			)

		uploadJobInstance = FairscapeUploadJob.model_validate({
			"@id": str(uuid.uuid4()),
			"presigned": True,
			"uploadPath": uploadPath,
			"fileSize": filesize,
			"presignedUploadLink": presignedUploadLink,
			"timeStarted": None,
			"timeFinished": None,
			"permissions": currentUser.getPermissions(),
			"status": "generate presigned link for upload",
			"complete": False,
			"success": False,	
			"rocrateGUID": None,
			"identifiersMinted": 0
		})


		insertResult = appConfig.asyncCollection.insert_one(
			uploadJobInstance.model_dump(mode='json', by_alias=True)
		)

		assert insertResult.inserted_id

		return FairscapeResponse(
			success=True,
			statusCode=200,
			model=uploadJobInstance
		)



In [55]:
createUploadPresignedLink(appConfig.minioClient, appConfig.minioBucket, object_name="rocrate.zip")

{'url': 'http://localhost:9000/default',
 'fields': {'key': 'rocrate.zip',
  'x-amz-algorithm': 'AWS4-HMAC-SHA256',
  'x-amz-credential': 'miniotestadmin/20250923/us-east-1/s3/aws4_request',
  'x-amz-date': '20250923T161418Z',
  'policy': 'eyJleHBpcmF0aW9uIjogIjIwMjUtMDktMjNUMTg6MTQ6MThaIiwgImNvbmRpdGlvbnMiOiBbeyJidWNrZXQiOiAiZGVmYXVsdCJ9LCB7ImtleSI6ICJyb2NyYXRlLnppcCJ9LCB7IngtYW16LWFsZ29yaXRobSI6ICJBV1M0LUhNQUMtU0hBMjU2In0sIHsieC1hbXotY3JlZGVudGlhbCI6ICJtaW5pb3Rlc3RhZG1pbi8yMDI1MDkyMy91cy1lYXN0LTEvczMvYXdzNF9yZXF1ZXN0In0sIHsieC1hbXotZGF0ZSI6ICIyMDI1MDkyM1QxNjE0MThaIn1dfQ==',
  'x-amz-signature': '0ee3781f6b3ae37670aa32b0c0bc8364bad5f88446212bec64389c9fc14361ec'}}

In [13]:
appConfig.minioClient.list_buckets()

{'ResponseMetadata': {'RequestId': '1867A3F7C504A19D',
  'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '457',
   'content-type': 'application/xml',
   'server': 'MinIO',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'vary': 'Origin, Accept-Encoding',
   'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
   'x-amz-request-id': '1867A3F7C504A19D',
   'x-content-type-options': 'nosniff',
   'x-ratelimit-limit': '4424',
   'x-ratelimit-remaining': '4424',
   'x-xss-protection': '1; mode=block',
   'date': 'Mon, 22 Sep 2025 15:21:04 GMT'},
  'RetryAttempts': 0},
 'Buckets': [{'Name': 'default',
   'CreationDate': datetime.datetime(2025, 7, 21, 7, 3, 40, 883000, tzinfo=tzutc())},
  {'Name': 'fairscape',
   'CreationDate': datetime.datetime(2025, 7, 21, 7, 4, 42, 612000, tzinfo=tzutc())}],
 'Owner': {'DisplayName':

In [56]:
presignedROCrateRequestFactory = FairscapePresignedROCrateRequest(appConfig)

In [57]:
uploadPresignedResponse = presignedROCrateRequestFactory.upload(
	currentUser,
	"paclitaxel.zip",
	4497805667
)

In [58]:
uploadPresignedResponse.model.model_dump(by_alias=True)

{'@id': '2a6f0b5d-35a0-42ec-8d08-0613302e9fd9',
 'presigned': True,
 'uploadPath': 'default/test/rocrates/paclitaxel.zip',
 'fileSize': 4497805667,
 'presignedUploadLink': {'url': 'http://localhost:9000/default',
  'fields': {'key': 'default/test/rocrates/paclitaxel.zip',
   'x-amz-algorithm': 'AWS4-HMAC-SHA256',
   'x-amz-credential': 'miniotestadmin/20250923/us-east-1/s3/aws4_request',
   'x-amz-date': '20250923T161422Z',
   'policy': 'eyJleHBpcmF0aW9uIjogIjIwMjUtMDktMjNUMTg6MTQ6MjJaIiwgImNvbmRpdGlvbnMiOiBbeyJidWNrZXQiOiAiZGVmYXVsdCJ9LCB7ImtleSI6ICJkZWZhdWx0L3Rlc3Qvcm9jcmF0ZXMvcGFjbGl0YXhlbC56aXAifSwgeyJ4LWFtei1hbGdvcml0aG0iOiAiQVdTNC1ITUFDLVNIQTI1NiJ9LCB7IngtYW16LWNyZWRlbnRpYWwiOiAibWluaW90ZXN0YWRtaW4vMjAyNTA5MjMvdXMtZWFzdC0xL3MzL2F3czRfcmVxdWVzdCJ9LCB7IngtYW16LWRhdGUiOiAiMjAyNTA5MjNUMTYxNDIyWiJ9XX0=',
   'x-amz-signature': '96fd887ff95197988207eeac0557cb4c6f5996613aaec0577364f8d90403e5c1'}},
 'permissions': {'owner': 'test@example.org', 'group': None},
 'timeStarted': None,
 'timeF

In [59]:
uploadInstance = uploadPresignedResponse.model
uploadLink = uploadInstance.presignedUploadLink.get("url")
uploadFields = uploadInstance.presignedUploadLink.get("fields")
jobGUID = uploadInstance.guid

In [60]:
import httpx
import json
import boto3
import sys

In [61]:
uploadLink

'http://localhost:9000/default'

In [62]:
cratePath ="E:\\Work\\Data\\Uploads\\upload_06_29_2025\\paclitaxel.zip" 
with open(cratePath, "rb") as cratefile:
	rocrateFiles = {
		"file": ("paclitaxel.zip", cratefile, "application/zip")
	}


	# upload a test rocrate
	paclitaxelResponse = httpx.post(
		uploadLink,
		files=rocrateFiles,
		data=uploadFields,
		timeout=10000,
		verify=False
	)


In [21]:
paclitaxelResponse

<Response [204 No Content]>

In [22]:
dict(paclitaxelResponse.headers)

{'accept-ranges': 'bytes',
 'etag': '"572c85ccd18c1abacc21ade165eb6d40"',
 'location': 'http://localhost:9000/default/default/test/rocrates/paclitaxel.zip',
 'server': 'MinIO',
 'strict-transport-security': 'max-age=31536000; includeSubDomains',
 'vary': 'Origin, Accept-Encoding',
 'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
 'x-amz-request-id': '1867A3FBDAFEEC59',
 'x-content-type-options': 'nosniff',
 'x-ratelimit-limit': '4424',
 'x-ratelimit-remaining': '4424',
 'x-xss-protection': '1; mode=block',
 'date': 'Mon, 22 Sep 2025 15:21:37 GMT'}

In [None]:
jobGUID

In [64]:
# given the job id 
jobInstance = proccessROCrate(jobGUID)



In [None]:
jobInstance.uploadPath

'default/test/rocrates/paclitaxel.zip'

In [152]:
class ROCrateException(Exception):
	def __init__(self, jobGUID, message=None):
		if not message:
			self.message = f"ROCrateException: {jobGUID}"
		else:
			self.message = message
		
		self.jobGUID = jobGUID
		super().__init__(self.message)


In [None]:
class ROCrateExceptionJobNotFound(ROCrateException):
	def __init__(self, jobGUID, message=None):
		if not message:
			self.message = f"Upload Job Not Found: {jobGUID}"
		else:
			self.message = message
		self.jobGUID = jobGUID
		super().__init__(self.message)

In [120]:
class ROCrateExceptionMetadataNotFound(ROCrateException):
	def __init__(self, jobGUID, message=None):
		if not message:
			self.message = f"Upload Job has no ROCrate Metadata: {jobGUID}"
		else:
			self.message = message
		self.jobGUID = jobGUID
		super().__init__(self.message)

NameError: name 'ROCrateException' is not defined

In [121]:
class ROCrateZipProcessingJob():
	def __init__(self, config, jobGUID):
		self.config = config
		self.jobGUID = jobGUID
		self.jobMetadata = None
		self.includeStem = False
		self.stem = None
		self.roCrateJSON = None
		self.roCrateInstance = None

	def updateJobMetadata(self, update):

		self.config.asyncCollection.update_one(
			{
				"@id": self.jobGUID
			},
			{
				"$set": update
			}
		)

	def process(self):

		try:
			self.findMetadata()
		except ROCrateExceptionJobNotFound:
			# TODO log error
			return False

		try:
			self.getROCrateMetadata()
		except ROCrateException as exc:
			# update the job
			self.updateJobMetadata(
				{
					"success": False,
					"complete": True,
					"error": exc.message
				}
			)
			return exc.message

		try:
			self.validateROCrateMetadata()
		except ROCrateException as exc:

			self.updateJobMetadata(
				{
					"success": False,
					"complete": True,
					"error": exc.message
				}
			)

			return exc.message


	def findMetadata(self):

		jobMetadata = self.config.asyncCollection.find_one(
			{"@id": self.jobGUID },
			projection={"_id": False}
		)

		if jobMetadata is None:
			raise ROCrateExceptionJobNotFound(jobGUID=self.jobGUID)
		
		self.jobMetadata = FairscapeUploadJob.model_validate(jobMetadata)

	def getROCrateMetadata(self):

		if not self.jobMetadata:
			raise ROCrateException(
				jobGUID = self.jobGUID
				)

		uploadPathString = self.jobMetadata.uploadPath

		if not uploadPathString:
			raise ROCrateException(
				jobGUID = self.jobGUID, 
				message="ROCrate Upload Job Missing Upload Path Property"
				)

		jobUploadPath = pathlib.PurePosixPath(self.jobMetadata.uploadPath)
		baseDirectory = self.jobMetadata.uploadPath 
		metadataKey = baseDirectory + "/ro-crate-metadata.json"
		metadataFound = False
		self.stem = jobUploadPath.stem
		self.includeStem = False

		try: 
			s3Response = appConfig.minioClient.get_object(
				Bucket = appConfig.minioBucket,
				Key = metadataKey
			)
			metadataFound = True
		except appConfig.minioClient.exceptions.NoSuchKey:
			metadataFound = False

		if not metadataFound:

			metadataKey = f"{baseDirectory}/{jobUploadPath.stem}/ro-crate-metadata.json"

			try: 
				s3Response = appConfig.minioClient.get_object(
					Bucket = appConfig.minioBucket,
					Key = metadataKey
				)
				metadataFound = True
			except appConfig.minioClient.exceptions.NoSuchKey:
				metadataFound = False

		if metadataFound:
			self.includeStem = True
			try:
				content = s3Response['Body']
				self.roCrateJSON = json.loads(content.read())

			except json.JSONDecodeError:
				raise ROCrateException("Failed to Decode Metadata JSON")

		else:
			raise ROCrateExceptionMetadataNotFound("Metadata Not Found in RO-Crate")

	def validateROCrateMetadata(self):

		try:
			self.roCrateInstance = ROCrateV1_2.model_validate(self.roCrateJSON)
		except Exception:
			raise ROCrateException("Validation Failed")


In [122]:
proc = ROCrateZipProcessingJob(appConfig, jobGUID)

In [123]:
proc.process()

In [68]:
import sys
def sizeof(obj):
	size = sys.getsizeof(obj)
	if isinstance(obj, dict): 
		return size + sum(map(sizeof, obj.keys())) + sum(map(sizeof, obj.values()))
	if isinstance(obj, (list, tuple, set, frozenset)): 
		return size + sum(map(sizeof, obj))
	return size

In [None]:
sizeof(proc.roCrateJSON) / 10**6

2.001645

In [148]:

rocrateInstance = proc.roCrateInstance

crateMetadata = rocrateInstance.getCrateMetadata()

isPartOfElem = {
	"@id": crateMetadata.guid,
	"@type": crateMetadata.metadataType,
	"name": crateMetadata.name
}

In [154]:
crateMetadata.metadataType

['Dataset', 'https://w3id.org/EVI#ROCrate']

In [None]:
includeStem = proc.includeStem

In [None]:
# TODO TOO Slow for large crates
conflicts = [] 

for elem in rocrateInstance.metadataGraph:
	foundElemMetadata = appConfig.identifierCollection.find_one({
		"@id": elem.guid
	})

	if foundElemMetadata:

		if isinstance(elem.metadataType, list):
			conflicts.append(elem.guid)
		elif isinstance(elem.metadataType, str):
		# if the elem is a dataset / software / computation
			if "EVI#Dataset" in elem.metadataType or "EVI#Software" in elem.metadataType or "EVI#Computation" in elem.metadataType:
				conflicts.append(elem.guid)
			else:
				merges.append(elem.guid)

if len(conflicts)>0:
	raise ROCrateException(jobGUID, "Conflicting Identifiers Found")


KeyboardInterrupt: 

## Write Dataset Metadata

In [None]:
# for every dataset

In [None]:
# get the dataset content url
datasetInstance = rocrateInstance.metadataGraph[-1]

def writeDataset(
		config,
		datasetInstance, 
		stem,
		includeStem
		):

	if not datasetInstance.contentUrl:
		distribution = None

	if datasetInstance.contentUrl == "Embargod":
		distribution = None

	elif "ftp://" in datasetInstance.contentUrl:

		pass

	elif "http://" in datasetInstance.contentUrl or "https://" in datasetInstance.contentUrl:
		pass

	elif "file://" in datasetInstance.contentUrl:

		updates = checkDatasetInROCrate(
			config.minioClient, 
			config.minioBucket, 
			datasetInstance.contentURL, 
			stem, 
			includeStem
			)

		datasetInstance.size = updates.get("size")
		distribution = updates.get("distribution")


	outputDataset = StoredIdentifier.model_validate({
		"@id": datasetElem.guid,
		"@type": MetadataTypeEnum.DATASET,
		"metadata": datasetElem,
		"permissions": permissionsSet, 
		"distribution": distribution,	
		"publicationStatus": PublicationStatusEnum.DRAFT,
		"dateCreated": now,
		"dateModified": now,
	})

	output_json = outputDataset.model_dump(
		by_alias=True,
		mode='json'
		)




def checkDatasetInROCrate(minioClient, minioBucket, contentURL, stem, includeStem):
	""" Check that a dataset exists in an rocrate zipped file while in minio
	"""
	contentUrlKey = contentURL.lstrip("file:///")

	# if file in datasetInstance
	if includeStem:
		objectKey = f"{jobInstance.uploadPath}/{stem}/{contentUrlKey}"
	else:
		objectKey = f"{jobInstance.uploadPath}/{contentUrlKey}"

	try:
		response = minioClient.head_object(
			Bucket= minioBucket,
			Key=objectKey
		)

	except botocore.exceptions.ClientError as e:	
		errorCode = e.response.get("Error", {}).get("Code")
		errorMessage = e.response.get("Error", {}).get("Message")

		raise Exception(f"message: Object Key Not Found\tkey: {objectKey}\tbucket: {appConfig.minioBucket}")

	# updates to metadata
	metadataUpdates = {
		"size": response.get("ContentLength"),
		"distribution": DatasetDistribution.model_validate({
			"distributionType": "minio",
			"location": {"path": objectKey}
		})
	}

	return metadataUpdates


# search for the file in minio

In [136]:
objectKey

'default/test/rocrates/paclitaxel.zip/paclitaxel/rgb/B2AI_5_Paclitaxel_H12_R8_z01_blue_red_green.jpg'

In [None]:
#help(appConfig.minioClient.head_object)

In [145]:
datasetInstance.size = objectSize
datasetInstance.isPartOf = isPartOfElem

In [146]:
datasetInstance.model_dump(by_alias=True)

{'@id': 'ark:59853/B2AI_5_Paclitaxel_H12_R8_z01_blue_red_green',
 'name': 'B2AI IF RGB Image B2AI_5_Paclitaxel_H12_R8_z01_blue_red_green.jpg',
 'metadataType': 'https://w3id.org/EVI#Dataset',
 'additionalType': 'Dataset',
 'author': ['Hansen JN',
  'Axelsson U',
  'Johannesson A',
  'Fall J',
  'Ballllosera Navarro F',
  'Lundberg E'],
 'datePublished': '02/28/2025',
 'version': '0.1.0',
 'description': 'This immunoflouresence image was taken of a well containing cells from the MDA-MB-468 cell line. The drug Paclitaxel was applied to these cells. This sample was stained with the compound DAPI to label the cell nucleus in blue, a tubulin antibody was applied to identify the cytoskeleton in red. Green is labeled using the antibody NEGATIVE.',
 'keywords': ['Machine learning',
  'Artificial intelligence',
  'Breast cancer',
  'Paclitaxel',
  'IF',
  'Microscopy',
  'Immunofluorescence',
  'Nucleus',
  'DAPI',
  'Tubulin'],
 'associatedPublication': None,
 'additionalDocumentation': None,


In [None]:
# stored identifier to write the 
StoredIdentifier(
	
)

In [None]:
object = appConfig.minioClient.get_object(
	Bucket= appConfig.minioBucket,
	Key=objectKey,
	#ChecksumMode=""
)

In [101]:
object.get("Body")

<botocore.response.StreamingBody at 0x231f52b2b60>

In [102]:
objectContent = object.get("Body").read()

In [None]:
# checking if md5 or hash is set on zipped objects

import hashlib

hash_object_sha256 = hashlib.sha256(objectContent)
hex_digest_sha256 = hash_object_sha256.hexdigest()
hex_digest_sha256

hash_object_md5 = hashlib.md5(objectContent)
hex_digest_md5 = hash_object_md5.hexdigest()
hex_digest_md5

In [95]:
response

{'ResponseMetadata': {'RequestId': '1867F5C4977B76F5',
  'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-length': '309581',
   'last-modified': 'Tue, 23 Sep 2025 16:14:53 GMT',
   'server': 'MinIO',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'vary': 'Origin, Accept-Encoding',
   'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
   'x-amz-request-id': '1867F5C4977B76F5',
   'x-content-type-options': 'nosniff',
   'x-ratelimit-limit': '4372',
   'x-ratelimit-remaining': '4372',
   'x-xss-protection': '1; mode=block',
   'date': 'Tue, 23 Sep 2025 16:20:04 GMT'},
  'RetryAttempts': 0},
 'LastModified': datetime.datetime(2025, 9, 23, 16, 14, 53, tzinfo=tzutc()),
 'ContentLength': 309581,
 'Metadata': {}}

309581

In [None]:

# find metadata object
try: 
	s3Response = appConfig.minioClient.get_object(
		Bucket = appConfig.minioBucket,
		Key = metadataKey
	)
except json.JSONDecodeError:
	raise Exception("Failed to Decode Metadata JSON")



In [74]:
def recursiveMinioSearch(s3Client, bucket: str, prefix: str, key: str):	

	listResponse = s3Client.list_objects_v2(
		Bucket= bucket,
		Prefix= prefix
	)

	nextToken = listResponse['NextContinuationToken']
	matchedMetadataKeys = []
	isTruncated = listResponse['IsTruncated']

	while isTruncated:

		matchedMetadataKeys = list(
			filter(
				lambda x: key in x['Key'],
				listResponse['Contents']
				)
			)

		listResponse = s3Client.list_objects_v2(
			Bucket=bucket,
			Prefix=prefix,
			ContinuationToken= nextToken
		)

		nextToken = listResponse.get('ContinuationToken')
		isTruncated = listResponse['IsTruncated']

	return matchedMetadataKeys



In [75]:
listResponse = recursiveMinioSearch(
	s3Client=appConfig.minioClient,
	bucket= appConfig.minioBucket,
	prefix= jobInstance.uploadPath + "/",
	key = "ro-crate-metadata.json"
)

KeyboardInterrupt: 

In [73]:
listResponse

[]

In [63]:
listResponse['ContinuationToken']

KeyError: 'ContinuationToken'

In [43]:
listResponse

{'ResponseMetadata': {'RequestId': '18644D0B8935C0A5',
  'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '243527',
   'content-type': 'application/xml',
   'server': 'MinIO',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'vary': 'Origin, Accept-Encoding',
   'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
   'x-amz-request-id': '18644D0B8935C0A5',
   'x-content-type-options': 'nosniff',
   'x-ratelimit-limit': '4450',
   'x-ratelimit-remaining': '4450',
   'x-xss-protection': '1; mode=block',
   'date': 'Thu, 11 Sep 2025 18:14:26 GMT'},
  'RetryAttempts': 0},
 'IsTruncated': True,
 'Contents': [{'Key': 'default/test/rocrates/paclitaxel.zip/paclitaxel/blue/B2AI_1_Paclitaxel_A10_R12_z02_blue.jpg',
   'LastModified': datetime.datetime(2025, 9, 11, 16, 29, 4, 967000, tzinfo=tzutc()),
   'ETag': '',
   'Size': 1

In [None]:
rocrateMetadata

In [None]:
# start job

In [None]:
def presignedUploadRequest():
	pass

def executeJob():
	pass