In [1]:
import os
import sys
import pathlib

In [2]:
testPaths = pathlib.Path(".").absolute()
str(testPaths.parent)

'c:\\Users\\Max\\Documents\\GitHub\\mds_python\\mds\\src\\fairscape_mds'

In [3]:
sys.path.append(str(testPaths.parent.parent))

In [5]:
from fairscape_mds.core.config import appConfig
from fairscape_mds.models.rocrate import ROCrateUploadRequest
from fairscape_mds.models.user import UserWriteModel, checkPermissions
from fairscape_mds.crud.rocrate import userPath, setDatasetObjectKey, FairscapeROCrateRequest
from fairscape_mds.crud.fairscape_request import FairscapeRequest
from fairscape_mds.crud.fairscape_response import FairscapeResponse
import uuid

In [6]:
userQuery = {
	"email": "test@example.org"
}

In [7]:
# get test user from database to 
userMetadata = appConfig.userCollection.find_one(
	userQuery, 
	projection={"_id": False}
	)

currentUser = UserWriteModel.model_validate(userMetadata)

In [31]:
# create a fairscape upload request
from pydantic import BaseModel, Field
from fairscape_mds.models.user import Permissions
from typing import Optional, Any, Dict
import datetime

class FairscapeUploadJob(BaseModel):
	guid: str = Field(alias="@id")
	presigned: bool 
	uploadPath: str
	fileSize: Optional[int] = Field(default=None)
	presignedUploadLink: Optional[Dict[str, Any]] = Field(default=None)
	permissions: Permissions
	timeStarted: Optional[datetime.datetime] = Field(default_factory=datetime.datetime.now)
	timeFinished: Optional[datetime.datetime] = Field(default=None)
	status: Optional[str] = Field(default=None)
	complete: bool 
	success: bool
	rocrateGUID: Optional[str] = Field(default=None)
	identifiersMinted: Optional[int] = Field(default=None)




In [9]:
def createUploadPresignedLink(s3_client, bucket_name, object_name, expiration=7200):

	presignedUploadLink = s3_client.generate_presigned_post(
			bucket_name, 
			object_name,
			Fields=None,
			ExpiresIn=expiration,
	)

	return presignedUploadLink
		


In [10]:
# upload request modification that gives presigned minio put url

class FairscapePresignedROCrateRequest(FairscapeRequest):

	def __init_(self, config):
		super().__init__(config)
		self.config = config

	def get(
			self,
			currentUser: UserWriteModel,
			jobGUID: str
	):
		jobMetadata = self.config.asyncCollection.find_one(
			{"@id": jobGUID},
			projection={"_id": False}
		)

		if not jobMetadata:
			return FairscapeResponse(
				success=False,
				statusCode=404,
				error={
					"message": f"upload job {jobGUID} not found",
					"error": "job not found",
					"@id": jobGUID 
				}
			)

		try:
			jobInstance = FairscapeUploadJob.model_validate(jobMetadata)
		except Exception as exc:
			return FairscapeResponse(
				success=False,
				statusCode=500,
				error= {
					"message": "error unmarshaling job metadata",
					"error": str(exc),
					"@id": jobGUID
				}
			)

		# check permissions on metadata
		if checkPermissions(jobInstance.permissions, currentUser):
			return FairscapeResponse(
				success=True,
				statusCode=200,
				model=jobInstance,
			)
		else:
			return FairscapeResponse(
				success=False,
				statusCode=401,
				error={
					"message": "user unauthorized to view upload request"
				}
			)


	def execute(
		self, 
		currentUser, 
		jobGUID
	):

		# if no minio object is available then return 
		processROCrate.apply_async(args=(uploadJob.guid,), )

		return FairscapeResponse(
			success=200
		)
		

	def upload(
			self,
			currentUser: UserWriteModel, 
			filename: str, 
			filesize: int
			):

		userEmailPath = userPath(currentUser.email)
		uploadPath = f"{appConfig.minioDefaultPath}/{userEmailPath}/rocrates/{filename}"

		try:
			presignedUploadLink = createUploadPresignedLink(
				appConfig.minioClient,
				appConfig.minioBucket,
				uploadPath
			)
		except Exception as exc:
			return FairscapeResponse(
				success=False,
				statusCode=500,
				error={
					"message": "unable to grant presigned upload link",
					"error": str(exc)
					}
			)

		uploadJobInstance = FairscapeUploadJob.model_validate({
			"@id": str(uuid.uuid4()),
			"presigned": True,
			"uploadPath": uploadPath,
			"fileSize": filesize,
			"presignedUploadLink": presignedUploadLink,
			"timeStarted": None,
			"timeFinished": None,
			"permissions": currentUser.getPermissions(),
			"status": "generate presigned link for upload",
			"complete": False,
			"success": False,	
			"rocrateGUID": None,
			"identifiersMinted": 0
		})


		insertResult = appConfig.asyncCollection.insert_one(
			uploadJobInstance.model_dump(mode='json', by_alias=True)
		)

		assert insertResult.inserted_id

		return FairscapeResponse(
			success=True,
			statusCode=200,
			model=uploadJobInstance
		)



In [11]:
createUploadPresignedLink(appConfig.minioClient, appConfig.minioBucket, object_name="rocrate.zip")

{'url': 'http://localhost:9000/default',
 'fields': {'key': 'rocrate.zip',
  'x-amz-algorithm': 'AWS4-HMAC-SHA256',
  'x-amz-credential': 'miniotestadmin/20250911/us-east-1/s3/aws4_request',
  'x-amz-date': '20250911T162814Z',
  'policy': 'eyJleHBpcmF0aW9uIjogIjIwMjUtMDktMTFUMTg6Mjg6MTRaIiwgImNvbmRpdGlvbnMiOiBbeyJidWNrZXQiOiAiZGVmYXVsdCJ9LCB7ImtleSI6ICJyb2NyYXRlLnppcCJ9LCB7IngtYW16LWFsZ29yaXRobSI6ICJBV1M0LUhNQUMtU0hBMjU2In0sIHsieC1hbXotY3JlZGVudGlhbCI6ICJtaW5pb3Rlc3RhZG1pbi8yMDI1MDkxMS91cy1lYXN0LTEvczMvYXdzNF9yZXF1ZXN0In0sIHsieC1hbXotZGF0ZSI6ICIyMDI1MDkxMVQxNjI4MTRaIn1dfQ==',
  'x-amz-signature': '716c8b3bb8e4feb6f6ce24d26103da6dae27bac6fdf3759baa36c027765edd11'}}

In [12]:
appConfig.minioClient.list_buckets()

{'ResponseMetadata': {'RequestId': '18644740DEA76675',
  'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '457',
   'content-type': 'application/xml',
   'server': 'MinIO',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'vary': 'Origin, Accept-Encoding',
   'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
   'x-amz-request-id': '18644740DEA76675',
   'x-content-type-options': 'nosniff',
   'x-ratelimit-limit': '4450',
   'x-ratelimit-remaining': '4450',
   'x-xss-protection': '1; mode=block',
   'date': 'Thu, 11 Sep 2025 16:28:18 GMT'},
  'RetryAttempts': 0},
 'Buckets': [{'Name': 'default',
   'CreationDate': datetime.datetime(2025, 7, 21, 7, 3, 40, 883000, tzinfo=tzutc())},
  {'Name': 'fairscape',
   'CreationDate': datetime.datetime(2025, 7, 21, 7, 4, 42, 612000, tzinfo=tzutc())}],
 'Owner': {'DisplayName':

In [13]:
presignedROCrateRequestFactory = FairscapePresignedROCrateRequest(appConfig)

In [14]:
uploadPresignedResponse = presignedROCrateRequestFactory.upload(
	currentUser,
	"paclitaxel.zip",
	4497805667
)

In [15]:
uploadPresignedResponse.model.model_dump(by_alias=True)

{'@id': '4919243b-9da0-469c-ab8a-aa7ee61a4eef',
 'presigned': True,
 'uploadPath': 'default/test/rocrates/paclitaxel.zip',
 'fileSize': 4497805667,
 'presignedUploadLink': {'url': 'http://localhost:9000/default',
  'fields': {'key': 'default/test/rocrates/paclitaxel.zip',
   'x-amz-algorithm': 'AWS4-HMAC-SHA256',
   'x-amz-credential': 'miniotestadmin/20250911/us-east-1/s3/aws4_request',
   'x-amz-date': '20250911T162825Z',
   'policy': 'eyJleHBpcmF0aW9uIjogIjIwMjUtMDktMTFUMTg6Mjg6MjVaIiwgImNvbmRpdGlvbnMiOiBbeyJidWNrZXQiOiAiZGVmYXVsdCJ9LCB7ImtleSI6ICJkZWZhdWx0L3Rlc3Qvcm9jcmF0ZXMvcGFjbGl0YXhlbC56aXAifSwgeyJ4LWFtei1hbGdvcml0aG0iOiAiQVdTNC1ITUFDLVNIQTI1NiJ9LCB7IngtYW16LWNyZWRlbnRpYWwiOiAibWluaW90ZXN0YWRtaW4vMjAyNTA5MTEvdXMtZWFzdC0xL3MzL2F3czRfcmVxdWVzdCJ9LCB7IngtYW16LWRhdGUiOiAiMjAyNTA5MTFUMTYyODI1WiJ9XX0=',
   'x-amz-signature': 'ed7582cdc3a927f7e8eb8dac8aa20f4658b50e2f004c2aa9af60869aa45c2c6a'}},
 'permissions': {'owner': 'test@example.org', 'group': None},
 'timeStarted': None,
 'timeF

In [26]:
uploadInstance = uploadPresignedResponse.model
uploadLink = uploadInstance.presignedUploadLink.get("url")
uploadFields = uploadInstance.presignedUploadLink.get("fields")
jobGUID = uploadInstance.guid

In [89]:
import httpx
import json
import boto3
import sys

In [18]:
uploadLink

'http://localhost:9000/default'

In [20]:
cratePath ="E:\\Work\\Data\\Uploads\\upload_06_29_2025\\paclitaxel.zip" 
with open(cratePath, "rb") as cratefile:
	rocrateFiles = {
		"file": ("paclitaxel.zip", cratefile, "application/zip")
	}


	# upload a test rocrate
	paclitaxelResponse = httpx.post(
		uploadLink,
		files=rocrateFiles,
		data=uploadFields,
		timeout=10000,
		verify=False
	)


In [21]:
paclitaxelResponse

<Response [204 No Content]>

In [25]:
dict(paclitaxelResponse.headers)

{'accept-ranges': 'bytes',
 'etag': '"84850812a162a043114183728950c7c9"',
 'location': 'http://localhost:9000/default/default/test/rocrates/paclitaxel.zip',
 'server': 'MinIO',
 'strict-transport-security': 'max-age=31536000; includeSubDomains',
 'vary': 'Origin, Accept-Encoding',
 'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
 'x-amz-request-id': '18644748FA70CAF1',
 'x-content-type-options': 'nosniff',
 'x-ratelimit-limit': '4450',
 'x-ratelimit-remaining': '4450',
 'x-xss-protection': '1; mode=block',
 'date': 'Thu, 11 Sep 2025 16:29:04 GMT'}

In [None]:
jobGUID

In [32]:
def proccessROCrate(jobGUID):

	jobMetadata = appConfig.asyncCollection.find_one(
		{"@id": jobGUID},
		projection={"_id": False}
	)

	if jobMetadata is None:
		return FairscapeResponse(
			success=False,
			statusCode=404,
			error= {"message": "job not found"}
		)
	
	return FairscapeUploadJob.model_validate(jobMetadata)
		


In [None]:
# given the job id 
jobInstance = proccessROCrate(jobGUID)



'default/test/rocrates/paclitaxel.zip'

'paclitaxel'

In [86]:


jobUploadPath = pathlib.PurePosixPath(jobInstance.uploadPath)
baseDirectory = jobInstance.uploadPath 
metadataKey = baseDirectory + "/ro-crate-metadata.json"
metadataFound = False

try: 
	s3Response = appConfig.minioClient.get_object(
		Bucket = appConfig.minioBucket,
		Key = metadataKey
	)
	metadataFound = True
except appConfig.minioClient.exceptions.NoSuchKey as e:
	metadataFound = False

if not metadataFound:

	metadataKey = f"{baseDirectory}/{jobUploadPath.stem}/ro-crate-metadata.json"

	try: 
		s3Response = appConfig.minioClient.get_object(
			Bucket = appConfig.minioBucket,
			Key = metadataKey
		)
		metadataFound = True
	except appConfig.minioClient.exceptions.NoSuchKey as e:
		metadataFound = False

if metadataFound:
	try:
		content = s3Response['Body']
		roCrateJSON = json.loads(content.read())
	except json.JSONDecodeError:
		raise Exception("Failed to Decode Metadata JSON")

else:
	raise Exception("Metadata Not Found in RO-Crate")



In [92]:
roCrateJSON

{'@context': {'@vocab': 'https://schema.org/', 'EVI': 'https://w3id.org/EVI#'},
 '@graph': [{'@id': 'ro-crate-metadata.json',
   '@type': 'CreativeWork',
   'conformsTo': {'@id': 'https://w3id.org/ro/crate/1.2-DRAFT'},
   'about': {'@id': 'ark:59853/rocrate-paclitaxel-if-data-release'}},
  {'@type': ['Dataset', 'https://w3id.org/EVI#ROCrate'],
   'isPartOf': [],
   'version': '0.1.0',
   'datePublished': '02/28/2025',
   'license': 'https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en',
   'associatedPublication': None,
   'author': ['Hansen JN',
    'Axelsson U',
    'Johannesson A',
    'Fall J',
    'Ballllosera Navarro F',
    'Lundberg E'],
   'conditionsOfAccess': 'This dataset was created by investigators and staff of the Cell Maps for Artificial Intelligence project (CM4AI - https://cm4ai.org), a Data Generation Project of the NIH Bridge2AI program for cellular imaging data by The Board of Trustees of the Leland Stanford Junior University. It is licensed for reuse under th

In [96]:
import sys
def sizeof(obj):
	size = sys.getsizeof(obj)
	if isinstance(obj, dict): 
		return size + sum(map(sizeof, obj.keys())) + sum(map(sizeof, obj.values()))
	if isinstance(obj, (list, tuple, set, frozenset)): 
		return size + sum(map(sizeof, obj))
	return size

In [97]:
sizeof(roCrateJSON) / 10**6

98.071248

In [None]:

# find metadata object
try: 
	s3Response = appConfig.minioClient.get_object(
		Bucket = appConfig.minioBucket,
		Key = metadataKey
	)
except json.JSONDecodeError:
	raise Exception("Failed to Decode Metadata JSON")



In [74]:
def recursiveMinioSearch(s3Client, bucket: str, prefix: str, key: str):	

	listResponse = s3Client.list_objects_v2(
		Bucket= bucket,
		Prefix= prefix
	)

	nextToken = listResponse['NextContinuationToken']
	matchedMetadataKeys = []
	isTruncated = listResponse['IsTruncated']

	while isTruncated:

		matchedMetadataKeys = list(
			filter(
				lambda x: key in x['Key'],
				listResponse['Contents']
				)
			)

		listResponse = s3Client.list_objects_v2(
			Bucket=bucket,
			Prefix=prefix,
			ContinuationToken= nextToken
		)

		nextToken = listResponse.get('ContinuationToken')
		isTruncated = listResponse['IsTruncated']

	return matchedMetadataKeys



In [75]:
listResponse = recursiveMinioSearch(
	s3Client=appConfig.minioClient,
	bucket= appConfig.minioBucket,
	prefix= jobInstance.uploadPath + "/",
	key = "ro-crate-metadata.json"
)

KeyboardInterrupt: 

In [73]:
listResponse

[]

In [63]:
listResponse['ContinuationToken']

KeyError: 'ContinuationToken'

In [43]:
listResponse

{'ResponseMetadata': {'RequestId': '18644D0B8935C0A5',
  'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '243527',
   'content-type': 'application/xml',
   'server': 'MinIO',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'vary': 'Origin, Accept-Encoding',
   'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
   'x-amz-request-id': '18644D0B8935C0A5',
   'x-content-type-options': 'nosniff',
   'x-ratelimit-limit': '4450',
   'x-ratelimit-remaining': '4450',
   'x-xss-protection': '1; mode=block',
   'date': 'Thu, 11 Sep 2025 18:14:26 GMT'},
  'RetryAttempts': 0},
 'IsTruncated': True,
 'Contents': [{'Key': 'default/test/rocrates/paclitaxel.zip/paclitaxel/blue/B2AI_1_Paclitaxel_A10_R12_z02_blue.jpg',
   'LastModified': datetime.datetime(2025, 9, 11, 16, 29, 4, 967000, tzinfo=tzutc()),
   'ETag': '',
   'Size': 1

In [None]:
rocrateMetadata

In [None]:
# start job

In [None]:
def presignedUploadRequest():
	pass

def executeJob():
	pass