In [1]:
# import rocrate models
import os
import sys

#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
#os.path.join(os.path.dirname(__file__), '../')
# Directory that holds the local `fairscape_mds` sources. The default is the original
# hard-coded checkout location; set the FAIRSCAPE_MDS_SRC environment variable to use
# a different checkout without editing this cell.
srcPath = os.path.abspath(
	os.environ.get('FAIRSCAPE_MDS_SRC') or 'C:\\Users\\Max\\Documents\\GitHub\\mds_python\\src\\'
)
# Re-running the cell must not pile up duplicate entries: drop earlier copies first,
# then keep the path at the front of the module search path as before.
sys.path[:] = [pathEntry for pathEntry in sys.path if pathEntry != srcPath]
sys.path.insert(0, srcPath)

In [2]:
from fairscape_mds.models.rocrate import (
	ROCrateV1_2,
	ROCrateDataset,
	ROCrateSoftware,
	ROCrateComputation,
	ROCrateOrganization,
	ROCrateProject,
	ROCrateMetadataElem
)
from fairscape_mds.models.document import MongoDocument

# FIXME(review): the stray fragment below was commented out. As an indented dict entry at
# cell level it is an IndentationError, and `algorithms` is not imported in the visible cells.
#   "class": algorithms.Blowfish,


In [3]:
import pathlib
from fairscape_mds.config import create_fairscape_config
from dotenv import load_dotenv
import os

# Read the local env file, then build the Fairscape configuration from the process environment.
load_dotenv("../local.env")
fairscapeConfig = create_fairscape_config(os.environ)

# LDAP lookup is currently disabled:
#ldapConnection = fairscapeConfig.ldap.connectAdmin()
#currentUserLDAP = getUserByCN(ldapConnection, userCN)
#ldapConnection.unbind()

# Storage clients created from the configuration.
minioClient = fairscapeConfig.minio.CreateClient()
mongoClient = fairscapeConfig.mongo.CreateClient()

# Handles on the configured database and the three collections used by later cells.
mongoSettings = fairscapeConfig.mongo
mongoDB = mongoClient[mongoSettings.db]
asyncCollection = mongoDB[mongoSettings.async_collection]
identifierCollection = mongoDB[mongoSettings.identifier_collection]
rocrateCollection = mongoDB[mongoSettings.rocrate_collection]

In [4]:
import pymongo
import json

In [5]:
# load an example ROCrate
metadataPath = pathlib.Path("../serialization/data/1.cm4ai_chromatin_mda-mb-468_untreated_apmsloader_initialrun0.1alpha/ro-crate-metadata.json")
# JSON text is UTF-8; decode it explicitly so parsing does not depend on the platform's
# default encoding (which is not UTF-8 on every system, e.g. a Windows code page).
crateModel = ROCrateV1_2.model_validate_json(metadataPath.read_text(encoding="utf-8"))


In [6]:
crateModel

ROCrateV1_2(context={'EVI': 'https://w3id.org/EVI#', '@vocab': 'https://schema.org/'}, metadataGraph=[ROCrateMetadataFileElem(guid='ro-crate-metadata.json', metadataType='CreativeWork', conformsTo=IdentifierValue(guid='https://w3id.org/ro/crate/1.2-DRAFT'), about=IdentifierValue(guid='https://fairscape.net/ark:59852/rocrate-1.cm4ai_chromatin_mda-mb-468_untreated_apmsloader_initialrun0.1alpha')), ROCrateMetadataElem(guid='https://fairscape.net/ark:59852/rocrate-1.cm4ai_chromatin_mda-mb-468_untreated_apmsloader_initialrun0.1alpha', metadataType=['Dataset', 'https://w3id.org/EVI#ROCrate'], name='Initial integration run', keywords=['Ideker Lab', 'CM4AI', '0.1 alpha', 'MDA-MB-468', 'untreated', 'chromatin', 'Initial integration run', 'AP-MS edgelist download'], isPartOf=[IdentifierValue(guid='ark:/Ideker_Lab'), IdentifierValue(guid='ark:/Ideker_Lab/CM4AI')], version='0.5alpha', dataLicense='https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en', associatedPublication='Clark T, Schaffer 

In [7]:
# The return value is discarded, so this presumably rewrites the identifiers held by
# crateModel in place — TODO confirm against ROCrateV1_2.cleanIdentifiers.
crateModel.cleanIdentifiers()

In [8]:
# Display the crate's EVI elements; the saved output is a list that starts with ROCrateDataset entries.
crateModel.getEVIElements()

[ROCrateDataset(guid='ark:59852/dataset-cellmaps_ppidownloader-output-file-58sGTge', metadataType='https://w3id.org/EVI#Dataset', additionalType='Dataset', author=['Richa Tiwari', 'Atoshi Banerjee', 'Monita Muralidharan', 'Kristen Obernier', 'Antoine Forget', 'Nevan Krogan'], datePublished='2023-08-31', version='0.5alpha', description='Ideker Lab CM4AI 0.1 alpha MDA-MB-468 untreated chromatin Initial integration run AP-MS Edgelist AP-MS gene node attributes file', keywords=[''], associatedPublication='Clark T, Schaffer L, Obernier K, Al Manir S, Churas CP, Dailamy A, Doctor Y, Forget A, Hansen JN, Hu M, Lenkiewicz J, Levinson MA, Marquez C, Mohan J, Nourreddine S, Niestroy J, Pratt D, Qian G, Thaker S, Belisle-Pipon J-C, Brandt C, Chen J, Ding Y, Fodeh S, Krogan N, Lundberg E, Mali P, Payne-Foster P, Ratcliffe S, Ravitsky V, Sali A, Schulz W, Ideker T. Cell Maps for Artificial Intelligence: AI-Ready Maps of Human Cell Architecture from Disease-Relevant Cell Lines. BioRXiv 2024.', addit

In [9]:
# custom exception for file uri in metadata only record

class ROCrateMetadataOnlyException(Exception):
	""" Exception raised when an ROCrate has a file URI in a metadata-only submission.

	Attributes:
		message: description of the failure; this is also what str(exception) returns.
		errors: details about the offending entries (validateROCrateMetadata passes a
			dict of dataset guid -> contentUrl); None when not supplied.
	"""

	def __init__(self, message, errors=None):
		# Only `message` is forwarded to Exception.__init__, so `self.args` is
		# `(message,)`. copy and pickle rebuild exceptions as `cls(*self.args)`;
		# `errors` therefore has to be optional or that reconstruction raises TypeError.
		self.message = message
		self.errors = errors
		super().__init__(message)

	def __str__(self):
		return self.message


# reject crates whose datasets reference their content through a file URI
def validateROCrateMetadata(crateModel: ROCrateV1_2):
	""" Check that no dataset in the crate uses a file URI as its contentUrl.

	Args:
		crateModel: crate whose datasets (crateModel.getDatasets()) are inspected.

	Raises:
		ROCrateMetadataOnlyException: when at least one dataset has a file URI as
			contentUrl; `errors` maps each offending dataset guid to its contentUrl.
	"""

	def isFileUri(contentUrl) -> bool:
		# Match on the URI scheme only. A plain substring test for "file" also rejects
		# valid remote URLs such as "https://example.org/profile/data.csv".
		if isinstance(contentUrl, str):
			return contentUrl.strip().lower().startswith("file:")
		# A collection of URLs is a problem as soon as one entry is a file URI.
		if isinstance(contentUrl, (list, tuple)):
			return any(isFileUri(singleUrl) for singleUrl in contentUrl)
		# A missing contentUrl (None) references no local file.
		return False

	# dataset guid -> contentUrl for every dataset that points at a file URI
	contentUrlDict = {
		crateDataset.guid: crateDataset.contentUrl
		for crateDataset in crateModel.getDatasets()
		if isFileUri(crateDataset.contentUrl)
	}

	if contentUrlDict:
		raise ROCrateMetadataOnlyException(
			message="All Identifiers must reference content by URI",
			errors = contentUrlDict
		)

In [10]:
# Take the crate-level metadata element of the loaded crate and keep its guid.
rocrateMetadata = crateModel.getCrateMetadata()
crateGUID = rocrateMetadata.guid

# Owner recorded on the documents written by MintROCrateMetadataRequest.writeIdentifiers,
# which reads this notebook-level name.
# NOTE(review): hard-coded user CN — consider taking it from configuration.
ownerCN = "mal8ch"


In [23]:
#print(crateModel.getCrateMetadata().model_dump_json(by_alias=True, indent=2))

In [None]:
# publish ROcrate metadata


[ObjectId('67ab81c0bfb04f4005e52723'),
 ObjectId('67ab81c0bfb04f4005e52724'),
 ObjectId('67ab81c0bfb04f4005e52725'),
 ObjectId('67ab81c0bfb04f4005e52726')]

In [None]:
class MintROCrateMetadataRequest():
	""" Class for creating metadata-only RO-Crate records.

	publish() cleans the crate's identifiers, rejects crates whose datasets use file
	URIs, then writes one document for the crate to the rocrate collection and one
	document per element to the identifier collection.
	"""

	def __init__(self,
		rocrateCollection: pymongo.collection.Collection,
		identifierCollection: pymongo.collection.Collection,
		crateModel: ROCrateV1_2,
		ownerCN = None,
		):
		""" Keep the collections, crate and owner used by the later steps.

		Args:
			rocrateCollection: collection that receives the crate document.
			identifierCollection: collection that receives one document per element.
			crateModel: the crate to validate and write.
			ownerCN: owner recorded on every written document. When omitted, the
				notebook-level `ownerCN` is read at write time, as before.
		"""

		self.rocrateCollection = rocrateCollection
		self.identifierCollection = identifierCollection
		self.crateModel = crateModel
		self.ownerCN = ownerCN


	@staticmethod
	def _isFileUri(contentUrl) -> bool:
		""" Tell whether a contentUrl value is (or contains) a file URI. """
		# Match on the URI scheme only. A plain substring test for "file" also rejects
		# valid remote URLs such as "https://example.org/profile/data.csv".
		if isinstance(contentUrl, str):
			return contentUrl.strip().lower().startswith("file:")
		# A collection of URLs is a problem as soon as one entry is a file URI.
		if isinstance(contentUrl, (list, tuple)):
			return any(
				MintROCrateMetadataRequest._isFileUri(singleUrl) for singleUrl in contentUrl
			)
		# A missing contentUrl (None) references no local file.
		return False


	def validateROCrateMetadata(self):
		""" Check that no dataset in the crate uses a file URI as its contentUrl.

		Raises:
			ROCrateMetadataOnlyException: when at least one dataset has a file URI as
				contentUrl; `errors` maps each offending dataset guid to its contentUrl.
		"""

		contentUrlDict = {
			crateDataset.guid: crateDataset.contentUrl
			for crateDataset in self.crateModel.getDatasets()
			if self._isFileUri(crateDataset.contentUrl)
		}

		if contentUrlDict:
			raise ROCrateMetadataOnlyException(
				message="All Identifiers must reference content by URI",
				errors = contentUrlDict
			)


	def writeIdentifiers(self):
		""" Write the crate and its identifiers to mongo.

		Inserts one document for the whole crate into the rocrate collection, then one
		document for the crate metadata element plus every EVI element into the
		identifier collection.

		NOTE: the two writes are not rolled back together; if the second one fails,
		the crate document written by the first one stays in the rocrate collection.

		Raises:
			RuntimeError: when either insert does not report the expected inserted ids.
		"""

		# Prefer the owner given to the constructor; otherwise use the notebook-level
		# `ownerCN`, which is what this method read before the parameter existed.
		owner = self.ownerCN if self.ownerCN is not None else ownerCN

		rocrateMetadataElem = self.crateModel.getCrateMetadata()

		# create a mongo document
		rocrateDocument = MongoDocument.model_validate({
			"@id": rocrateMetadataElem.guid,
			"@type": "https://w3id.org/EVI#ROCrate",
			"owner": owner,
			"metadata": self.crateModel,
			"distribution": None
		})

		# publish rocrate to the collection held by this instance, not a notebook global
		insertResult = self.rocrateCollection.insert_one(
			rocrateDocument.model_dump(by_alias=True)
			)

		# if rocrate metadata fails to write
		if insertResult.inserted_id is None:
			raise RuntimeError(
				f"failed to write ROCrate document for {rocrateMetadataElem.guid}"
			)

		# insert identifier metadata for each of the elements
		identifierList = [ rocrateMetadataElem ] + self.crateModel.getEVIElements()
		documentList = []
		for metadataElem in identifierList:
			documentMetadata = {
				"@id": metadataElem.guid,
				"@type": metadataElem.metadataType,
				"owner": owner,
				"metadata": metadataElem,
				"distribution": None
			}

			# crate metadata elements are stored under the EVI ROCrate type
			if isinstance(metadataElem, ROCrateMetadataElem):
				documentMetadata['@type'] = "https://w3id.org/EVI#ROCrate"

			metadataElemDocument = MongoDocument.model_validate(documentMetadata)

			# add to list to insert into mongo
			documentList.append(
				metadataElemDocument.model_dump(by_alias=True)
			)

		# insert all documents into identifier collection
		insertResult = self.identifierCollection.insert_many(documents=documentList)

		insertedCount = len(insertResult.inserted_ids)
		if insertedCount != len(documentList):
			raise RuntimeError(
				f"wrote {insertedCount} of {len(documentList)} identifier documents "
				f"for ROCrate {rocrateMetadataElem.guid}"
			)


	def publish(self):
		""" Perform all operations needed to write a metadata-only ROCrate into Fairscape.

		Steps run in order: clean the crate's identifiers, validate the dataset
		contentUrl values, write the documents to mongo.
		"""

		self.crateModel.cleanIdentifiers()
		self.validateROCrateMetadata()
		self.writeIdentifiers()

In [None]:
# mint metadata
def storeROCrateMetadata(
	rocrateCollection: pymongo.collection.Collection,
	identifierCollection: pymongo.collection.Collection,
	crateMetadata: ROCrateV1_2
	):
	""" Placeholder for storing ROCrate metadata; not implemented yet.

	Accepts the rocrate collection, the identifier collection and a crate model,
	performs no work and returns None.
	"""
	return None