
## Mermaid Example

```html
<html>
  <body>
    Here is one mermaid diagram:
    <pre class="mermaid">
            graph TD 
            A[Client] --> B[Load Balancer] 
            B --> C[Server1] 
            B --> D[Server2]
    </pre>

    And here is another:
    <pre class="mermaid">
            graph TD 
            A[Client] -->|tcp_123| B
            B(Load Balancer) 
            B -->|tcp_456| C[Server1] 
            B -->|tcp_456| D[Server2]
    </pre>

    <script type="module">
      import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.esm.min.mjs';
      mermaid.initialize({ startOnLoad: true });
    </script>
  </body>
</html>
```

## Generate Viz for Evidence Graph

## Generate prov for ROCrate

In [3]:
import requests
from jinja2 import Environment, PackageLoader, select_autoescape
import json
from typing import List, Dict

In [3]:

env = Environment(
	loader=PackageLoader("viz"),
	autoescape=True
)

template = env.get_template("ro-crate.html")

In [6]:
#with open("test_eg.json", "r") as jsonfile:
#	local_test_eg = json.load(jsonfile)
#local_test_eg

#### Adding Multiple Lines to Nodes

```
%%{init: {"flowchart": {"htmlLabels": false}} }%%
flowchart LR
    markdown["`This **is** _Markdown_`"]
    newLines["`Line1
    Line 2
    Line 3`"]
    markdown --> newLines
```

```

example_mermaid = """
	graph TD 
	A[Client] --> B[Load Balancer] 
        B --> C[Server1] 
	        B --> D[Server2]"""
```

## Mermaid Rendering Class

In [10]:
example_metadata[0]

{'@id': 'ark:59852/fd21fe5d-ca8c-463c-88a3-fe1035284819',
 '@context': {'@vocab': 'https://schema.org/', 'evi': 'https://w3id.org/EVI#'},
 'metadataType': 'https://w3id.org/EVI#Dataset',
 'url': None,
 'name': 'Simulated Samples file',
 'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'],
 'description': 'Samples file derived from input RO-CRATE tsv file',
 'author': 'NA',
 'datePublished': '2023-08-28',
 'version': '0.1 alpha',
 'associatedPublication': None,
 'additionalDocumentation': None,
 'format': 'tsv',
 'schema': {},
 'generatedBy': [],
 'derivedFrom': [],
 'usedBy': ['ark:59852/623f4e05-a897-4c3c-8f6a-bdd81f036dd5'],
 'contentUrl': 'file:///samplescopy.csv',
 '@type': 'https://w3id.org/EVI#Dataset',
 'additionalType': 'Dataset',
 'isPartOf': {'@id': 'ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha',
  'name': '1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha'}}

In [4]:
# custom theme for nodes
custom_theme = """%%{
	init: {
		'theme': 'base',
		'themeVariables': {
			'fontFamily': 'verdana',
			'fontSize': '20px',
			'primaryColor': '#3b50a3'
		}
	}
}%%
"""

In [49]:
evi_dataset_type = "https://w2id.org/EVI#Dataset"
evi_software_type = "https://w2id.org/EVI#Software"
evi_computation_type = "https://w2id.org/EVI#Computation"

mermaid_terminator = "\n\t"

class MermaidDiagram():
	def __init__(self, metadata, subgraph=True, click=True, label_edges=True):

		self.metadata = metadata	
		self.subgraph = subgraph
		self.click = click
		self.label_edges = label_edges
		self.threshold = 5
		self.computation_nodes = []

	def render_subgraph(self):
		
		node_guid_mapping = {}
		self.computation_nodes = []
		nodes = [] 
		click = []
		edges = []
		
		def metadata_to_row(passed_metadata, passed_idx):	
			node_name = passed_metadata['name']			
			metadata_type = passed_metadata["additionalType"]
			mermaid_row = ""
			if metadata_type == "Software":
				mermaid_row = f"id{passed_idx}" + "[" + node_name + "]" 
			elif metadata_type == "Computation":
				mermaid_row = f"id{passed_idx}" + "[\\" + node_name + "/]" 
				#self.computation_nodes.append(f"id{idx}")
			elif metadata_type == "Dataset":
				mermaid_row = f"id{passed_idx}" + "([" + node_name + "])"
			return mermaid_row

		crates = set([ 
			metadata_elem.get("isPartOf", {}).get("@id") for metadata_elem in self.metadata
			])

		print(crates)

		idx = 0	
		subgraphs = []
		for crate_guid in crates:
			# filter the nodes to add
			subgraph_header = f"subgraph {crate_guid}\n\t"
			subgraph_footer = "\nend\n"
			subgraph_nodes = []	
			for subgraph_elem in filter(
				lambda metadata_elem: metadata_elem.get("isPartOf", {}).get("@id") == crate_guid, 
				self.metadata):
				
				# set mapping
				node_guid_mapping[subgraph_elem['@id']] = idx 

				# add subgraph node
				output_node = metadata_to_row(subgraph_elem, idx)
				subgraph_nodes.append(output_node)

				# increment
				idx += 1

			subgraphs.append(subgraph_header + "\n\t".join(subgraph_nodes) + subgraph_footer)

		# render edges using computation metadata
		idx = 0 
		for crate_guid in crates:
			for metadata in filter(
				lambda metadata_elem: metadata_elem.get("isPartOf", {}).get("@id") == crate_guid, 
				self.metadata):
				# if computation
				if metadata.get("additionalType") == "Computation":
				
				# get usedSoftware
					for used_software_id in metadata.get("usedSoftware"):
						obj_node_id = node_guid_mapping.get(used_software_id)
						if self.label_edges:
							used_software_row = f"id{obj_node_id} -->|usedSoftware| id{idx}" 
						else:
							used_software_row = f"id{idx} --> id{obj_node_id}" 

						edges.append(used_software_row)	

					for used_dataset_id in metadata.get("usedDataset"):
						obj_node_id = node_guid_mapping.get(used_dataset_id)
						if self.label_edges:
							used_dataset_row = f"id{idx} -->|usedDataset| id{obj_node_id}" 
						else:
							used_dataset_row = f"id{idx} --> id{obj_node_id}" 

						edges.append(used_dataset_row)

					# get generated
					for generated_id in metadata.get("generated"):
						obj_node_id = node_guid_mapping.get(generated_id)
						if self.label_edges:
							generated_row = f"id{obj_node_id} -->|generatedBy| id{idx}" 
						else:
							generated_row = f"id{obj_node_id} --> id{idx}" 

						edges.append(generated_row)

				idx+=1

		return "flowchart TD\n" + "\n".join(subgraphs) + "\n".join(edges)



	def render_eg(self) -> str:
		""" Expecting List in self.metadata
		"""

		graph_text = "flowchart TD"	+ mermaid_terminator

		node_guid_mapping = {}
		self.computation_nodes = []
		nodes = [] 
		click = []
		edges = []



		for idx, metadata  in enumerate(self.metadata):
			node_guid_mapping[metadata['@id']] = idx 
			output_node = metadata_to_row(metadata, idx)

			# if node is clickable
			if self.click:
				landing_page = f"https://fairscape.pods.uvarc.io/{metadata.get('@id')}"
				click_row = f'click id{idx} "{landing_page}"' 
				click.append(click_row)
			
			# append to 
			nodes.append(output_node)

		# render edges using computation metadata
		for idx, metadata  in enumerate(self.metadata):
			# if computation
			if metadata.get("additionalType") == "Computation":
				
				# get usedSoftware
				for used_software_id in metadata.get("usedSoftware"):
					obj_node_id = node_guid_mapping.get(used_software_id)
					if self.label_edges:
						used_software_row = f"id{obj_node_id} -->|usedSoftware| id{idx}" 
					else:
						used_software_row = f"id{idx} --> id{obj_node_id}" 

					edges.append(used_software_row)	


				for used_dataset_id in metadata.get("usedDataset"):
					obj_node_id = node_guid_mapping.get(used_dataset_id)
					if self.label_edges:
						used_dataset_row = f"id{idx} -->|usedDataset| id{obj_node_id}" 
					else:
						used_dataset_row = f"id{idx} --> id{obj_node_id}" 

					edges.append(used_dataset_row)

				# get generated
				for generated_id in metadata.get("generated"):
					obj_node_id = node_guid_mapping.get(generated_id)
					if self.label_edges:
						generated_row = f"id{obj_node_id} -->|generatedBy| id{idx}" 
					else:
						generated_row = f"id{obj_node_id} --> id{idx}" 

					edges.append(generated_row)

		# generate text of mermaid
		self.edges = edges
		self.nodes = nodes
		self.click_rows = click
		full_rows = nodes + click + edges
		for row in full_rows:
			graph_text += row + mermaid_terminator


		return graph_text


In [50]:
print(MermaidDiagram(metadata=trimmed_metadata, click=False).render_subgraph())

{'ark:59852/3.cm4ai_chromatin_mda-mb-468_untreated_coembedfold1_initialrun0.1alpha', 'ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha', 'ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha'}
flowchart TD
subgraph ark:59852/3.cm4ai_chromatin_mda-mb-468_untreated_coembedfold1_initialrun0.1alpha
	id0([coembedding_emd.tsv coembedding output file])
	id1[cellmaps_coembedding]
	id2[\Merged Embedding/]
end

subgraph ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha
	id3([Simulated Samples file])
	id4([Simulated unique file])
	id5([cellmaps_imagedownloader output file])
	id6([cellmaps_imagedownloader output file])
	id7([B2AI_1_untreated_G1_R2_red.jpg red channel image])
	id8[cellmaps_imagedownloader]
	id9[\IF Image Loader/]
	id10([etc.])
end

subgraph ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha
	id11([cellmaps_image_embedding output file])
	id12([Densenet model f

### Load Evidence Graph

In [30]:

# image loader evidence graph
def reload_metadata():
	image_loading = requests.get("https://fairscape.pods.uvarc.io/ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha").json()
	image_embed = requests.get("https://fairscape.pods.uvarc.io/ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha").json()
#	image_embed2 = requests.get("https://fairscape.pods.uvarc.io/ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold2_initialrun0.1alpha").json()
	coembed = requests.get("https://fairscape.pods.uvarc.io/ark:59852/3.cm4ai_chromatin_mda-mb-468_untreated_coembedfold1_initialrun0.1alpha").json()
#	coembed2 = requests.get("https://fairscape.pods.uvarc.io/ark:59852/3.cm4ai_chromatin_mda-mb-468_untreated_coembedfold2_initialrun0.1alpha").json()

	image_loading['@graph'].pop(0)
	image_embed['@graph'].pop(0)
#	image_embed2['@graph'].pop(0)
	coembed['@graph'].pop(0)
#	coembed2['@graph'].pop(0)

	#return image_loading['@graph'] + image_embed['@graph'] + image_embed2['@graph'] + coembed['@graph'] + coembed2['@graph']
	return image_loading['@graph'] + image_embed['@graph'] + coembed['@graph'] 

In [267]:
#print(computations[0]['usedDataset'])
#print(computations[1]['usedDataset'])
#print(computations[0]['generated'])
#print(computations[1]['generated'])

In [38]:

def trim_metadata(metadata: List[Dict]):

	# get the computations
	computations = list(filter(lambda x: x.get("additionalType") == "Computation",metadata))

	def get_used_by_computation(ark: str):
		for i in range(len(computations)):
			if ark in computations[i]['usedDataset']:
				yield computations[i]['@id']

	def update_used_by_computation(computation_ark, etc_node_id, removed_datasets):
		for computation in filter(lambda x: x.get("@id") == computation_ark, metadata):
			computation['usedDataset'] = [ etc_node_id ] + [ 
				elem for elem in computation['usedDataset'] if elem not in removed_datasets
				]

			print(computation)

	for idx, eg_computation in enumerate(computations):
		etc_used_dataset = set()

		if len(eg_computation['generated'])>3:

			# get an example generated
			example_generated = next(filter(lambda elem: elem.get("@id") == eg_computation['generated'][0], metadata))	

			etc_generated = {
				"@id": f"etc{idx}",
				"name": "etc.",
				"additionalType": "Dataset",
				"isPartOf": example_generated.get("isPartOf")
			}
			metadata.append(etc_generated)

			remove_generated = eg_computation['generated'][3:]
			new_generated = [etc_generated['@id']] + eg_computation['generated'][:3] 
			eg_computation['generated']	= new_generated


			
			for removal_elem in remove_generated:
				for elem in get_used_by_computation(removal_elem):
					etc_used_dataset.add(elem)

			for elem in etc_used_dataset:
				update_used_by_computation(elem, etc_generated['@id'], remove_generated)


			for node in remove_generated:	
				for i in range(len(metadata)):
					if metadata[i]['@id'] == node:
						del metadata[i]
						break

	# return altered metadata
	return metadata

In [39]:
example_metadata = reload_metadata()

In [40]:
trimmed_metadata = trim_metadata(example_metadata)

{'@id': 'ark:59852/30d1e880-f04e-4834-965c-36120b336526', '@context': {'@vocab': 'https://schema.org/', 'evi': 'https://w3id.org/EVI#'}, 'metadataType': 'https://w3id.org/EVI#Computation', 'url': None, 'name': 'IF Image Embedding', 'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'], 'description': 'Ideker Lab CM4AI 0.1 alpha MDA-MB-468 untreated chromatin Initial integration run IF Image Embedding IF microscopy images embedding fold1 run of cellmaps_image_embedding', 'runBy': 'cchuras', 'dateCreated': '2023-08-31', 'associatedPublication': None, 'additionalDocumentation': None, 'command': "{'outdir': '0.1pipeline/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha', 'inputdir': '0.1pipeline/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha', 'model_path': 'https://github.com/CellProfiling/densenet/releases/download/v0.1.0/external_crop512_focal_slov_hardlog_class_densenet121_dropout_i768_aug2_5folds_fold0_final.pth', 'name': None, 'organiza

In [41]:
set([elem.get("isPartOf", {}).get("@id") for elem in trimmed_metadata])

{'ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha',
 'ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha',
 'ark:59852/3.cm4ai_chromatin_mda-mb-468_untreated_coembedfold1_initialrun0.1alpha'}

In [8]:
rendered_graph = MermaidDiagram(metadata=trimmed_metadata, click=False).render_eg()

{'ark:59852/fd21fe5d-ca8c-463c-88a3-fe1035284819': 0, 'ark:59852/3c159191-564c-4f5c-a1a0-99d5813dd7f9': 1, 'ark:59852/892697d9-9f97-4916-abef-0d60bdb0bd6f': 2, 'ark:59852/3b49ed96-68a7-4584-9153-cebb6b864007': 3, 'ark:59852/b0af601d-06e9-4e45-a3d2-d90ba4ad2fd6': 4, 'ark:59852/0ea7cbc3-d2aa-4a77-b612-827b2824f110': 5, 'ark:59852/623f4e05-a897-4c3c-8f6a-bdd81f036dd5': 6, 'ark:59852/4d50ee5a-058e-482c-957b-3b32327944de': 7, 'ark:59852/c170412d-43c6-4d78-8cce-4322f40bb5d4': 8, 'ark:59852/0970ef42-25f1-4ad7-945a-423ad1a7f488': 9, 'ark:59852/30d1e880-f04e-4834-965c-36120b336526': 10, 'ark:59852/37e8c3e0-b2db-4519-9132-6baefc0cf24c': 11, 'ark:59852/fc22bbc3-1e2a-4e61-958a-277340141e4d': 12, 'ark:59852/e96e70b9-ca75-45e2-a7de-b6160b0717a1': 13, 'ark:59852/19e5a27b-26e1-4f53-9b87-d57ab2b7484a': 14, 'ark:59852/0fbf0814-37be-4f23-acee-784ef9cd353a': 15, 'ark:59852/44d12e3a-9965-4f7a-9749-c6a1e81d86f4': 16, 'ark:59852/1b0dc14d-bc23-401f-b539-6849a9ac0fff': 17, 'ark:59852/ebda79fa-275e-4e35-9f77-ae

In [9]:
print(rendered_graph)

flowchart TD
	id0([Simulated Samples file])
	id1([Simulated unique file])
	id2([cellmaps_imagedownloader output file])
	id3([cellmaps_imagedownloader output file])
	id4([B2AI_1_untreated_G1_R2_red.jpg red channel image])
	id5[cellmaps_imagedownloader]
	id6[\IF Image Loader/]
	id7([cellmaps_image_embedding output file])
	id8([Densenet model file])
	id9[cellmaps_image_embedding]
	id10[\IF Image Embedding/]
	id11([cellmaps_image_embedding output file])
	id12([Densenet model file])
	id13[cellmaps_image_embedding]
	id14[\IF Image Embedding/]
	id15([coembedding_emd.tsv coembedding output file])
	id16[cellmaps_coembedding]
	id17[\Merged Embedding/]
	id18([coembedding_emd.tsv coembedding output file])
	id19[cellmaps_coembedding]
	id20[\Merged Embedding/]
	id21([etc.])
	id5 -->|usedSoftware| id6
	id6 -->|usedDataset| id1
	id6 -->|usedDataset| id0
	id21 -->|generatedBy| id6
	id2 -->|generatedBy| id6
	id3 -->|generatedBy| id6
	id4 -->|generatedBy| id6
	id9 -->|usedSoftware| id10
	id10 -->|usedDat

In [281]:

def trim(self):

	# trim edges and nodes for high throughput graphs	
	for idx, node_id in enumerate(self.computation_nodes):

		# for this node id
		#used_dataset_edges = list(filter(
		#	lambda row: f"{node_id} -->|usedDataset|" in row,
		#	edges
		#))

		generated_edges = list(filter(
			lambda row: f" -->|generatedBy| {node_id}" in row,
			self.edges
		))

		# decide to trim nodes
		#if len(used_dataset_edges) > 5 :
			#nodes_to_remove = [ elem.strip(f"{node_id} -->|usedDataset| ") for elem in used_dataset_edges])


		if len(generated_edges) > 3:

			nodes.append(f"etc{idx}[etc.]")
			edges.append(f"{node_id} -->|generatedBy| etc{idx}")

			# get the ids of nodes to trim
			#nodes_to_remove = [ elem.strip(f"-->|generated| {node_id}") for elem in generated_edges] 

			nodes_to_remove = [] 

			for generated_edge in generated_edges:
				generated_node_id = generated_edge.strip(f"-->|generated| {node_id}")
				if generated_node_id != "":
					nodes_to_remove.append(generated_node_id)


			etc_used_dataset = set()

			# remove  nodes
			for removal_node in nodes_to_remove[3::]:
				for delete_row in filter(lambda row: removal_node in row, edges):
					if node_id not in delete_row:
						next_computation = delete_row.strip(f"{removal_node}").strip("-->|usedDataset|")
						etc_used_dataset.add(next_computation)

					edges.remove(delete_row)

				for delete_row in filter(lambda row: removal_node in row, nodes):
					#print("Node to be removed: " + delete_row)
					nodes.remove(delete_row)


			print(etc_used_dataset)
			# insert replacement summary nodes
			for used_by_computation in etc_used_dataset:
				edges.append(f"{used_by_computation} -->|usedDataset| etc{idx}")

		#edges.append()



In [None]:
# only on first node do i actually trim usedDataset
if len(eg_computation['usedDataset'])>3 and idx==0:
	etc_used_dataset = {
		"@id": f"etc_used{idx}",
		"name": "etc.",
		"additionalType": "Dataset"
	}

	metadata.append(etc_used_dataset)

	remove_used_dataset = eg_computation['usedDataset'][4::]
	eg_computation['usedDataset'] = eg_computation['usedDataset'][0:3] + [etc_used_dataset['@id']]
	print(eg_computation['usedDataset'])

	# remove nodes
	for node in remove_used_dataset:	
		for i in range(len(metadata)):
			if metadata[i]['@id'] == node:
				del metadata[i]
				break




In [344]:
computations[1]

{'@id': 'ark:59852/30d1e880-f04e-4834-965c-36120b336526',
 '@context': {'@vocab': 'https://schema.org/', 'evi': 'https://w3id.org/EVI#'},
 'metadataType': 'https://w3id.org/EVI#Computation',
 'url': None,
 'name': 'IF Image Embedding',
 'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'],
 'description': 'Ideker Lab CM4AI 0.1 alpha MDA-MB-468 untreated chromatin Initial integration run IF Image Embedding IF microscopy images embedding fold1 run of cellmaps_image_embedding',
 'runBy': 'cchuras',
 'dateCreated': '2023-08-31',
 'associatedPublication': None,
 'additionalDocumentation': None,
 'command': "{'outdir': '0.1pipeline/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha', 'inputdir': '0.1pipeline/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha', 'model_path': 'https://github.com/CellProfiling/densenet/releases/download/v0.1.0/external_crop512_focal_slov_hardlog_class_densenet121_dropout_i768_aug2_5folds_fold0_final.pth', 'name': None

In [326]:
metadata

[{'@id': 'ark:59852/fd21fe5d-ca8c-463c-88a3-fe1035284819',
  '@context': {'@vocab': 'https://schema.org/',
   'evi': 'https://w3id.org/EVI#'},
  'metadataType': 'https://w3id.org/EVI#Dataset',
  'url': None,
  'name': 'Simulated Samples file',
  'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'],
  'description': 'Samples file derived from input RO-CRATE tsv file',
  'author': 'NA',
  'datePublished': '2023-08-28',
  'version': '0.1 alpha',
  'associatedPublication': None,
  'additionalDocumentation': None,
  'format': 'tsv',
  'schema': {},
  'generatedBy': [],
  'derivedFrom': [],
  'usedBy': ['ark:59852/623f4e05-a897-4c3c-8f6a-bdd81f036dd5'],
  'contentUrl': 'file:///samplescopy.csv',
  '@type': 'https://w3id.org/EVI#Dataset',
  'additionalType': 'Dataset',
  'isPartOf': {'@id': 'ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha',
   'name': '1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha'}},
 {'@id': 'ark:59852/3c159191-564c-

In [269]:
etc_edges

{}

In [289]:
len(metadata)

12

In [262]:
computations[1]

{'@id': 'ark:59852/30d1e880-f04e-4834-965c-36120b336526',
 '@context': {'@vocab': 'https://schema.org/', 'evi': 'https://w3id.org/EVI#'},
 'metadataType': 'https://w3id.org/EVI#Computation',
 'url': None,
 'name': 'IF Image Embedding',
 'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'],
 'description': 'Ideker Lab CM4AI 0.1 alpha MDA-MB-468 untreated chromatin Initial integration run IF Image Embedding IF microscopy images embedding fold1 run of cellmaps_image_embedding',
 'runBy': 'cchuras',
 'dateCreated': '2023-08-31',
 'associatedPublication': None,
 'additionalDocumentation': None,
 'command': "{'outdir': '0.1pipeline/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha', 'inputdir': '0.1pipeline/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha', 'model_path': 'https://github.com/CellProfiling/densenet/releases/download/v0.1.0/external_crop512_focal_slov_hardlog_class_densenet121_dropout_i768_aug2_5folds_fold0_final.pth', 'name': None

In [264]:
metadata

[{'@id': 'ark:59852/fd21fe5d-ca8c-463c-88a3-fe1035284819',
  '@context': {'@vocab': 'https://schema.org/',
   'evi': 'https://w3id.org/EVI#'},
  'metadataType': 'https://w3id.org/EVI#Dataset',
  'url': None,
  'name': 'Simulated Samples file',
  'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'],
  'description': 'Samples file derived from input RO-CRATE tsv file',
  'author': 'NA',
  'datePublished': '2023-08-28',
  'version': '0.1 alpha',
  'associatedPublication': None,
  'additionalDocumentation': None,
  'format': 'tsv',
  'schema': {},
  'generatedBy': [],
  'derivedFrom': [],
  'usedBy': ['ark:59852/623f4e05-a897-4c3c-8f6a-bdd81f036dd5'],
  'contentUrl': 'file:///samplescopy.csv',
  '@type': 'https://w3id.org/EVI#Dataset',
  'additionalType': 'Dataset',
  'isPartOf': {'@id': 'ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha',
   'name': '1.cm4ai_chromatin_mda-mb-468_untreated_imageloader_initialrun0.1alpha'}},
 {'@id': 'ark:59852/3c159191-564c-

In [91]:
def get_used_by(ark: str):
	for i in range(len(metadata)):
		if metadata[i]['@id'] == ark:
			return metadata[i]['usedBy']

In [60]:
print(MermaidDiagram(metadata=test_eg_graph, click=False).render_eg())

{'id113 -->|usedDataset| id69', 'id113 -->|usedDataset| id6', 'id113 -->|usedDataset| id78', 'id113 -->|usedDataset| id29', 'id113 -->|usedDataset| id51', 'id113 -->|usedDataset| id55', 'id113 -->|usedDataset| id3', 'id110 -->|generated| id11', 'id113 -->|usedDataset| id72', 'id113 -->|usedDataset| id39', 'id113 -->|usedDataset| id43', 'id113 -->|usedDataset| id54', 'id113 -->|usedDataset| id89', 'id113 -->|usedDataset| id61', 'id113 -->|usedDataset| id5', 'id113 -->|usedDataset| id80', 'id113 -->|usedDataset| id7', 'id113 -->|usedDataset| id10', 'id113 -->|usedDataset| id4', 'id113 -->|usedDataset| id101', 'id113 -->|usedDataset| id81', 'id113 -->|usedDataset| id31', 'id113 -->|usedDataset| id63', 'id113 -->|usedDataset| id40', 'id113 -->|usedDataset| id91', 'id113 -->|usedDataset| id1', 'id113 -->|usedDataset| id24', 'id113 -->|usedDataset| id52', 'id113 -->|usedDataset| id9', 'id113 -->|usedDataset| id64', 'id113 -->|usedDataset| id82', 'id112 -->|usedSoftware| id113', 'id113 -->|us

In [28]:
test_eg_diag = MermaidDiagram(metadata=mermaid_test_eg)

NameError: name 'mermaid_test_eg' is not defined

In [10]:
print(test_eg_diag.render_eg())

NameError: name 'test_eg_diag' is not defined

In [11]:
apms_loader = requests.get(	
	'https://fairscape.pods.uvarc.io/ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_apmsloader_initialrun0.1alpha'
	).json()
apms_embed = requests.get(
	"https://fairscape.pods.uvarc.io/ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_apmsembed_initialrun0.1alpha"
	).json()

apms_loader['@graph'].pop(0)
apms_embed['@graph'].pop(0)

{'@id': 'ark:59852/336360d9-40ff-45c4-9c39-2c05a7481f43',
 'conformsTo': {'@id': 'https://w3id.org/ro/crate/1.1'},
 'about': {'@id': 'ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_apmsembed_initialrun0.1alpha'},
 'isPartOf': {'@id': 'ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_apmsembed_initialrun0.1alpha',
  'name': '2.cm4ai_chromatin_mda-mb-468_untreated_apmsembed_initialrun0.1alpha'},
 'contentUrl': 'file:///ro-crate-metadata.json',
 '@type': 'https://w3id.org/EVI#Dataset',
 'additionalType': 'Dataset',
 'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'],
 'usedBy': []}

{'@id': 'ark:59852/2022e152-7a09-42f5-9066-685d8797ed4c',
 'conformsTo': {'@id': 'https://w3id.org/ro/crate/1.1'},
 'about': {'@id': 'ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha'},
 'isPartOf': {'@id': 'ark:59852/2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha',
  'name': '2.cm4ai_chromatin_mda-mb-468_untreated_imageembedfold1_initialrun0.1alpha'},
 'contentUrl': 'file:///ro-crate-metadata.json',
 '@type': 'https://w3id.org/EVI#Dataset',
 'additionalType': 'Dataset',
 'keywords': ['cm4ai', 'alpha', 'data release', 'b2ai'],
 'usedBy': []}

In [21]:
#test_eg_graph = apms_loader['@graph'] + apms_embed['@graph']

In [8]:
list_crate_requests = requests.get("https://fairscape.pods.uvarc.io/rocrate")

NameError: name 'requests' is not defined

In [9]:
list_crate_requests.json()

NameError: name 'list_crate_requests' is not defined

In [7]:
print(json.dumps(example_metadata, indent=2))

{
  "@id": "ark:59852/1.cm4ai_chromatin_mda-mb-468_untreated_apmsloader_initialrun0.1alpha",
  "@context": {
    "EVI": "https://w3id.org/EVI#",
    "@vocab": "https://schema.org/"
  },
  "@type": "Dataset",
  "name": "Initial integration run",
  "description": "Ideker Lab CM4AI 0.1 alpha MDA-MB-468 untreated chromatin Initial integration run AP-MS Edgelist",
  "keywords": [
    "Ideker Lab",
    "CM4AI",
    "0.1 alpha",
    "MDA-MB-468",
    "untreated",
    "chromatin",
    "Initial integration run",
    "AP-MS edgelist download"
  ],
  "isPartOf": [
    {
      "@id": "ark:/Ideker_Lab",
      "@type": "Organization",
      "name": "Ideker Lab"
    },
    {
      "@id": "ark:/Ideker_Lab/CM4AI",
      "@type": "Project",
      "name": "CM4AI"
    }
  ],
  "@graph": [
    {
      "@id": "ark:59852/f8ceefbc-fa52-495a-ac5f-5ccaafb9bcad",
      "@context": {
        "@vocab": "https://schema.org/",
        "evi": "https://w3id.org/EVI#"
      },
      "metadataType": "https://w3id.org/EV

In [125]:

test_diagram = MermaidDiagram(metadata=example_metadata)

In [126]:
print(test_diagram.render_rocrate())

graph TD
	subgraph Initial integration run
	id0([cellmaps_ppidownloader output file])
	id1[cellmaps_ppidownloader]
	id2[\AP-MS Loader/]
	id3([cellmaps_ppidownloader input file])
	click id0 "https://fairscape.pods.uvarc.io/ark:59852/f8ceefbc-fa52-495a-ac5f-5ccaafb9bcad"
	click id1 "https://fairscape.pods.uvarc.io/ark:59852/6790af66-db21-4273-a0cf-34993a4dd7b8"
	click id2 "https://fairscape.pods.uvarc.io/ark:59852/7ab9bf7e-a52d-47b0-881e-c835150d5f6a"
	click id3 "https://fairscape.pods.uvarc.io/ark:59852/62cbb46e-1d3d-4f72-b7e9-c7c140aa9367"
	id2 -->|generated| id0
	id1 -->|usedBy| id2
	id3 -->|usedBy| id2
	end


In [1]:
import json

json.dumps(
	example_metadata,
	indent=2
)

NameError: name 'example_metadata' is not defined

Subgraphs within Flowchart

```
flowchart TB
    c1-->a2
    subgraph one
    a1-->a2
    end
    subgraph two
    b1-->b2
    end
    subgraph three
    c1-->c2
    end
    one --> two
    three --> two
    two --> c2
```

Binding Clicks to Nodes

```
flowchart LR
    A-->B
    B-->C
    C-->D
    click A callback "Tooltip for a callback"
    click B "https://www.github.com" "This is a tooltip for a link"
    click C call callback() "Tooltip for a callback"
    click D href "https://www.github.com" "This is a tooltip for a link"
```

## Generate prov for ROCrate

In [None]:
print(template.render(mermaid_diagram=graph_text))

In [None]:
## object oriented approach

class MermaidGraph():
	def __init__(self, graph_metadata):
		self.graph_metadata = graph_metadata

	def parse(self):
		pass

class MermaidNode():

	def __init__(self, node_id, node_metadata):
		self.node_id = node_id
		self.node_metadata = node_metadata

	def render_node(self):
		pass

	def render_edges(self):

		self.node_metadata
		pass
