# Plot a Resource Model Entity-relationships diagram

Creates the Entity-relationships diagram (ERD) of EAMENA Heritage Places (HP) and color nodes according to the [mds-template-readonly.tsv](https://github.com/eamena-project/eamena-arches-dev/blob/main/dbs/database.eamena/data/reference_data/mds/mds-template-readonly.tsv) file (see: [GitHub documentation](https://github.com/eamena-project/eamena-arches-dev/tree/main/dbs/database.eamena/data/reference_data#templates))

Load GitHub repository, requirements and libraries

In [1]:
# download ACHP graph_mixer
!rm cultural-heritage -R
!git clone https://github.com/achp-project/cultural-heritage.git
!pip install  -q -r  cultural-heritage/graph-parser/requirements.txt
%cd /content/cultural-heritage/graph-parser/
import graph_mixer as gm

# download EAMENA erms
%cd /content/
!rm eamena-functions -R
!git clone https://github.com/eamena-project/eamena-functions.git
%cd /content/eamena-functions/mds
import mds

# librairies
import os
import urllib.request
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact, interactive
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import re
from IPython.display import HTML
from pyvis import network as net
import folium
import json
from google.colab import files

Cloning into 'cultural-heritage'...
remote: Enumerating objects: 3290, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 3290 (delta 11), reused 7 (delta 3), pack-reused 3260[K
Receiving objects: 100% (3290/3290), 36.74 MiB | 12.99 MiB/s, done.
Resolving deltas: 100% (2397/2397), done.
/content/cultural-heritage/graph-parser
/content
Cloning into 'eamena-functions'...
remote: Enumerating objects: 287, done.[K
remote: Counting objects: 100% (77/77), done.[K
remote: Compressing objects: 100% (41/41), done.[K
remote: Total 287 (delta 40), reused 59 (delta 23), pack-reused 210[K
Receiving objects: 100% (287/287), 59.85 KiB | 5.98 MiB/s, done.
Resolving deltas: 100% (163/163), done.
/content/eamena-functions/mds


## Select

Select the EAMENA HP Resource Model (RM) within the listed RMs

List of available RMs

In [2]:
%cd /content/cultural-heritage/graph-parser/
!rm inputResourceModels -R
!mkdir -p inputResourceModels
remote_source_files_1 = dict()
# remote_source_files_1['EAMENA'] = 'https://raw.githubusercontent.com/eamena-project/eamena-arches-dev/main/dbs/ea.train/data/reference_data/rm/hp/Heritage%20Place.json'
remote_source_files_1['EAMENA'] = 'https://raw.githubusercontent.com/eamena-project/eamena-arches-dev/main/dbs/ea.train/data/reference_data/rm/hp/Site%20Feature%20Interpretation%20Type%20-%20branch.json'
##################################################
# Set these variables ###########################
##################################################
# "ontologyclass": "null",
# "parentproperty": "null",
##################################################
gm.rm_selected_one('EAMENA', remote_source_files_1)

/content/cultural-heritage/graph-parser
rm: cannot remove 'inputResourceModels': No such file or directory
EAMENA_Site%20Feature%20Interpretation%20Type%20-%20branch.json has been loaded into the folder inputResourceModels/


In [3]:
with open('/content/cultural-heritage/graph-parser/inputResourceModels/EAMENA_Site%20Feature%20Interpretation%20Type%20-%20branch.json', 'r') as file:
    data = json.load(file)
    print(json.dumps(data, indent=4))  # This prints the JSON data formatted nicely


{
    "graph": [
        {
            "author": " ",
            "cards": [
                {
                    "active": true,
                    "cardid": "ace89b5b-1967-4b00-95ba-76bc7b35d2f6",
                    "component_id": "f05e4d3a-53c1-11e8-b0ea-784f435179ea",
                    "config": null,
                    "constraints": [],
                    "cssclass": null,
                    "description": {
                        "en": ""
                    },
                    "graph_id": "bf24c55e-c18d-4d64-8c6e-932eb9c66eb5",
                    "helpenabled": false,
                    "helptext": {
                        "en": null
                    },
                    "helptitle": {
                        "en": null
                    },
                    "instructions": {
                        "en": null
                    },
                    "is_editable": true,
                    "name": {
                        "en": "Site Feature Interpr

## Read

Create subgraph and comparison dataframe csv files by running [graph-parser.py](https://github.com/achp-project/cultural-heritage/blob/main/graph-parser/graph_parser.py) and [graph-comparator.py](https://github.com/achp-project/cultural-heritage/blob/main/graph-parser/graph_comparator.py)

In [4]:
!python /content/cultural-heritage/graph-parser/graph_comparator.py -d -m list -o subgraphMetrics.csv inputResourceModels/*.json
subgraph_metrics = pd.read_csv('subgraphMetrics.csv')
subgraph_metrics

Unnamed: 0.1,Unnamed: 0,graph_name,graph_id,source_property,target_property,relation_type,source_id,target_id,source_name,target_name
0,0,EAMENA_Site%20Feature%20Interpretation%20Type%...,bf24c55e-c18d-4d64-8c6e-932eb9c66eb5,,,,362cd7de-12b6-11ef-a316-f3df69eec361,518ffb82-12b6-11ef-a316-f3df69eec361,Site Feature Interpretation Type,Sub-Typology
1,1,EAMENA_Site%20Feature%20Interpretation%20Type%...,bf24c55e-c18d-4d64-8c6e-932eb9c66eb5,,,,518ffb82-12b6-11ef-a316-f3df69eec361,ab87bf4a-12ba-11ef-a316-f3df69eec361,Sub-Typology,Kites
2,2,EAMENA_Site%20Feature%20Interpretation%20Type%...,bf24c55e-c18d-4d64-8c6e-932eb9c66eb5,,,,518ffb82-12b6-11ef-a316-f3df69eec361,7659a464-12c9-11ef-a316-f3df69eec361,Sub-Typology,Others


## Show

In [7]:
rm_project= 'EAMENA'
filename = rm_project + "-erd.html"
mds_template_df = mds.mds_template()
mds_template_df['Enhanced record minimum standard'] = mds_template_df['Enhanced record minimum standard'].str.contains(r'Yes', case = False, na = False, regex = True).astype(int)
mds_template_df = mds_template_df.loc[mds_template_df['Enhanced record minimum standard'] == 1]
in_erms = list(mds_template_df['uuid_sql'])
## previously with mds.mds_field_colors(), see ACHP ---------------------
# df_color = mds.mds_field_colors()
# G = gm.create_rm_graph(rm_project= rm_project, color_fields = df_color)
## ----------------------------------------------------------------------
def create_rm_graph(subgraph_metrics = 'subgraphMetrics.csv', rm_project = None, highlight_nodes = None, color_default = 'blue', color_highlight='red', color_fields = None, add_images = False):
	"""
	Table for one RM. Return a networkx graph. Optional: highlight nodes (fields) listed in a list (UUIDs)

	:param subgraph_metrics: a CSV file
	:param rm_project: the name of one RM (ex. EAMENA)
	:param highlight_nodes: optional. A list of UUIDs
	:param color_fields: optional. A dataframe of node UUIDs with their color
	:param add_images: if True, will modify the layout of the nodes to be embed images (i.e., leaves, values)

	:Example:
	>> # create graph
	>> rm_graph = create_rm_graph(rm_project = 'EAMENA')
	>> rm_graph
	>>
	>> # highlight nodes (fields), EAMENA example
	>> df_erms = erms_template()
	>> df_erms['Enhanced record minimum standard'] = df_erms['Enhanced record minimum standard'].str.contains(r'Yes', case = False, na = False, regex = True).astype(int)
	>> df_erms = df_erms.loc[df_erms['Enhanced record minimum standard'] == 1]
	>> in_erms = df_erms['uuid_sql'].tolist()
	>> rm_graph = create_rm_graph(rm_project = 'EAMENA', highlight_nodes = in_erms)
	>> rm_graph
	"""
	import pandas as pd
	import networkx as nx
	import re

	rm_graph = pd.read_csv(subgraph_metrics)
	rm_graph.fillna("NaN", inplace=True)
	rm_graph.rename(columns={'graph_name': 'G',
							'source_property': 'source_crm',
							'target_property': 'target_crm',
							'relation_type': 'property'}, inplace=True)
  ##############################################################################
  ## Only useful for branches ##################################################
  ##############################################################################
	rm_graph['property'] = "Pxx_XXX"
	rm_graph['source_crm'] = rm_graph['target_crm'] = "Eyy_YYY"
	##############################################################################
	##############################################################################
	col_order = ['G', 'source_crm', 'target_crm', 'property', 'source_id', 'target_id', 'source_name', 'target_name']
	rm_graph = rm_graph[col_order]
	rm_graph['G'] = rm_graph['G'].apply(lambda x: x.split('_')[0])
	rm_graph = rm_graph.loc[rm_graph['G'] == rm_project]
	# Create a directed graph from the DataFrame
	G = nx.from_pandas_edgelist(rm_graph, 'source_id', 'target_id', edge_attr=['property'], create_using=nx.DiGraph())
	# Populate node attributes
	for _, row in rm_graph.iterrows():
		source = row['source_id']
		target = row['target_id']
		source_attributes = {key[len('source_'):]: row[key] for key in rm_graph.columns if key.startswith('source_')}
		target_attributes = {key[len('target_'):]: row[key] for key in rm_graph.columns if key.startswith('target_')}
		# Update or add node attributes
		if G.has_node(source):
			G.nodes[source].update(source_attributes)
		if G.has_node(target):
			G.nodes[target].update(target_attributes)
  ## nodes
	for n in G.nodes(data=True):
		n[1]['label'] = n[1]['name'] # will show names
		n[1]['title'] = re.sub(r'_', ' ', n[1]['crm'])
	# TODO: if the has no incoming edges it has a semantic Datatype
	# if G.in_degree(n[1]) == 0:
	#   n[1]['shape'] = 'square'
	#   n[1]['color'] = 'grey'
	## node colors
	if color_fields is not None:
		for n in G.nodes(data=True):
			color_out = color_fields.loc[color_fields['uuid_sql'] == n[0], 'color']
			if len(color_out) > 0:
				color_out = color_out.iloc[0]
				n[1]['color'] = color_out
			else:
				n[1]['color'] = color_default
	# from highlight
	if type(highlight_nodes) == list:
		node_colors = {node: color_highlight if node in highlight_nodes else color_default for node in G.nodes}
		nx.set_node_attributes(G, values=node_colors, name='color')
	if add_images:
		for node in G.nodes:
			G.nodes[node]['size'] = 10
			G.nodes[node]['mass'] = 20
			G.nodes[node]['shape'] = 'circle'
			G.nodes[node]['image'] = 'None'
	## edges
	for e in G.edges(data=True):
		e[2]['title'] = re.sub(r'_', ' ', e[2]['property']) # replace _ by spaces
		# e[2]['title'] =  e[2]['label'] # popup labels: complete
		property_label = re.search(r'_(.*)', e[2]['property'])[1] # get text after P53_...
		property_label = re.sub(r'_', ' ', property_label) # replace _ by spaces
		e[2]['label'] = property_label # permanent labels (text)
		# print(e)
	return(G)

G = create_rm_graph(rm_project= rm_project, color_fields = mds_template_df)
hp_G = gm.plot_net_graph(G, filename = filename, width = "1200px", height = "1000px")
HTML(filename = filename)
## Erreurs
## There are 2 "Disturbance Event Timespan" subgraphs


## Export

In [None]:
files.download(filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>