# Transforming the Meta-DataFrame into a Neo4j Event Log Class Diagram Graph
The following script demonstrates how the Meta-DataFrame can be modelled in Neo4j as an Event Log Class Diagram Graph of a given event log.

In [28]:
# import the libraries
import json
import pandas as pd
import numpy
import re #regular expressions for working with text data, especially extracting information from a piece of text

In [29]:
# Lookup table from Meta-DataFrame property names (keys) to the shorter,
# lower-case names intended as Neo4j property keys (values). The renaming is
# meant to sidestep Neo4j problems with whitespace in property names.
# NOTE(review): none of the later cells visible in this notebook read this
# mapping - confirm whether it is still needed.
index_to_neo4j = dict(
    Title='title',
    Data_type='data_type',
    Number_of_entries='num_entries',
    Number_of_unique_entries='num_unique_entries',
    Number_of_duplicate_entries='num_duplicate_entries',
    Number_of_undefined_entries='num_undefined_entries',
    Percentage_of_undefined_entries='percentage_undefined_entries',
)
index_to_neo4j

{'Title': 'title',
 'Data_type': 'data_type',
 'Number_of_entries': 'num_entries',
 'Number_of_unique_entries': 'num_unique_entries',
 'Number_of_duplicate_entries': 'num_duplicate_entries',
 'Number_of_undefined_entries': 'num_undefined_entries',
 'Percentage_of_undefined_entries': 'percentage_undefined_entries'}

In [30]:
# Name of the event log. It is used as the Neo4j node label and as part of the
# analysis/overview_<name>.csv and .cypher file names, so change it for each
# event log.
event_log_name = 'BPI_C_2019_FEL_synthetic_'

# Print the Cypher command that removes every node carrying this label
# (together with its relationships), followed by one blank line.
print(
    '// Delete all nodes and their relationships:',
    f'MATCH (node:{event_log_name}) DETACH DELETE node;',
    '',
    sep='\n',
)

// Delete all nodes and their relationships:
MATCH (node:BPI_C_2019_FEL_synthetic_) DETACH DELETE node;



In [31]:
# Load the Meta-DataFrame CSV that belongs to event_log_name, use its
# 'Property' column as the index and transpose the result in the same step,
# so that every former row label becomes a column.
overview_table = pd.read_csv(
    f'analysis/overview_{event_log_name}.csv',
    index_col='Property',
).T
overview_table

Property,caseID,event time:timestamp,event concept:name,eventID,case Spend area text,case Vendor,case Item Type,case Item Category,case Goods Receipt,event org:resource
Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Title,Case ID,Timestamp,Activity,Event ID,case Spend area text,case Vendor,case Item Type,case Item Category,case Goods Receipt,event org:resource
Importance,Mandatory attribute,Mandatory attribute,Mandatory attribute,Mandatory attribute,Additional attribute,Additional attribute,Additional attribute,Additional attribute,Additional attribute,Additional attribute
Level,Case level,Event level,Event level,Event level,Case level,Case level,Case level,Case level,Case level,Event level
Data type,string,datetime64[ns],string,float64,string,string,string,string,string,string
Number of entries,1,1,1,1,1,1,1,1,1,1
Number of unique entries,1,1,1,1,1,1,1,1,1,1
List up to 100 unique entries to view the structure of the event attributes,['4507004931_00020'],[numpy.datetime64('2018-02-01T09:04:00.0000000...,"[""Vendor creates invoice', 'Vendor creates deb...",[65800000000000.0],"[""['Packaging Type Case', 'Supply Chain Logist...",['vendorID'],"[""Standard', 'Service', 'Consignment', 'Third-...","[""3-way match, invoice before GR', '3-way matc...","['TRUE, FALSE']",['Sales Excellence']
Length of the list of up to 100 unique entries,1,1,1,1,1,1,1,1,1,1
Number of duplicate entries,0,0,0,0,0,0,0,0,0,0


In [32]:
# Gather overview_columns: the column labels of the transposed overview_table.
# The later cells iterate over these labels to build one Neo4j node per column.
overview_columns = overview_table.columns

# Display the labels; check them for stray whitespace, because the generated
# Cypher refers to some of these names literally.
overview_columns


Index(['caseID', 'event time:timestamp', 'event concept:name', 'eventID ',
       'case Spend area text', 'case Vendor', 'case Item Type',
       'case Item Category', 'case Goods Receipt', 'event org:resource'],
      dtype='object', name='Property')

In [33]:
# Build the Cypher script that creates one node per Meta-DataFrame column plus
# the relationships between those nodes, and write it to
# analysis/overview_<event_log_name>.cypher.
# This cell re-reads the CSV itself; it only needs event_log_name to be set.

def cypher_identifier(name):
  """Return `name` as a backtick-quoted Cypher identifier.

  A backtick inside the name is doubled so that it cannot end the quoting
  early. Names without backticks are simply wrapped in backticks.
  """
  return '`' + str(name).replace('`', '``') + '`'


def cypher_string(value):
  """Return `value` as a double-quoted Cypher string literal.

  The value is converted with str() (so None becomes "None" and numbers keep
  their Python text form) and then escaped by json.dumps. Embedded double
  quotes, backslashes and control characters therefore can no longer break
  the generated statement.
  """
  return json.dumps(str(value), ensure_ascii=False)


overview_table = pd.read_csv(f'analysis/overview_{event_log_name}.csv', index_col='Property')
overview_table = overview_table.T
overview_columns = overview_table.columns

neo4j_lines = []
mandatory_columns = []
for overview_column in overview_columns:
  # Round-trip the column through JSON to obtain a plain dict of its entries;
  # anything pandas cannot serialise itself is converted with str().
  column_json = overview_table[overview_column].to_json(default_handler=str)
  neo4j_properties = json.loads(column_json)

  overview_level = neo4j_properties['Level']
  property_label_identifier = neo4j_properties['Importance']
  if property_label_identifier == 'Mandatory attribute':
    mandatory_columns.append(overview_column)

  # The column name serves as node variable, as first label and as `name`
  # property. Level and importance become labels too; the event log name is
  # the label the delete/show queries of this notebook match on.
  node_variable = cypher_identifier(overview_column)
  node_labels = ':'.join([
      node_variable,
      cypher_identifier(overview_level),
      cypher_identifier(property_label_identifier),
      event_log_name,
  ])
  property_entries = [f'name: {cypher_string(overview_column)}']
  property_entries += [
      f'{cypher_identifier(key)}: {cypher_string(value)}'
      for key, value in neo4j_properties.items()
  ]
  property_map = ', '.join(property_entries)

  neo4j_lines.append(f'CREATE ({node_variable}:{node_labels} {{{property_map}}})')

# Relationships between the mandatory attributes. The node variables are
# hard-coded and have to match the CSV column names exactly - including the
# trailing space in `eventID `.
neo4j_lines.append('CREATE (`caseID`) - [:CASE_TO_EVENT] -> (`eventID `)')
neo4j_lines.append('CREATE (`eventID `) - [:EVENT_RELATIONSHIP] -> (`event concept:name`)')
neo4j_lines.append('CREATE (`eventID `) - [:EVENT_RELATIONSHIP] -> (`event time:timestamp`)')

# Attach every non-mandatory attribute to the event node and/or the case node.
# NOTE(review): the decision is based on the substrings 'event' / 'case' in the
# column name, not on the 'Level' entry - confirm that this is intended.
for overview_column in overview_columns:
  if overview_column in mandatory_columns:
    continue
  node_variable = cypher_identifier(overview_column)
  if 'event' in overview_column:
    neo4j_lines.append(f'CREATE (`eventID `) - [:EVENT_RELATIONSHIP] -> ({node_variable})')
  if 'case' in overview_column:
    neo4j_lines.append(f'CREATE (`caseID`) - [:CASE_RELATIONSHIP] -> ({node_variable})')

# One ';' terminates the whole sequence of CREATE clauses. The relationship
# clauses refer to node variables bound by the earlier CREATE clauses, so
# everything has to stay in a single statement.
neo4j_lines.append(';')

neo4j_command = '// Create all nodes and their relationships\n' + '\n'.join(neo4j_lines)

# Write with an explicit encoding so that non-ASCII attribute values do not
# depend on the platform's default encoding.
with open(f'analysis/overview_{event_log_name}.cypher', 'w', encoding='utf-8') as overview_file:
  print(neo4j_command, file=overview_file)

<h1>Creating the Neo4j Commands</h1>
The Neo4j commands are hard-coded on purpose, so that they can be changed easily whenever the Neo4j database instance has to be adjusted.

In [34]:
# Build the Cypher script that creates one node per Meta-DataFrame column plus
# the relationships between those nodes, and write it to
# analysis/overview_<event_log_name>.cypher.
# Needs overview_table, overview_columns and event_log_name from earlier cells.

def cypher_identifier(name):
  """Return `name` as a backtick-quoted Cypher identifier.

  A backtick inside the name is doubled so that it cannot end the quoting
  early. Names without backticks are simply wrapped in backticks.
  """
  return '`' + str(name).replace('`', '``') + '`'


def cypher_string(value):
  """Return `value` as a double-quoted Cypher string literal.

  The value is converted with str() (so None becomes "None" and numbers keep
  their Python text form) and then escaped by json.dumps. Embedded double
  quotes, backslashes and control characters therefore can no longer break
  the generated statement.
  """
  return json.dumps(str(value), ensure_ascii=False)


# Two lists: the generated Cypher lines and the names of the mandatory columns.
neo4j_lines = []
mandatory_columns = []

for overview_column in overview_columns:
  # Round-trip the column through JSON to obtain a plain dict of its entries;
  # anything pandas cannot serialise itself is converted with str().
  column_json = overview_table[overview_column].to_json(default_handler=str)
  neo4j_properties = json.loads(column_json)

  overview_level = neo4j_properties['Level']
  property_label_identifier = neo4j_properties['Importance']
  if property_label_identifier == 'Mandatory attribute':
    mandatory_columns.append(overview_column)

  # The column name serves as node variable, as first label and as `name`
  # property. Level and importance become labels too; the event log name is
  # the label the delete/show queries of this notebook match on.
  node_variable = cypher_identifier(overview_column)
  node_labels = ':'.join([
      node_variable,
      cypher_identifier(overview_level),
      cypher_identifier(property_label_identifier),
      event_log_name,
  ])
  property_entries = [f'name: {cypher_string(overview_column)}']
  property_entries += [
      f'{cypher_identifier(key)}: {cypher_string(value)}'
      for key, value in neo4j_properties.items()
  ]
  property_map = ', '.join(property_entries)

  neo4j_lines.append(f'CREATE ({node_variable}:{node_labels} {{{property_map}}})')

# Relationships between the mandatory attributes. The node variables are
# hard-coded and have to match the CSV column names exactly - including the
# trailing space in `eventID `.
neo4j_lines.append('CREATE (`caseID`) - [:CASE_TO_EVENT] -> (`eventID `)')
neo4j_lines.append('CREATE (`eventID `) - [:EVENT_RELATIONSHIP] -> (`event concept:name`)')
neo4j_lines.append('CREATE (`eventID `) - [:EVENT_RELATIONSHIP] -> (`event time:timestamp`)')

# Attach every non-mandatory attribute to the event node and/or the case node.
# NOTE(review): the decision is based on the substrings 'event' / 'case' in the
# column name, not on the 'Level' entry - confirm that this is intended.
for overview_column in overview_columns:
  if overview_column in mandatory_columns:
    continue
  node_variable = cypher_identifier(overview_column)
  if 'event' in overview_column:
    neo4j_lines.append(f'CREATE (`eventID `) - [:EVENT_RELATIONSHIP] -> ({node_variable})')
  if 'case' in overview_column:
    neo4j_lines.append(f'CREATE (`caseID`) - [:CASE_RELATIONSHIP] -> ({node_variable})')

# One ';' terminates the whole sequence of CREATE clauses. The relationship
# clauses refer to node variables bound by the earlier CREATE clauses, so
# everything has to stay in a single statement.
neo4j_lines.append(';')

neo4j_command = '// Create all nodes and their relationships\n' + '\n'.join(neo4j_lines)

# Write with an explicit encoding so that non-ASCII attribute values do not
# depend on the platform's default encoding.
with open(f'analysis/overview_{event_log_name}.cypher', 'w', encoding='utf-8') as overview_file:
  print(neo4j_command, file=overview_file)

In [35]:
# Print the Cypher query that returns every node labelled with event_log_name,
# followed by one blank line.
print(
    '// Show all nodes and their relationships:',
    f'MATCH (event_log:{event_log_name}) RETURN event_log;',
    '',
    sep='\n',
)

// Show all nodes and their relationships:
MATCH (event_log:BPI_C_2019_FEL_synthetic_) RETURN event_log;



# End of Script 2 Transforming the Meta-DataFrame into a Neo4j Event Log Class Diagram Graph
Author: Kyle Smith <br>
Script: For Master's thesis <br>
University of Camerino & University of Applied Sciences Northwestern Switzerland