In [136]:
import xml.etree.ElementTree as ET
import pandas as pd

Loading the event log and defining the rules

In [137]:
# Read the semicolon-separated event log, keeping only the case ID and event name columns
log_df = pd.read_csv(
    'log.csv',
    sep=';',
    usecols=['EventName', 'ID'],
)

# Function to parse the DCR XML; rule extraction itself is not implemented here
def parse_dcr_xml(xml_path):
    """Parse the XML file at ``xml_path`` and return its root element.

    Args:
        xml_path: Path (or file object) accepted by ``ET.parse``.

    Returns:
        xml.etree.ElementTree.Element: Root element of the parsed document.
        Previously the root was computed and discarded (implicit ``None``).

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    tree = ET.parse(xml_path)
    return tree.getroot()
# Collapse the log into one list of event names per case ID (rows keep their file order)
log_df = (
    log_df
    .groupby('ID')['EventName']
    .apply(list)
)

print(log_df)

ID
14a-026_0    [Fill out application, Approved - to board, Ch...
14a-027_0    [Fill out application, Approved - to board, Ch...
14a-031_2    [Fill out application, Screening reject, Appli...
14a-035_0    [Fill out application, Approved - to board, Ch...
14a-037_0    [Fill out application, Approved - to board, Ch...
                                   ...                        
14b-627_0    [Fill out application, Approved - to board, Ch...
14b-627_1    [Fill out application, Approved - to board, Ch...
14b-628_0    [Fill out application, Approved - to board, Ch...
14b-629_1    [Fill out application, Approved - to board, Ch...
14b-631_0    [Fill out application, Approved - to board, Ch...
Name: EventName, Length: 594, dtype: object


In [138]:
def _followed_by_all(events, trigger, responses):
    """Return True when every ``trigger`` in ``events`` is later followed by all ``responses``.

    A trace that never contains ``trigger`` satisfies the constraint vacuously.
    """
    if trigger not in events:
        return True
    # Only the last trigger needs checking: whatever follows it also follows
    # every earlier occurrence.
    last_trigger = len(events) - 1 - events[::-1].index(trigger)
    tail = events[last_trigger + 1:]
    return all(response in tail for response in responses)


def _payment_repeated_correctly(events):
    """Rule 4: at most one "First payment", or two with an "Undo payment" in between."""
    payments = [i for i, event in enumerate(events) if event == "First payment"]
    if len(payments) <= 1:
        return True
    if len(payments) > 2:
        return False
    first, second = payments
    return "Undo payment" in events[first + 1:second]


def _payment_waits_for_account_approval(events):
    """Rule 5: no "First payment" while an account-number change is still unapproved."""
    approval_pending = False
    for event in events:
        if event == "Account number changed":
            approval_pending = True
        elif event == "Approve changed account":
            approval_pending = False
        elif event == "First payment" and approval_pending:
            return False
    return True


def _end_report_verdict(events):
    """Rule 7 verdict: "Satisfied", "Not Satisfied", or None when the trace is left uncounted.

    NOTE(review): the intended semantics of this rule are not evident from the
    code, so the original branching is preserved as-is, including the branch
    that only prints a message and counts the trace for neither outcome.
    """
    if "Change Phase to Payout" not in events:
        return "Satisfied"
    payout_index = events.index("Change Phase to Payout")
    after_payout = events[payout_index:]
    report_after_payout = "Change Phase to End Report" in after_payout
    if "First payment" in after_payout:
        if not report_after_payout:
            return "Not Satisfied"
        # First occurrence in the whole trace, exactly as the original code did.
        report_index = events.index("Change Phase to End Report")
        if "First payment" not in events[report_index:]:
            return "Not Satisfied"
        print("How did I get here?")
        return None
    return "Not Satisfied" if report_after_payout else "Satisfied"


def check_conformance(log_df):
    """Count, for each of eight rules, how many traces satisfy or violate it.

    Args:
        log_df: Iterable of traces, each an ordered sequence of event-name
            strings (e.g. the per-case lists produced by grouping the log by ID).

    Returns:
        dict: Maps "Rule 1" .. "Rule 8" to {"Satisfied": int, "Not Satisfied": int}.
        Every trace is counted exactly once per rule, except for the single
        Rule 7 branch that only prints a message (see ``_end_report_verdict``).

    Event names are compared case-sensitively, and their capitalisation is not
    uniform across rules ("Change Phase to Payout" vs "Change phase to abandon").
    NOTE(review): confirm each literal against the spellings used in the log.
    """
    rule_counts = {
        f"Rule {number}": {"Satisfied": 0, "Not Satisfied": 0}
        for number in range(1, 9)
    }

    def record(rule, satisfied):
        rule_counts[rule]["Satisfied" if satisfied else "Not Satisfied"] += 1

    # Iterate through each trace in the log and check rules
    for events in log_df:
        events = list(events)

        # Rule 1: 'Fill out application' should always be the first event.
        # An empty trace has no first event and is counted as a violation.
        record("Rule 1", bool(events) and events[0] == "Fill out application")

        # Rule 2: 'Lawyer Review' and 'Architect Review' should never occur together
        record("Rule 2", not ("Lawyer Review" in events and "Architect Review" in events))

        # Rule 3: Reject should always eventually be followed by BOTH
        # Applicant informed and Change phase to Abort.
        record(
            "Rule 3",
            _followed_by_all(events, "Reject", ("Applicant informed", "Change phase to Abort")),
        )

        # Rule 4: First payment should only occur once, unless Undo payment is
        # executed afterwards, in which case it may be repeated once.
        record("Rule 4", _payment_repeated_correctly(events))

        # Rule 5: If Account number changed happens, then afterwards Approve
        # changed account needs to be executed before one can execute First payment.
        record("Rule 5", _payment_waits_for_account_approval(events))

        # Rule 6: Change Phase to Payout should always be followed by First payment
        record("Rule 6", _followed_by_all(events, "Change Phase to Payout", ("First payment",)))

        # Rule 7: Restrictions for Change Phase to End Report after Payout and First payment
        verdict = _end_report_verdict(events)
        if verdict is not None:
            rule_counts["Rule 7"][verdict] += 1

        # Rule 8: Execute abandon should always be followed by Change phase to abandon
        record("Rule 8", _followed_by_all(events, "Execute abandon", ("Change phase to abandon",)))

    return rule_counts

# Run the conformance check over the grouped log and print one summary line per rule
result = check_conformance(log_df)

print("Conformance Check Results:")
for rule, counts in result.items():
    satisfied, not_satisfied = counts['Satisfied'], counts['Not Satisfied']
    total = satisfied + not_satisfied
    print(f"{rule}: Satisfied {satisfied}, Not Satisfied {not_satisfied}, Total {total}")

Conformance Check Results:
Rule 1: Satisfied 594, Not Satisfied 0, Total 594
Rule 2: Satisfied 305, Not Satisfied 289, Total 594
Rule 3: Satisfied 369, Not Satisfied 225, Total 594
Rule 4: Satisfied 594, Not Satisfied 0, Total 594
Rule 5: Satisfied 591, Not Satisfied 3, Total 594
Rule 6: Satisfied 592, Not Satisfied 2, Total 594
Rule 7: Satisfied 576, Not Satisfied 18, Total 594
Rule 8: Satisfied 592, Not Satisfied 2, Total 594


Parsing the DCR XML file

In [139]:
def parse_xml_file(file_path):
    """Print the label-to-event-ID mapping found in an XML file.

    Reads the ``labelMapping`` elements under
    ``specification/resources/labelMappings`` and prints a dict keyed by each
    element's ``labelId`` attribute with its ``eventId`` attribute as value.
    Nothing is returned.

    Args:
        file_path: Path to the XML file to parse.

    Raises:
        AttributeError: If any element along the lookup path is missing.
    """
    root = ET.parse(file_path).getroot()

    # Debug aid: report <labelMapping> elements that are direct children of the root
    for direct_child in root.findall("labelMapping"):
        print("Child tag:", direct_child.tag)

    label_mappings = root.find("specification").find("resources").find("labelMappings")
    mapping = {
        entry.get("labelId"): entry.get("eventId")
        for entry in label_mappings.findall("labelMapping")
    }
    print(mapping)
# Path of the XML file inspected by the cells below
xml_file_path = 'DCR-Task-2.xml'

parse_xml_file(file_path=xml_file_path)

{'Fill out application': 'Activity0', 'Lawyer review': 'Activity1', 'Architect Review': 'Activity2', 'Change phase to abandon': 'Activity3', 'Execute abandon': 'Activity4', 'Approve': 'Activity5', 'Reject': 'Activity6', 'Approve changed account': 'Activity7', 'Account number changed': 'Activity8', 'Change phase to payout': 'Activity9', 'First payment': 'Activity10', 'Undo Payment': 'Activity11', 'Change phase to aborted': 'Activity12', 'Applicant informed': 'Activity13', 'Change phase to end report': 'Activity14', 'Receive end report': 'Activity15', 'Change phase to completed': 'Activity16'}


Conformance check 

In [140]:

#print(log_df)
#parse_xml_file(xml_file_path)

# Per-pattern tallies, all starting at zero: "Pattern 1" .. "Pattern 8"
pattern_counts = {
    f"Pattern {number}": {"Satisfied": 0, "Not Satisfied": 0}
    for number in range(1, 9)
}

def extract_label_names_from_xml(file_path):
    """
    Build an event-ID to label-name lookup from an XML file.

    Every ``labelMapping`` element anywhere in the document contributes one
    entry; when an event ID appears more than once, the last one wins.

    Args:
    - file_path (str): Path to the XML file.

    Returns:
    - dict: ``eventId`` attribute values as keys, ``labelId`` attribute values as values.
    """
    root = ET.parse(file_path).getroot()
    return {
        mapping.get('eventId'): mapping.get('labelId')
        for mapping in root.findall('.//labelMapping')
    }

def print_relations(file_path, relation_type):
    """Print every relation of one type from a DCR XML file as "source <arrow> target".

    Args:
        file_path (str): Path to the XML file.
        relation_type (str): Tag of the container element under
            ``specification/constraints`` (e.g. "responses"). The individual
            relation elements are looked up under that tag with its last
            character dropped (e.g. "response").

    Source and target IDs are shown as label names when a mapping exists and
    as raw IDs otherwise. If the file has no section for ``relation_type``,
    nothing is printed.
    """
    label_names = extract_label_names_from_xml(file_path)

    root = ET.parse(file_path).getroot()

    # Walk down step by step so that a missing section yields no output
    # instead of an AttributeError on None.
    container = root
    for tag in ("specification", "constraints", relation_type):
        container = container.find(tag)
        if container is None:
            return

    arrow_symbols = {
        "responses": "-->",
        "excludes": "--%",
        # Include previously shared "--><>" with milestone, which made the two
        # relation kinds indistinguishable in the printed output.
        "includes": "-->+",
        "milestones": "--><>",
        "conditions": "-->*"
    }

    # Unknown relation types fall back to a plain arrow.
    arrow = arrow_symbols.get(relation_type, "-->")

    for relation in container.findall(relation_type[:-1]):
        source_id = relation.get("sourceId")
        target_id = relation.get("targetId")

        source_name = label_names.get(source_id, source_id)
        target_name = label_names.get(target_id, target_id)

        print(f"{source_name} {arrow} {target_name}")

if __name__ == "__main__":
    # List every relation in the XML file, one section per relation type
    xml_file_path = "DCR-Task-2.xml"

    relation_types = [
        "responses",
        "excludes",
        "includes",
        "milestones",
        "conditions",
    ]

    for relation_type in relation_types:
        print()
        print(f'{relation_type.capitalize()} relations:')
        print_relations(xml_file_path, relation_type)






Responses relations:
Fill out application --> Execute abandon
Fill out application --> Lawyer review
Fill out application --> Architect Review
Lawyer review --> Approve
Lawyer review --> Reject
Architect Review --> Approve
Architect Review --> Reject
Reject --> Applicant informed
Reject --> Change phase to aborted
Approve --> Account number changed
Execute abandon --> Change phase to abandon
Receive end report --> Change phase to completed
Account number changed --> Approve changed account
Approve changed account --> Change phase to payout
Change phase to payout --> First payment
First payment --> Change phase to end report
Change phase to end report --> First payment
Change phase to end report --> Receive end report

Excludes relations:
Execute abandon --% Fill out application
Lawyer review --% Fill out application
Architect Review --% Fill out application
Lawyer review --% Architect Review
Architect Review --% Lawyer review
Reject --% Approve
Approve --% Reject
Execute abandon --% A