# Group Info

**Group:** GroupXY

**List of Members:**
- Member 1 (matriculation number)
- Member 2 (matriculation number)

# Setup

In [None]:
from datetime import datetime, timedelta, time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.objects.conversion.log import converter as log_converter

from sklearn.model_selection import ParameterGrid
import plotly.graph_objects as go
import time

def load_log_without_warnings(path) -> pd.DataFrame:
    """
    Import the XES event log stored at `path` and return it as a dataframe.

    The log is read with pm4py's XES importer and then converted with
    pm4py's dataframe converter. Python warnings emitted while doing so
    are suppressed for the duration of this call only; the global warning
    filters are restored afterwards.
    """
    # Local import keeps this cell self-contained.
    import warnings

    with warnings.catch_warnings():
        # The function name promises a quiet import, so silence warnings here
        # instead of changing the filters for the whole notebook.
        warnings.simplefilter('ignore')
        return log_converter.to_data_frame.apply(xes_importer.apply(path))

In [None]:
# Load the event log from 'udonya.xes' as a dataframe (see load_log_without_warnings).
log = load_log_without_warnings('udonya.xes')

# Process Overview

# Visual Analytics

# Process Discovery

# Conformance Checking

# Diving Into Some Details

# Technical Questions

## Conformance Checking

In the following, you find the implementation of the method described in the report.

Parts that require you to implement something are marked by **TODO**.

Your manipulation functions should satisfy the invariant that the output dataframe contains at least the columns of the input dataframe. More precisely, the output dataframe may have additional columns, but it must contain every column that is present in the input dataframe.


In [None]:
# Read the event data used for the alignment experiment from CSV.
df = pd.read_csv('./input-logs/align-data.csv')

In [None]:
# Let pm4py bring the dataframe into its event-log format. Only default
# arguments are used, so the CSV presumably already uses pm4py's standard
# column names -- TODO confirm.
df = pm4py.format_dataframe(df)

In [None]:
# Load the ground-truth model (stored as a PTML process tree).
tree = pm4py.read_ptml('./input-models/cc-technical-ground-truth-model.ptml')

In [None]:
# Convert the process tree into a Petri net; `im` and `fm` are presumably the
# initial and final marking (they are passed to the alignment call below).
net, im, fm = pm4py.convert_to_petri_net(tree)

## Manipulation Functions

In [None]:
def swap_events(df: pd.DataFrame, target_nbr_swaps: int, seed=None) -> pd.DataFrame:
    """
    Perform target_nbr_swaps event swaps in a
    copy of the provided dataframe.
    Swaps are executed within a case.

    Sampling: for every swap a case is drawn uniformly at random from all
    cases that contain at least two events, then two distinct events of that
    case are drawn uniformly at random. The same event may take part in
    several swaps, so a later swap can undo an earlier one.

    A swap exchanges all event attributes of the two rows except the case id,
    the timestamp and pm4py's index columns (if present). Those stay attached
    to the row position, so the event order is changed no matter whether a
    consumer orders events by row position or by timestamp.

    Assumes pm4py's standard column names ('case:concept:name',
    'time:timestamp', '@@index', '@@case_index').

    Parameters:
        df: event log as a dataframe; it is not modified.
        target_nbr_swaps: number of swaps to perform (>= 0).
        seed: optional seed (or numpy Generator) for reproducible sampling.

    Returns:
        A new dataframe with the same columns, column order, index and
        number of rows as the input.

    Raises:
        ValueError: if target_nbr_swaps is negative.
        KeyError: if swaps are requested but the case id column is missing.
    """
    if target_nbr_swaps < 0:
        raise ValueError('target_nbr_swaps must be non-negative')

    # Create a working copy
    df = df.copy()
    if target_nbr_swaps == 0:
        return df

    case_col = 'case:concept:name'
    if case_col not in df.columns:
        raise KeyError(f"Column '{case_col}' is required to swap events within a case")

    # Columns that stay attached to the row position instead of the event.
    anchored = {case_col, 'time:timestamp', '@@index', '@@case_index'}
    anchored_cols = [col for col in df.columns if col in anchored]
    movable_cols = [col for col in df.columns if col not in anchored]

    # Positional row numbers per case; only cases with >= 2 events can swap.
    rows_per_case = [
        rows
        for rows in df.groupby(case_col, sort=False).indices.values()
        if len(rows) >= 2
    ]
    if not rows_per_case or not movable_cols:
        return df

    rng = np.random.default_rng(seed)
    # perm[p] is the original row whose attributes end up at position p.
    perm = np.arange(len(df.index))
    for case_pick in rng.integers(len(rows_per_case), size=target_nbr_swaps):
        rows = rows_per_case[case_pick]
        first = rng.integers(len(rows))
        # A non-zero offset modulo the case length guarantees a different event.
        second = (first + 1 + rng.integers(len(rows) - 1)) % len(rows)
        i, j = rows[first], rows[second]
        perm[i], perm[j] = perm[j], perm[i]

    # Apply all swaps at once: reorder the movable attributes, keep the rest.
    original_index = df.index
    fixed_part = df[anchored_cols].reset_index(drop=True)
    moved_part = df[movable_cols].iloc[perm].reset_index(drop=True)
    swapped = pd.concat([fixed_part, moved_part], axis=1)[list(df.columns)]
    swapped.index = original_index
    return swapped


In [None]:
def drop_events(df: pd.DataFrame, nbr_drop: int, seed=None) -> pd.DataFrame:
    """
    Drops the given number of events
    (selected uniformly at random)

    The rows to drop are sampled without replacement. The remaining rows
    keep their relative order, their index labels and all columns.

    Parameters:
        df: event log as a dataframe; it is not modified.
        nbr_drop: number of rows to remove, between 0 and len(df).
        seed: optional seed (or numpy Generator) for reproducible sampling.

    Returns:
        A new dataframe with len(df) - nbr_drop rows.

    Raises:
        ValueError: if nbr_drop is negative or larger than the number of rows.
    """
    nbr_rows = len(df.index)
    if not 0 <= nbr_drop <= nbr_rows:
        raise ValueError(
            f'nbr_drop must be between 0 and {nbr_rows}, got {nbr_drop}'
        )
    if nbr_drop == 0:
        return df.copy()

    rng = np.random.default_rng(seed)
    # Work with row positions rather than index labels so that duplicate
    # labels cannot cause more rows than requested to disappear.
    keep = np.ones(nbr_rows, dtype=bool)
    keep[rng.choice(nbr_rows, size=nbr_drop, replace=False)] = False
    return df.iloc[np.flatnonzero(keep)].copy()

## Grid Search

In [None]:
# Create a parameter grid: every combination of swap fraction and drop
# fraction is evaluated. The fractions are relative to the number of events
# in the log (they are multiplied by the event count in the grid search).
param_grid = {'frac_swaps': [0.0, 0.05, 0.1, 0.15], 'frac_drop': [0.0, 0.05, 0.1, 0.15]}
grid = ParameterGrid(param_grid)

In [None]:
# Grid search: for every configuration, build a manipulated log and measure
# how long the alignment computation takes on it.
# NOTE: `time` is the stdlib module here (the later `import time` wins over
# `datetime.time`). perf_counter() is used instead of time() because it is
# monotonic and meant for measuring durations.
l_res = []
nbr_events = len(df.index)
# Start timer - outer loop (covers log manipulation and alignments)
start_time_outer_loop = time.perf_counter()
for config in grid:
    print(f'Creating log for {config}')
    # Translate the configured fractions into absolute numbers of events
    nbr_swaps = int(config['frac_swaps'] * nbr_events)
    nbr_drop = int(config['frac_drop'] * nbr_events)
    # Swap first; dropping will invalidate the intra-case indexing
    df_tmp = swap_events(df, nbr_swaps)
    df_tmp = drop_events(df_tmp, nbr_drop)
    print(f'Running alignments for {config}')

    # Time only the alignment computation itself
    start_time = time.perf_counter()
    alignments_diagnostics = pm4py.conformance_diagnostics_alignments(df_tmp, net, im, fm)
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time

    # Explicit check outside the timed region (an assert would be stripped under -O)
    if alignments_diagnostics is None:
        raise RuntimeError(f'Alignment computation returned no result for {config}')

    # One result row per configuration: both fractions plus the time in seconds
    res = {**config, 'time': elapsed_time}
    l_res.append(res)
    print(config)

# End timer - outer loop
end_time_outer_loop = time.perf_counter()
print(f'Grid search took {end_time_outer_loop - start_time_outer_loop}s')

## Visualize Result as Heatmap

In [None]:
# Collect the grid-search results (one dict per configuration) in a dataframe.
df_res = pd.DataFrame(l_res)

In [None]:
# Reshape into a matrix for the heatmap: one row per frac_drop, one column
# per frac_swaps, each cell holding the measured alignment time.
df_pivot = df_res.pivot(index='frac_drop', columns='frac_swaps', values='time')

In [None]:
# Heatmap of the alignment time per (swap fraction, drop fraction) pair.
fig = go.Figure(data=go.Heatmap(
    x=df_pivot.columns,
    y=df_pivot.index,
    z=df_pivot.values,
    # A heatmap has a colorbar rather than a legend, so the unit label must be
    # set on the trace's colorbar; a layout legend title would not be shown.
    colorbar=dict(title=dict(text="Time in s"))))
fig.update_layout(
    title="Alignment Time under Systematic Deviations",
    xaxis_title="Fraction of swapped events",
    yaxis_title="Fraction of dropped events",
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="RebeccaPurple"
    ),
    width=800,
    height=600
)
fig.show()

In [None]:
# Export the heatmap as SVG for the submission.
# NOTE(review): presumably the 'submission-figures' directory has to exist
# already and plotly's static image export backend must be installed -- confirm.
fig.write_image("submission-figures/alignment-times.svg")