### Initializations for python environment

In [1]:
# First-time setup only: create the conda environment and install the packages

# %conda create -n adrec-dev anaconda -y
# %conda activate adrec-dev -y

# %conda install -n adrec-dev pandas -y

# %conda install pip

### Load models and train/test logs
We assume that a model has already been trained on each train log and is ready to be evaluated against the corresponding test log.

In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

# Folder prefixes that load_model / load_logs prepend to the file names they
# receive. Paths are built by plain string concatenation, so the trailing
# slash is required.
models_folder = './models/'
logs_folder = './logs/'

def load_model(file_path):
    """Load a persisted model from the models folder.

    Args:
        file_path: File name of the model, relative to ``models_folder``.

    Returns:
        The object deserialised by ``joblib.load``.
    """
    # NOTE(review): joblib files are pickle-based; only load files you trust.
    from joblib import load as joblib_load

    print(f'load_model: {file_path}')
    return joblib_load(models_folder + file_path)

def load_logs(file_path):
    """Read a CSV log from the logs folder into a DataFrame.

    Args:
        file_path: File name of the log, relative to ``logs_folder``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    print(f'load_logs: {file_path}')
    return pd.read_csv(logs_folder + file_path)

def classify_trace(running_trace, classifier_model):
    """Predict the class of a single trace.

    Args:
        running_trace: Feature values of one trace.
        classifier_model: Object exposing ``predict(batch)``.

    Returns:
        The first (and only) prediction for the one-element batch.
    """
    # predict() works on batches, so wrap the trace and unwrap the result.
    predictions = classifier_model.predict([running_trace])
    return predictions[0]

def recommend_adaptation_3(running_trace, classifier_model):
    """Recommend an adaptation action for a running trace.

    The class returned by ``classify_trace`` is used directly as the
    recommended adaptation action.

    Args:
        running_trace: Feature values of one trace.
        classifier_model: Model forwarded to ``classify_trace``.

    Returns:
        The predicted class of the trace.
    """
    return classify_trace(running_trace, classifier_model)

# Base names of the datasets. The model, train-log, test-log and output file
# of a dataset share this base name and differ only in their suffix, so the
# four parallel lists below are derived from it and stay index-aligned.
_dataset_names = [
    'synthetic_logs_with_adaptations_20_percent',
    'synthetic_logs_with_adaptations_40_percent',
    'synthetic_logs_with_adaptations_66_percent',
    'bpic17_logs_with_interventions_20_percent',
    'bpic17_logs_with_interventions_40_percent',
    'bpic17_logs_with_interventions_66_percent',
]

models_file_names = [f'{name}.joblib' for name in _dataset_names]

train_logs_file_names = [f'{name}_train.csv' for name in _dataset_names]

test_logs_file_names = [f'{name}_test.csv' for name in _dataset_names]

output_file_names = [f'{name}_output.csv' for name in _dataset_names]

# Parallel lists: position i holds the model, train log and test log of the
# i-th dataset.
models, train_logs, test_logs = [], [], []

# Walk the three file-name lists in lockstep. For each dataset the model is
# loaded first, then its test log, then its train log.
for model_file_name, test_file_name, train_file_name in zip(
    models_file_names, test_logs_file_names, train_logs_file_names
):
    model = load_model(model_file_name)
    models.append(model)

    test_log = load_logs(test_file_name)
    test_logs.append(test_log)

    train_log = load_logs(train_file_name)
    train_logs.append(train_log)

# Smoke test: run one recommendation with the first model on the third row of
# its test log and print it next to the label stored in the last column.
print('\ntesting with a sample')
model = models[0]
trace = test_logs[0].iloc[2]
# The row is a Series indexed by column name, so positions must go through
# .iloc: integer keys in plain [] rely on a deprecated positional fallback.
print('recommendation:', recommend_adaptation_3(trace.iloc[:-1], model))
print('actual class:', trace.iloc[-1])

load_model: synthetic_logs_with_adaptations_20_percent.joblib
load_logs: synthetic_logs_with_adaptations_20_percent_test.csv
load_logs: synthetic_logs_with_adaptations_20_percent_train.csv
load_model: synthetic_logs_with_adaptations_40_percent.joblib
load_logs: synthetic_logs_with_adaptations_40_percent_test.csv
load_logs: synthetic_logs_with_adaptations_40_percent_train.csv
load_model: synthetic_logs_with_adaptations_66_percent.joblib
load_logs: synthetic_logs_with_adaptations_66_percent_test.csv
load_logs: synthetic_logs_with_adaptations_66_percent_train.csv
load_model: bpic17_logs_with_interventions_20_percent.joblib
load_logs: bpic17_logs_with_interventions_20_percent_test.csv
load_logs: bpic17_logs_with_interventions_20_percent_train.csv
load_model: bpic17_logs_with_interventions_40_percent.joblib
load_logs: bpic17_logs_with_interventions_40_percent_test.csv
load_logs: bpic17_logs_with_interventions_40_percent_train.csv
load_model: bpic17_logs_with_interventions_66_percent.joblib


### Generate recommendations with the models and store them in output log files that mirror the test logs

In [3]:
# Folder prefix that store_logs prepends to the output file name. The path is
# built by plain string concatenation, so the trailing slash is required.
output_logs_folder = './output/'

def print_decision_path(clf, X_test):
    """Print the split rules a fitted tree applies to the first row of X_test.

    Args:
        clf: Object exposing ``tree_.feature``, ``tree_.threshold``,
            ``decision_path(X)`` and ``apply(X)`` (presumably a fitted
            scikit-learn decision tree -- confirm against the caller).
        X_test: Samples; must support ``X_test[row, column]`` indexing.

    Returns:
        None. The rules are written to stdout.
    """
    sample_id = 0  # only the first row of X_test is explained

    tree = clf.tree_
    split_features = tree.feature
    split_thresholds = tree.threshold
    path_matrix = clf.decision_path(X_test)
    reached_leaves = clf.apply(X_test)

    # ids of the nodes `sample_id` goes through, i.e. row `sample_id` of the
    # indicator returned by decision_path
    row_start = path_matrix.indptr[sample_id]
    row_end = path_matrix.indptr[sample_id + 1]
    visited_nodes = path_matrix.indices[row_start:row_end]

    print("Rules used to predict sample {id}:\n".format(id=sample_id))
    for node_id in visited_nodes:
        # only internal (split) nodes carry a rule; the leaf is skipped
        if reached_leaves[sample_id] != node_id:
            split_feature = split_features[node_id]
            observed = X_test[sample_id, split_feature]
            cutoff = split_thresholds[node_id]

            # which side of the split the sample falls on
            threshold_sign = "<=" if observed <= cutoff else ">"

            print(
                "decision node {node} : (X_test[{sample}, {feature}] = {value}) "
                "{inequality} {threshold})".format(
                    node=node_id,
                    sample=sample_id,
                    feature=split_feature,
                    value=observed,
                    inequality=threshold_sign,
                    threshold=cutoff,
                )
            )

def store_logs(dataframe, path):
    """Write a DataFrame as CSV (without the index) into the output folder.

    The destination directory is created when it does not exist yet, so the
    notebook also runs on a fresh checkout where the output folder is absent.

    Args:
        dataframe: The ``pandas.DataFrame`` to persist.
        path: File name relative to ``output_logs_folder``.
    """
    import os

    target_path = output_logs_folder + path
    # to_csv does not create missing directories; do it explicitly.
    os.makedirs(os.path.dirname(target_path) or '.', exist_ok=True)
    dataframe.to_csv(target_path, columns=dataframe.columns, index=False)




# initialize imputers
from sklearn.impute import KNNImputer

# Shared KNN imputer used by the evaluation loop below to fill in the feature
# values that are set to NaN before a recommendation is made. Each missing
# value is estimated from the 2 nearest neighbours (n_neighbors=2).
imputer_knn = KNNImputer(n_neighbors=2)


# Evaluate each model on its own test log: hide the feature that is unknown
# for a running case, impute it from the train log, ask the model for a
# recommendation and record whether it matches the logged adaptation.
# The loop variable is called `test_log` so that the `test_logs` list is not
# overwritten and this cell can be re-run without reloading the data.
for index, test_log in enumerate(test_logs):
    log_name = test_logs_file_names[index]
    print(f'logs: {log_name}')

    ml_model = models[index]
    train_logs_partition = train_logs[index].iloc[:, :-1]  # train logs without class label

    # Column(s) blanked out before recommending. The choice depends only on
    # the dataset family, so it is made once per log instead of once per row.
    if index < 3:  # synthetic_logs
        na_column_names = ['trace:cycle_time']
    else:  # bpic17_logs_with_interventions
        na_column_names = ['duration']

    logs_length = len(test_log)

    # Test cases without class label, with the hidden column(s) set to NaN.
    test_cases_with_na = test_log.iloc[:, :-1].copy()
    for column_name in na_column_names:
        test_cases_with_na[column_name] = np.nan

    # Fit the imputer once on the train log and impute all test cases in one
    # call. A case never acts as a donor for a column it is missing itself, so
    # this yields the same values as refitting on "train log + one case" for
    # every row, without copying the train log each time.
    if logs_length:
        imputer_knn.fit(train_logs_partition)
        test_cases_interpolated = imputer_knn.transform(test_cases_with_na)
    else:
        test_cases_interpolated = []  # nothing to impute for an empty log

    # Logged adaptation of every case (last column), as plain scalars.
    actual_adaptations = test_log.iloc[:, -1].to_numpy()

    results = {"adaptation_action_recommendation": [],
               "hit": []
    }
    hit_count = 0

    for row_index in range(logs_length):
        recommendation = recommend_adaptation_3(test_cases_interpolated[row_index], ml_model)

        # print_decision_path(ml_model, [row[:-1]])

        results["adaptation_action_recommendation"].append(recommendation)

        # A hit is a recommendation equal to the adaptation logged for the case.
        hit = 1 if recommendation == actual_adaptations[row_index] else 0
        hit_count += hit
        results["hit"].append(hit)

    # Align the result columns on the test log's own index before joining.
    output_logs = test_log.join(pd.DataFrame(results, index=test_log.index))
    store_logs(output_logs, output_file_names[index])

    # An empty test log has no defined hit ratio; report NaN instead of
    # failing with a division by zero.
    hit_ratio = hit_count / logs_length if logs_length else float('nan')
    print('hit ratio:', hit_ratio)



logs: synthetic_logs_with_adaptations_20_percent_test.csv
hit ratio: 0.53
logs: synthetic_logs_with_adaptations_40_percent_test.csv
hit ratio: 0.54
logs: synthetic_logs_with_adaptations_66_percent_test.csv
hit ratio: 0.5242424242424243
logs: bpic17_logs_with_interventions_20_percent_test.csv
hit ratio: 0.664756446991404
logs: bpic17_logs_with_interventions_40_percent_test.csv
hit ratio: 0.6859280483922318
logs: bpic17_logs_with_interventions_66_percent_test.csv
hit ratio: 0.5997491800115763
