In [1]:
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader


# Benchmark definition to load; the path is relative to the notebook's working directory.
CONFIG_PATH = '../benchmark_configs/remote/mini/remote_next_attribute_classification.yaml'

# Build the benchmark from its YAML config with two workers. Per the log output this
# initializes the tasks and then the evaluator.
loader = BenchmarkLoader()
benchmark = loader.load_from_config(CONFIG_PATH, max_workers=2)

# Keep the task list as the cell's last expression so the notebook displays it.
tasks = benchmark.get_tasks()
tasks

2025-03-24 12:23:35,503 - ppm_benchmark.core.benchmark_loader - INFO - Completed task: bpi_2015_1_next_attribute
2025-03-24 12:23:35,612 - ppm_benchmark.core.benchmark_loader - INFO - Completed task: bpi_2015_3_next_attribute
2025-03-24 12:23:35,834 - ppm_benchmark.core.benchmark_loader - INFO - Tasks initialized successfully.
2025-03-24 12:23:35,835 - ppm_benchmark.core.benchmark_loader - INFO - Initializing evaluator...
2025-03-24 12:23:46,268 - ppm_benchmark.core.benchmark_loader - INFO - Evaluator initialized successfully.


['bpi_2015_1_next_attribute', 'bpi_2015_3_next_attribute']

In [2]:
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader
from sklearn.ensemble import RandomForestClassifier
from ppm_benchmark.utils.label_encoder import PPMLabelEncoder
from tqdm.notebook import tqdm

# Fixed seed for the random forest so the reported metrics can be reproduced on re-run.
RANDOM_STATE = 42
# Only tasks whose name contains one of these log identifiers are trained on.
TRAINED_LOGS = ('bpi_2015_1', 'bpi_2015_3')

loader = BenchmarkLoader()
benchmark = loader.load_from_folder('next_attribute_classification')
tasks = benchmark.get_tasks()
# Maps task name -> list with one {decoded class label: probability} dict per test row.
results = dict()

for task_name in tqdm(tasks):
    print(task_name)
    # Guard clause: skip every task that is not part of the selected logs.
    if not any(log in task_name for log in TRAINED_LOGS):
        continue

    print(f'Training for {task_name}')
    task = benchmark.load_task(task_name)
    train = task.get_train_data()

    original_cols = train.columns
    # Keep only labelled rows, then drop every column that still contains a missing
    # value. The explicit copy makes the frame independent of the filtered slice, so
    # the in-place column assignment below cannot trigger SettingWithCopyWarning.
    train = train[train['target'].notna()].dropna(axis=1).copy()

    # Remove the same columns (plus the label) from the test frame.
    dropped_cols = [col for col in original_cols if col not in train.columns]
    dropped_cols.append('target')
    test = task.get_test_data().drop(dropped_cols, axis=1)

    # Encode string columns; the encoder is fitted on the training frame only.
    encoder = PPMLabelEncoder()
    string_cols = train.select_dtypes(include=['object']).columns
    train[string_cols] = encoder.fit_transform(train[string_cols])
    # NOTE(review): transform_with_new_labels presumably tolerates values unseen
    # during fitting -- confirm against PPMLabelEncoder.
    string_cols = test.select_dtypes(include=['object']).columns
    test[string_cols] = encoder.transform_with_new_labels(test[string_cols])

    X = train.drop('target', axis=1)
    y = train['target']

    model = RandomForestClassifier(random_state=RANDOM_STATE)
    model.fit(X, y)

    # Select the test columns in training order so the feature layout passed to the
    # model always matches the one it was fitted on.
    probas = model.predict_proba(test[X.columns])
    # Columns of `probas` follow model.classes_; map them back to the original labels.
    decoded_labels = encoder.inverse_transform_column('target', model.classes_)

    result = [
        {decoded_labels[idx]: prob for idx, prob in enumerate(row)}
        for row in probas
    ]

    results[task.name] = result

  0%|          | 0/2 [00:00<?, ?it/s]

bpi_2015_1_next_attribute
Training for bpi_2015_1_next_attribute
bpi_2015_3_next_attribute
Training for bpi_2015_3_next_attribute


  train_df = pd.read_csv(os.path.join(self.save_folder, 'train.csv'))


In [3]:
import pickle

# Persist the per-task predictions to disk so they can be reloaded later without retraining.
RESULTS_PATH = 'next_attribute_classification/test_results.pkl'

with open(RESULTS_PATH, mode='wb') as results_file:
    pickle.dump(results, results_file)

In [1]:
import pickle
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader


# Location of the pickled predictions to evaluate.
RESULTS_PATH = 'next_attribute_classification/test_results.pkl'

# Re-open the benchmark from its folder and obtain its evaluator.
loader = BenchmarkLoader()
benchmark = loader.load_from_folder('next_attribute_classification')
evaluator = benchmark.get_evaluator()

# Unpickling executes arbitrary code from the file, so only load files you created yourself.
with open(RESULTS_PATH, mode='rb') as results_file:
    results = pickle.load(results_file)

In [2]:
# Label under which the predictions are registered; it shows up as the 'RF' column
# in the evaluation table.
MODEL_LABEL = 'RF'

# Hand each task's predictions to the evaluator.
# NOTE(review): the meaning of the trailing None argument is not visible here -- confirm
# against Evaluator.add_predictions.
for name, task_predictions in results.items():
    evaluator.add_predictions(name, task_predictions, MODEL_LABEL, None)
    

Length before: 9407
Length after: 9407
Length before: 9886
Length after: 9886


In [3]:
# Compute the metric table for all registered predictions. It is left as the last
# expression so the notebook renders it (one row per task/metric, one column per
# model or baseline, as seen in the output below).
evaluator.evaluate()

Unnamed: 0,task_name,metric,RF,naive_baseline,train_branches
0,bpi_2015_1_next_attribute,Accuracy,0.256511,0.05379,0.11332
1,bpi_2015_1_next_attribute,PRS,0.289927,0.318381,0.405586
2,bpi_2015_1_next_attribute,Precision,0.262128,0.063744,0.12418
3,bpi_2015_1_next_attribute,Recall,0.256511,0.05379,0.11332
4,bpi_2015_1_next_attribute,F1Score,0.253231,0.048812,0.113304
5,bpi_2015_3_next_attribute,Accuracy,0.431722,0.068278,0.226684
6,bpi_2015_3_next_attribute,PRS,0.276827,0.176556,0.217014
7,bpi_2015_3_next_attribute,Precision,0.408997,0.071389,0.237363
8,bpi_2015_3_next_attribute,Recall,0.431722,0.068278,0.226684
9,bpi_2015_3_next_attribute,F1Score,0.414009,0.059137,0.220152


In [5]:
import warnings

# Suppress every warning category. The filter is installed process-wide, so it also
# stays in effect for all cells executed after this one.
warnings.simplefilter("ignore")

# Plot accuracy by train distance and save the figure (no PGF backend, single-row layout).
# NOTE(review): the second positional argument looks like the benchmark/output folder
# name -- confirm against Evaluator.plot_by_train_distance.
evaluator.plot_by_train_distance(
    'Accuracy',
    'next_attribute_classification',
    save=True,
    use_pgf=False,
    single_row=True,
)

plot_df length before dropping nan: 19293
plot_df length after dropping nan: 19293
Generating plot for bpi_2015_1_next_attribute
Total original traces in test data: 9407
Number of samples included: 9407
Generating plot for bpi_2015_3_next_attribute
Total original traces in test data: 9886
Number of samples included: 9886


In [6]:
# Plot accuracy with respect to attribute drift and save the figure
# (no PGF backend, single-row layout).
# NOTE(review): the second positional argument looks like the benchmark/output folder
# name -- confirm against Evaluator.plot_attr_drift.
evaluator.plot_attr_drift(
    'Accuracy',
    'next_attribute_classification',
    save=True,
    use_pgf=False,
    single_row=True,
)

plot_df length before dropping nan: 19293
plot_df length after dropping nan: 19293


In [7]:
# Plot accuracy by fraction of the trace completed and save the figure
# (no PGF backend, single-row layout).
# NOTE(review): the second positional argument looks like the benchmark/output folder
# name -- confirm against Evaluator.plot_by_fraction_completed.
evaluator.plot_by_fraction_completed(
    'Accuracy',
    'next_attribute_classification',
    save=True,
    use_pgf=False,
    single_row=True,
)

plot_df length before dropping nan: 19293
plot_df length after dropping nan: 19293
