In [1]:
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader


# Build the benchmark from the remote "mini" outcome-classification config,
# passing max_workers=2 through to the loader.
loader = BenchmarkLoader()
benchmark = loader.load_from_config(
    '../benchmark_configs/remote/mini/remote_outcome_classification.yaml',
    max_workers=2,
)

# Keep the task collection around and show it as the cell output.
tasks = benchmark.get_tasks()
tasks

In [None]:
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader
from sklearn.ensemble import RandomForestClassifier
from ppm_benchmark.utils.label_encoder import PPMLabelEncoder
from tqdm.notebook import tqdm

# Fixed seed so the random forest, and therefore the stored probabilities,
# are reproducible across "Restart Kernel -> Run All".
RANDOM_STATE = 42

loader = BenchmarkLoader()
benchmark = loader.load_from_folder('outcome_classification')
tasks = benchmark.get_tasks()

# Maps task name -> list with one {class label: probability} dict per test row.
results = dict()

for task_name in tqdm(tasks):
    task = benchmark.load_task(task_name)
    train = task.get_train_data()

    # Load the test split once and reuse it for the case count and the features.
    test_full = task.get_test_data()
    print(test_full['case:concept:name'].nunique())
    test = test_full.drop('target', axis=1)

    # Encode object-dtype columns: fit the encoder on the training frame, then
    # apply it to the test frame through the encoder's transform_with_new_labels.
    string_cols = train.select_dtypes(include=['object']).columns
    encoder = PPMLabelEncoder()
    train[string_cols] = encoder.fit_transform(train[string_cols])
    string_cols = test.select_dtypes(include=['object']).columns
    test[string_cols] = encoder.transform_with_new_labels(test[string_cols])

    X = train.drop('target', axis=1)
    y = train['target']

    model = RandomForestClassifier(random_state=RANDOM_STATE)
    model.fit(X, y)

    probas = model.predict_proba(test)
    # NOTE(review): if 'target' is an object column it was label-encoded above,
    # so model.classes_ would hold encoded values rather than the original
    # labels. Decoding (e.g. encoder.inverse_transform_column('target',
    # model.classes_)) may be needed if the evaluator expects original labels
    # -- confirm.

    # One dict per test row, pairing each class label with its probability.
    result = [dict(zip(model.classes_, row)) for row in probas]

    results[task.name] = result

In [None]:
import pickle

# Persist the per-task predictions to disk; a later cell reads this same file
# back so evaluation can run without retraining.
with open('outcome_classification/test_results.pkl', 'wb') as results_file:
    pickle.dump(results, results_file)

In [1]:
import pickle
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader


# Re-open the benchmark stored in the 'outcome_classification' folder and
# obtain its evaluator.
loader = BenchmarkLoader()
benchmark = loader.load_from_folder('outcome_classification')
evaluator = benchmark.get_evaluator()

# Read back the pickled predictions. Unpickling can execute arbitrary code, so
# only load a results file that this notebook (or another trusted source) wrote.
with open('outcome_classification/test_results.pkl', 'rb') as results_file:
    results = pickle.load(results_file)

In [2]:
# Hand each task's stored predictions to the evaluator, passing 'RF' as the
# third argument (presumably the model label -- confirm against the evaluator API).
for task_name, task_predictions in results.items():
    evaluator.add_predictions(task_name, task_predictions, 'RF')
    

In [None]:
# Run the evaluation; as the cell's last expression, the call's return value
# (if any) is shown as the cell output.
evaluator.evaluate()

In [None]:
# Plot the 'Accuracy' metric using the evaluator's by-fraction-completed view.
# NOTE(review): presumably accuracy versus how much of each case has been
# observed -- confirm against the evaluator documentation.
evaluator.plot_by_fraction_completed('Accuracy')

In [None]:
# Plot the 'Accuracy' metric using the evaluator's by-train-act-distance view.
# NOTE(review): presumably accuracy versus some distance to the training
# activities -- confirm against the evaluator documentation.
evaluator.plot_by_train_act_distance('Accuracy')

In [None]:
# Plot the 'Accuracy' metric using the evaluator's by-attr-drift-column view.
# NOTE(review): presumably accuracy broken down by attribute drift per column
# -- confirm against the evaluator documentation.
evaluator.plot_by_attr_drift_column('Accuracy')

In [None]:
# Draw the evaluator's bar plot.
# NOTE(review): the method name `plot_lass_bar` looks like it may be a typo --
# confirm that this method exists on the evaluator before relying on this cell.
evaluator.plot_lass_bar()