In [1]:
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader


# Build the benchmark from the remote next-attribute-regression YAML config.
# NOTE(review): max_workers=2 presumably limits parallelism while the benchmark
# is being prepared — confirm against BenchmarkLoader.load_from_config.
loader = BenchmarkLoader()
benchmark = loader.load_from_config('../benchmark_configs/remote/remote_next_attribute_regression.yaml', max_workers=2)

# Fetch the benchmark's tasks; the bare `tasks` expression on the last line
# makes Jupyter render them as this cell's output.
tasks = benchmark.get_tasks()
tasks

In [2]:
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader
from sklearn.ensemble import RandomForestRegressor
from ppm_benchmark.utils.label_encoder import PPMLabelEncoder
from tqdm.notebook import tqdm

# Baseline run: fit one random-forest regressor per benchmark task and collect
# its predictions for the task's test set in `results`.
RANDOM_STATE = 42  # fixed seed so the forests (and the saved predictions) are reproducible

loader = BenchmarkLoader()
benchmark = loader.load_from_folder('next_attribute_regression')
tasks = benchmark.get_tasks()
results = dict()

for task_name in tqdm(tasks):
    task = benchmark.load_task(task_name)
    # Work on a copy of the training frame: the encoding step below assigns
    # into it, and the object returned by the task may be shared or cached
    # (not visible from this notebook). `drop` already returns a new frame
    # for the test data, so no extra copy is needed there.
    train = task.get_train_data().copy()
    test = task.get_test_data().drop('target', axis=1)

    # Encode object-dtype columns: the encoder is fitted on the training
    # columns and then applied to the test columns.
    # NOTE(review): transform_with_new_labels presumably copes with labels
    # that were not seen during fitting — confirm in PPMLabelEncoder.
    string_cols = train.select_dtypes(include=['object']).columns
    encoder = PPMLabelEncoder()
    train[string_cols] = encoder.fit_transform(train[string_cols])
    string_cols = test.select_dtypes(include=['object']).columns
    test[string_cols] = encoder.transform_with_new_labels(test[string_cols])

    # Split features from the regression target.
    X = train.drop('target', axis=1)
    y = train['target']

    # Seeded so that repeated runs produce the same forest.
    model = RandomForestRegressor(random_state=RANDOM_STATE)
    model.fit(X, y)

    preds = model.predict(test)
    # Keyed by task.name (not the loop's task_name).
    # NOTE(review): presumably the two are identical — confirm.
    results[task.name] = preds

In [3]:
import pickle

# Persist the `results` dictionary inside the benchmark folder so that it can
# be read back later without re-running the training loop.
with open('next_attribute_regression/test_results.pkl', 'wb') as results_file:
    pickle.dump(results, results_file)

In [1]:
import pickle
from ppm_benchmark.core.benchmark_loader import BenchmarkLoader


# Open the benchmark stored in the 'next_attribute_regression' folder and
# obtain its evaluator.
loader = BenchmarkLoader()
benchmark = loader.load_from_folder('next_attribute_regression')
evaluator = benchmark.get_evaluator()

# Read back the pickled results.
# NOTE: pickle.load can execute arbitrary code, so this file must be one that
# was produced locally and is trusted.
with open('next_attribute_regression/test_results.pkl', 'rb') as results_file:
    results = pickle.load(results_file)

In [2]:
# Hand every entry of `results` to the evaluator, labelled as model 'RF'.
for name in results:
    evaluator.add_predictions(name, results[name], 'RF')
    

In [3]:
# Run the evaluator; as the only expression in this cell, whatever
# evaluate() returns is rendered as the cell output.
evaluator.evaluate()

In [4]:
# Ask the evaluator for its "by fraction completed" plot of the 'MAE' metric.
# NOTE(review): presumably error versus how much of a case has been observed — confirm.
evaluator.plot_by_fraction_completed('MAE')

In [5]:
# Ask the evaluator for its "by train distance" plot of the 'MAE' metric.
# NOTE(review): the definition of "train distance" is not visible here — confirm in the evaluator.
evaluator.plot_by_train_distance('MAE')

In [6]:
# Ask the evaluator for its attribute-drift plot of the 'MAE' metric.
# NOTE(review): how drift is measured is not visible here — confirm in the evaluator.
evaluator.plot_attr_drift('MAE')