In [17]:
"""Analyze Study (Classification Task) - Jupyter notebook script."""

# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.18.1
#   kernelspec:
#     display_name: octopus
#     language: python
#     name: python3
# ---

'Analyze Study (Classification Task) - Jupyter notebook script.'

## Imports

In [18]:
from octopus.predict import OctoPredict
from octopus.predict.notebook_utils import (
    show_selected_features,
    show_study_details,
    show_target_metric_performance,
    testset_performance_overview,
)

# Analyze Study (Classification Task)
- version 0.1
- 2025.01.09

## ToDo

- create predict directory
- create utility functions in separate file
- functionality:
  1. study overview: which workflow tasks, number of splits
  2. performance overview for certain given metric
  3. provide feature lists for each task
- aucpr -- baseline

## Input

In [19]:
# INPUT: directory of the study to analyze. This is the only value a reader
# needs to change to point the notebook at a different study; it is passed to
# show_study_details() in the next section.
study_directory = "./studies/example_octo_autogluon_parallel/"

## Show Study Details

In [20]:
# Display and validate the details of the study located at `study_directory`.
# expected_ml_type="classification" is passed because this notebook is written
# for classification studies. The returned `study_info` is reused by later
# cells (e.g. study_info["path"] when loading the predictors).
study_info = show_study_details(study_directory, expected_ml_type="classification")

# Optional: unpack individual entries of study_info for use in subsequent cells.
# NOTE(review): the key names below come from these commented-out examples only;
# "path" is the single key actually read elsewhere in this notebook. Confirm the
# others against show_study_details() before uncommenting.
# path_study = study_info["path"]
# config = study_info["config"]
# ml_type = study_info["ml_type"]
# n_folds_outer = study_info["n_folds_outer"]
# workflow_tasks = study_info["workflow_tasks"]
# outersplit = study_info["outersplit_dirs"]
# expected_task_ids = study_info["expected_task_ids"]
# octo_workflow_lst = study_info["octo_workflow_tasks"]

Selected study path: studies/example_octo_autogluon_parallel

Validate study....
ML Type: classification
Found 5 outersplit directory/directories
Expected outersplit IDs: [0, 1, 2, 3, 4]
All expected outersplit directories found
Expected workflow task IDs: [0, 1]
Study has completed workflow tasks - all expected directories found

Information on workflow tasks in this study
Number of workflow tasks: 2
Task 0: octo
Task 1: autogluon
Octo workflow tasks: [0]


## Show Target Metric Performance for All Tasks

In [21]:
# Display the target-metric performance for all workflow tasks of the study.
# The returned `df_performance` is kept because the feature-summary cell below
# passes it to show_selected_features().
# NOTE(review): details=False presumably suppresses additional per-task output;
# confirm against show_target_metric_performance().
df_performance = show_target_metric_performance(study_info, details=False)

[1mWorkflow task: 0[0m
Available results keys: ['best']
Selected results key: best
            train_avg   dev_avg  test_avg  train_pool  dev_pool  test_pool
OuterSplit                                                                
0            0.901713  0.734636  0.790278    0.917433  0.732466   0.810185
1            0.896212  0.778854  0.788657    0.908163  0.769546   0.807870
2            0.903516  0.752681  0.743981    0.921314  0.747126   0.765046
3            0.914822  0.811106  0.660571    0.922212  0.796482   0.673143
4            0.938804  0.759212  0.851657    0.963449  0.733256   0.872000
Mean         0.911014  0.767298  0.767029    0.926514  0.755775   0.785649
[1mWorkflow task: 1[0m
Available results keys: ['autogluon']
Selected results key: autogluon
            score_val_dev  pred_time_val_dev  pred_time_val_marginal_dev  \
OuterSplit                                                                 
0                0.780720           0.114040                    0.11

## Show Selected Features Summary

In [22]:
# Display the number of selected features across outer splits and tasks.
# Returns two tables:
#   - feature_numbers_table: number of selected features per outer split and task
#   - feature_frequency_table: how often each feature was selected across outer splits
# The sort_task parameter sorts the frequency table by the specified task.
# NOTE(review): with sort_by_task = None the recorded output reports
# "Sorted by Task 0 frequency", so None appears to fall back to a default task;
# confirm against show_selected_features().
sort_by_task = None
feature_numbers_table, feature_frequency_table = show_selected_features(df_performance, sort_task=sort_by_task)


NUMBER OF SELECTED FEATURES
Rows: OuterSplit | Columns: Task ID
Task            0       1
OuterSplit               
0           169.0  1000.0
1           131.0  1000.0
2           173.0  1000.0
3           160.0  1000.0
4           174.0  1000.0
Mean        161.4  1000.0


FEATURE FREQUENCY ACROSS OUTER SPLITS
Rows: Features | Columns: Task ID
Sorted by Task 0 frequency (highest first)
                0  1
informative_22  5  5
informative_2   5  5
informative_11  5  5
informative_23  5  5
noise_124       5  5
...            .. ..
redundant_83    0  5
redundant_84    0  5
redundant_85    0  5
redundant_86    0  5
informative_0   0  5

[1000 rows x 2 columns]



## Evaluate Model Performance on Test Dataset for a given Task


In [29]:
# Load one predictor object per workflow task of the study:
#   - task 0 with results key "best"
#   - task 1 with results key "autogluon"
# Both read from the same study path returned by show_study_details().
task_predictor_octo = OctoPredict(
    study_path=study_info["path"],
    task_id=0,
    results_key="best",
)
task_predictor_ag = OctoPredict(
    study_path=study_info["path"],
    task_id=1,
    results_key="autogluon",
)


Loading available experiments ......
Outersplit0, task0 found.
Outersplit1, task0 found.
Outersplit2, task0 found.
Outersplit3, task0 found.
Outersplit4, task0 found.
5 experiment(s) out of 5 found.

Loading available experiments ......
Outersplit0, task1 found.
Outersplit1, task1 found.
Outersplit2, task1 found.
Outersplit3, task1 found.
Outersplit4, task1 found.
5 experiment(s) out of 5 found.


### Test-Set Performance Overview for Selected Metrics

In [27]:
# INPUT: metrics to report in the test-set performance overview.
# The same list is passed to testset_performance_overview() for every predictor,
# so the resulting tables share identical columns.
metrics = [
    "AUCROC",
    "ACCBAL",
    "ACC",
    "F1",
    "AUCPR",
    "NEGBRIERSCORE",
]
print("Selected metrics: ", metrics)

Selected metrics:  ['AUCROC', 'ACCBAL', 'ACC', 'F1', 'AUCPR', 'NEGBRIERSCORE']


In [None]:
# Test-set performance overview for the task-0 predictor (task_predictor_octo),
# evaluated on the metrics selected above. The result is kept for later use.
testset_performance_octo = testset_performance_overview(predictor=task_predictor_octo, metrics=metrics)


Performance on test dataset (pooling)
              AUCROC    ACCBAL       ACC        F1     AUCPR  NEGBRIERSCORE
outersplit                                                                 
0           0.810185  0.729167  0.733333  0.680000  0.772617       0.218037
1           0.807870  0.763889  0.750000  0.727273  0.682217       0.221833
2           0.765046  0.680556  0.700000  0.608696  0.758529       0.210986
3           0.673143  0.551429  0.583333  0.418605  0.591645       0.231660
4           0.872000  0.757143  0.750000  0.727273  0.828682       0.201398
Mean        0.785649  0.696437  0.703333  0.632369  0.726738       0.216783


In [30]:
# Test-set performance overview for the task-1 predictor (task_predictor_ag),
# evaluated on the same metrics as the task-0 predictor so the tables compare directly.
testset_performance_ag = testset_performance_overview(predictor=task_predictor_ag, metrics=metrics)

Performance on test dataset (pooling)
              AUCROC    ACCBAL       ACC        F1     AUCPR  NEGBRIERSCORE
outersplit                                                                 
0           0.892940  0.715278  0.766667  0.611111  0.849247       0.180301
1           0.866319  0.743056  0.783333  0.666667  0.788766       0.197309
2           0.851273  0.743056  0.783333  0.666667  0.825113       0.180651
3           0.774857  0.585714  0.650000  0.322581  0.725365       0.207583
4           0.908000  0.805714  0.833333  0.761905  0.902734       0.177163
Mean        0.858678  0.718563  0.763333  0.605786  0.818245       0.188601
