In [1]:
import os
import sys

sys.path.insert(0, '..')

import pyarrow.parquet as pq

from pysalient import io as io
from pysalient import visualisation as vis
from pysalient.evaluation import evaluation
from pysalient.evaluation import compare_models

In [2]:
# --- Load the sample parquet file and print a quick summary of it ---
sample_data_path = os.path.join("data", "anonymised_sample.parquet")
table = pq.read_table(sample_data_path)

# Size and column names, taken directly from the Arrow table.
print(f"Number of rows: {table.num_rows}")
print(table.column_names)

# Row-level positives: extract the label column as a pandas Series and count
# the entries that equal 1.
true_label_series = table["true_label"].to_pandas()
true_count = true_label_series.eq(1).sum()
print(f"Number of true labels (1): {true_count}")

# Full pandas copy of the table; the grouping below works on this frame.
df = table.to_pandas()

# Preview the first five rows (head() defaults to n=5).
print(df.head())

# Encounter-level view: rows are grouped by their encounter_id value.
grouped = df.groupby("encounter_id")

# How many distinct encounters are present.
num_groups = df["encounter_id"].nunique()
print(f"Number of unique encounter groups: {num_groups}")

# Sum of true_label within each encounter; a sum above zero means the
# encounter contains at least one row labelled 1.
group_sums = grouped["true_label"].sum()
groups_with_positives = group_sums.gt(0).sum()
print(
    f"Number of encounter groups with at least one true positive: {groups_with_positives}"
)

Number of rows: 21356
['encounter_id', 'event_timestamp', 'culture_event', 'suspected_infection', 'true_label', 'prediction_proba_1', 'prediction_proba_2']
Number of true labels (1): 789
  encounter_id  event_timestamp  culture_event  suspected_infection  \
0     0666505c              2.0            1.0                  1.0   
1     0666505c              2.0            1.0                  1.0   
2     0666505c              3.0            1.0                  1.0   
3     0666505c              3.0            1.0                  1.0   
4     0666505c              4.0            1.0                  1.0   

   true_label  prediction_proba_1  prediction_proba_2  
0           1            0.185334            0.531925  
1           1            0.185334            0.531925  
2           1            0.134316            0.360046  
3           1            0.134316            0.360046  
4           1            0.118005            0.167655  
Number of unique encounter groups: 100
Number of e

In [3]:
# NOTE(review): this cell recomputes the same summary as the preceding cell
# (same file, same variables, same printed lines). Consider deleting one of them.

# Read the sample parquet file into an Arrow table.
sample_data_path = os.path.join("data", "anonymised_sample.parquet")
table = pq.read_table(sample_data_path)

# Report the table size and its column names.
print(f"Number of rows: {table.num_rows}")
print(table.column_names)

# Label column as a pandas Series, then the number of entries equal to 1.
true_label_series = table["true_label"].to_pandas()
is_positive = true_label_series == 1
true_count = is_positive.sum()
print(f"Number of true labels (1): {true_count}")

# Whole table as a DataFrame, with a five-row preview.
df = table.to_pandas()
print(df.head(5))

# Group rows by encounter and report how many distinct encounters exist.
grouped = df.groupby("encounter_id")
num_groups = df["encounter_id"].nunique()
print(f"Number of unique encounter groups: {num_groups}")

# Per-encounter label totals; encounters whose total exceeds zero hold at
# least one positive row.
group_sums = grouped["true_label"].sum()
has_positive = group_sums > 0
groups_with_positives = has_positive.sum()
print(
    f"Number of encounter groups with at least one true positive: {groups_with_positives}"
)

Number of rows: 21356
['encounter_id', 'event_timestamp', 'culture_event', 'suspected_infection', 'true_label', 'prediction_proba_1', 'prediction_proba_2']
Number of true labels (1): 789
  encounter_id  event_timestamp  culture_event  suspected_infection  \
0     0666505c              2.0            1.0                  1.0   
1     0666505c              2.0            1.0                  1.0   
2     0666505c              3.0            1.0                  1.0   
3     0666505c              3.0            1.0                  1.0   
4     0666505c              4.0            1.0                  1.0   

   true_label  prediction_proba_1  prediction_proba_2  
0           1            0.185334            0.531925  
1           1            0.185334            0.531925  
2           1            0.134316            0.360046  
3           1            0.134316            0.360046  
4           1            0.118005            0.167655  
Number of unique encounter groups: 100
Number of e

In [4]:
# Load the sample file once per model so that each resulting table carries its
# own prediction column in the pysalient metadata.
#
# The path is relative to the current working directory, so the notebook must
# be started from a directory that contains `data/`.
sample_data_path = os.path.join("data", "anonymised_sample.parquet")

assigned_table_events = None

if os.path.exists(sample_data_path):
    # Column names are the ones printed by the inspection cell above.
    # Neither assign_task_name nor assign_model_name is passed here.
    # NOTE(review): the headings printed below still mention "assigned names"
    # and added 'task'/'model' columns, yet the recorded schema output lists no
    # such columns - confirm and reword the headings if they are stale.

    # --- Model 1: predictions in `prediction_proba_1` ---
    model_1_evaluation = io.load_evaluation_data(
        source=sample_data_path,
        y_proba_col="prediction_proba_1",
        y_label_col="true_label",
        aggregation_cols=None,
        timeseries_col="event_timestamp",
    )

    print("\nSuccessfully loaded data with assigned names (Model 1):")
    print(model_1_evaluation.schema)
    print(f"\nNumber of rows: {model_1_evaluation.num_rows}")

    # Preview the first five rows of the loaded table.
    print("\nFirst 5 rows (with added 'task' and 'model' columns):")
    print(model_1_evaluation.slice(0, 5).to_pandas())

    # --- Model 2: same file and labels, predictions in `prediction_proba_2` ---
    model_2_evaluation = io.load_evaluation_data(
        source=sample_data_path,
        y_proba_col="prediction_proba_2",
        y_label_col="true_label",
        aggregation_cols=None,
        timeseries_col="event_timestamp",
    )

    # Fixed: this banner previously said "(Model 1)" although it reports the
    # table loaded for model 2.
    print("\nSuccessfully loaded data with assigned names (Model 2):")
    print(model_2_evaluation.schema)
    print(f"\nNumber of rows: {model_2_evaluation.num_rows}")

    # Preview the first five rows of the loaded table.
    print("\nFirst 5 rows (with added 'task' and 'model' columns):")
    print(model_2_evaluation.slice(0, 5).to_pandas())

else:
    # Without the file neither model_*_evaluation variable is defined, so the
    # evaluation cells further down cannot run.
    print(
        f"Skipping data loading as file was not found: {sample_data_path}"
    )



Successfully loaded data with assigned names (Model 1):
encounter_id: string
event_timestamp: double
culture_event: double
suspected_infection: double
true_label: int64
prediction_proba_1: float
prediction_proba_2: float
-- schema metadata --
pysalient.io.y_proba_col: 'prediction_proba_1'
pysalient.io.y_label_col: 'true_label'
pysalient.io.timeseries_col: 'event_timestamp'
pysalient.io.aggregation_cols: '[]'

Number of rows: 21356

First 5 rows (with added 'task' and 'model' columns):
  encounter_id  event_timestamp  culture_event  suspected_infection  \
0     0666505c              2.0            1.0                  1.0   
1     0666505c              2.0            1.0                  1.0   
2     0666505c              3.0            1.0                  1.0   
3     0666505c              3.0            1.0                  1.0   
4     0666505c              4.0            1.0                  1.0   

   true_label  prediction_proba_1  prediction_proba_2  
0           1            0

In [5]:
# Identifiers recorded with each evaluation run.
eval_modelid_1 = "LogRegress_01"  # passed with model_1_evaluation
eval_modelid_2 = "LightGBM_01"  # passed with model_2_evaluation
eval_filter = "ExampleFilterDummy"  # free-text description of the data subset

# Threshold specification given as a 3-tuple; per the original author's note it
# denotes the range 0.1, 0.2, ..., 0.9. An explicit list such as
# [0.1, 0.2, ..., 0.9] was noted as an alternative form.
eval_thresholds = (0.1, 0.9, 0.1)

# Settings that are identical for both models, written once so the two calls
# below differ only in their data table and model id.
_shared_eval_kwargs = dict(
    filter_desc=eval_filter,
    thresholds=eval_thresholds,
    # Rounding applied to float outputs.
    # TODO (from original): check that -1 means "no rounding".
    decimal_places=3,
    # Confidence intervals: requested for the area-under-curve metrics and for
    # the per-threshold metrics, the latter using the bootstrap method.
    calculate_au_ci=True,
    calculate_threshold_ci=True,
    threshold_ci_method="bootstrap",
    ci_alpha=0.05,  # i.e. 95% intervals
    # Fixed seed so the bootstrap intervals are reproducible across runs.
    bootstrap_seed=42,
    bootstrap_rounds=1000,
    force_threshold_zero=True,
    verbosity=1,
    # Also hand back the bootstrap samples; the model comparison cell below
    # passes them to compare_models.
    return_bootstrap_samples=True,
)

# Evaluate model 1.
evaluation_results_1, bootstrap1 = evaluation(
    data=model_1_evaluation,
    modelid=eval_modelid_1,
    **_shared_eval_kwargs,
)

# Evaluate model 2 with the same settings.
evaluation_results_2, bootstrap2 = evaluation(
    data=model_2_evaluation,
    modelid=eval_modelid_2,
    **_shared_eval_kwargs,
)

Metric func '_calculate_npv_boot': All bootstrap rounds failed calculation; cannot compute CI. This may indicate an issue with the metric calculation or the bootstrap sample characteristics.
Metric func '_calculate_ppv_boot': All bootstrap rounds failed calculation; cannot compute CI. This may indicate an issue with the metric calculation or the bootstrap sample characteristics.
Metric func '_calculate_ppv_boot': All bootstrap rounds failed calculation; cannot compute CI. This may indicate an issue with the metric calculation or the bootstrap sample characteristics.
Metric func '_calculate_ppv_boot': All bootstrap rounds failed calculation; cannot compute CI. This may indicate an issue with the metric calculation or the bootstrap sample characteristics.
Metric func '_calculate_ppv_boot': All bootstrap rounds failed calculation; cannot compute CI. This may indicate an issue with the metric calculation or the bootstrap sample characteristics.
Metric func '_calculate_ppv_boot': All bootst

In [7]:
# Compare the two evaluated models on AUROC and AUPRC. Significance testing is
# switched on and is given the bootstrap samples returned by the two
# evaluation() calls above, in the same order as the results and labels.
model_compare = compare_models(
    evaluation_results=[evaluation_results_1, evaluation_results_2],
    bootstrap_samples=[bootstrap1, bootstrap2],
    model_labels=["lg", "lgm"],
    include_metrics=["AUROC", "AUPRC"],
    calculate_statistical_significance=True,
    significance_alpha=0.05,
    n_permutations=10_000,
)

In [None]:
# Visualisation
styled_results = vis.format_evaluation_table(
    evaluation_results_1, decimal_places=3, ci_column=False
)
display(styled_results)

In [None]:
# Visualisation
styled_results = vis.format_evaluation_table(
    evaluation_results_2, decimal_places=3, ci_column=False
)
display(styled_results)

In [8]:
# Format the compare_models output with the same table formatter and settings
# used for the per-model results, then render it.
styled_results = vis.format_evaluation_table(
    model_compare,
    decimal_places=3,
    ci_column=False,
)
display(styled_results)

Unnamed: 0,threshold,model,metric,value,lower_ci,upper_ci,p_value
0,0.0,lg,AUROC,0.706,0.682,0.727,0.0
1,0.1,lg,AUROC,0.706,0.682,0.727,0.0
2,0.2,lg,AUROC,0.706,0.682,0.727,0.0
3,0.3,lg,AUROC,0.706,0.682,0.727,0.0
4,0.4,lg,AUROC,0.706,0.682,0.727,0.0
5,0.5,lg,AUROC,0.706,0.682,0.727,0.0
6,0.6,lg,AUROC,0.706,0.682,0.727,0.0
7,0.7,lg,AUROC,0.706,0.682,0.727,0.0
8,0.8,lg,AUROC,0.706,0.682,0.727,0.0
9,0.9,lg,AUROC,0.706,0.682,0.727,0.0
