In [1]:
import sys
sys.path.append('..')
import datetime
import time
from collections import Counter
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt
from kneed import KneeLocator
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score, log_loss, roc_auc_score
import json
from IPython.display import display, HTML
display(HTML("<style>:root { --jp-notebook-max-width: 100% !important; }</style>"))

In [2]:
def cross_entropy_loss(model, x_test, y_test):
    """
    Per-sample loss of `model` on (`x_test`, `y_test`).

    For every sample the true label is turned into a one-hot vector over
    `model.classes_` and compared with the predicted probability row using
    `sklearn.metrics.log_loss`. Because both arguments are 1-D, `log_loss`
    treats each class as one binary observation, so the value is the mean
    binary cross-entropy across classes.

    Parameters
    ----------
    model : fitted classifier exposing `predict_proba` and `classes_`.
    x_test : feature matrix accepted by `model.predict_proba`.
    y_test : iterable of true labels, one per row of `x_test`.

    Returns
    -------
    ndarray with one loss value per element of `y_test`. Labels that are not
    in `model.classes_` receive a fixed penalty: the loss of a completely
    wrong binary prediction plus 1.
    """
    probs = model.predict_proba(x_test)
    classes = model.classes_

    # The penalty is a constant, so compute it at most once (and only if needed).
    unseen_penalty = None

    losses = []
    for i, true_label in enumerate(y_test):
        idx_arr = np.where(classes == true_label)[0]
        if len(idx_arr) == 0:
            if unseen_penalty is None:
                unseen_penalty = log_loss(y_true=[1, 0], y_pred=[0, 1]) + 1
            losses.append(unseen_penalty)
        else:
            true_label_one_hot = np.zeros_like(probs[i])
            true_label_one_hot[idx_arr] = 1
            # `labels` is given explicitly: with a single-class model the one-hot
            # vector is just [1.] and log_loss would otherwise raise
            # "y_true contains only one label".
            losses.append(
                log_loss(y_true=true_label_one_hot, y_pred=probs[i], labels=[0.0, 1.0])
            )

    return np.array(losses)

In [3]:
def normal_loss(model, x_test, y_test):
    """
    Per-sample "1 - probability of the true label" loss.

    Parameters
    ----------
    model : fitted classifier exposing `predict_proba` and `classes_`.
    x_test : feature matrix accepted by `model.predict_proba`.
    y_test : iterable of true labels, one per row of `x_test`.

    Returns
    -------
    ndarray with one value per element of `y_test`:
    `1 - P(true label)` when the label is in `model.classes_`, otherwise the
    sentinel 1.1 (strictly larger than any attainable loss).
    """
    probs = model.predict_proba(x_test)
    classes = model.classes_

    losses = []
    for i, true_label in enumerate(y_test):
        idx_arr = np.where(classes == true_label)[0]
        if len(idx_arr) == 0:
            # Label never seen by the model: flag it with a value above 1.
            losses.append(1.1)
        else:
            # Only the column of the true class is needed.
            losses.append(1 - probs[i][idx_arr[0]])

    return np.array(losses)

In [4]:
def safe_transform_target(encoder, targets, unknown_value=-1):
    """
    Encode `targets` with a fitted label encoder, tolerating unseen labels.

    Parameters
    ----------
    encoder : fitted encoder exposing `classes_`; the code of a label is taken
        to be its position in `classes_`, which is what
        `sklearn.preprocessing.LabelEncoder.transform` returns.
    targets : iterable of labels to encode.
    unknown_value : value emitted for labels that are not in `classes_`.

    Returns
    -------
    ndarray with one entry per element of `targets`.
    """
    # Build the label -> code table once instead of calling encoder.transform
    # for every single element.
    code_of = {label: code for code, label in enumerate(encoder.classes_)}
    return np.array([code_of.get(t, unknown_value) for t in targets])

In [5]:
def get_clean_loss(normal_loss_value, cross_entropy_loss_value):
    """
    Drop every position whose normal loss is exactly 1 and return the
    remaining normal losses together with the cross-entropy losses found at
    the same positions.

    Returns
    -------
    (normal_loss_dist, cross_loss_dist) : tuple of two lists of equal length.
    """
    # Single pass: keep (normal, cross-entropy) pairs for the surviving positions.
    survivors = [
        (value, cross_entropy_loss_value[position])
        for position, value in enumerate(normal_loss_value)
        if value != 1
    ]

    kept_normal = [normal for normal, _ in survivors]
    kept_cross = [cross for _, cross in survivors]
    return kept_normal, kept_cross

In [6]:
def cleaning_cls_result(classification_result):
    """
    Make sure every per-prefix report has an entry for class '1'.

    Each report's keys are printed; reports lacking a '1' entry get an
    all-zero one. The input dict is modified in place and also returned.
    """
    for prefix_key, report in classification_result.items():
        print(prefix_key, report.keys())

        if '1' in report:
            continue
        # No anomaly row in this report: insert an all-zero placeholder.
        report['1'] = {'precision': 0, 'recall': 0, 'f1-score': 0, 'support': 0.0}

    return classification_result

In [7]:
def sample_with_min_anomalies(gt_labels, num_samples=10, min_anomalies=3, random_state=None):
    """
    Randomly sample `num_samples` indices from gt_labels (0/1 array),
    ensuring at least `min_anomalies` true-anomaly (1) indices are included.

    Parameters
    ----------
    gt_labels : array-like, shape (n_samples,)
        Ground-truth labels (0 = normal, 1 = anomaly).
    num_samples : int, default=10
        Total number of indices to sample.
    min_anomalies : int, default=3
        Minimum number of anomaly indices to include. Capped at `num_samples`
        so the result never exceeds the requested size.
    random_state : int or None
        Seed for reproducibility. When given, a private generator seeded with
        this value is used, so the global NumPy random state is left
        untouched; the draws are the same as after `np.random.seed(random_state)`.
        When None, the global NumPy random state is used.

    Returns
    -------
    selected_indices : ndarray, shape (<= num_samples,)
        Shuffled indices, containing at least `min_anomalies` anomalies
        (or as many as available if fewer exist). The remaining slots are
        filled with normal indices only.
    """
    gt_labels = np.asarray(gt_labels)
    # A local RandomState avoids reseeding the process-wide generator as a side effect.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # locate anomaly vs normal indices
    anomaly_idx = np.where(gt_labels == 1)[0]
    normal_idx = np.where(gt_labels == 0)[0]

    # anomalies to pick: bounded by availability, the request and the total budget
    n_anom = max(0, min(len(anomaly_idx), min_anomalies, num_samples))
    if n_anom > 0:
        picked_anom = rng.choice(anomaly_idx, n_anom, replace=False)
    else:
        picked_anom = np.array([], dtype=int)

    # fill the rest from normals
    n_normal = max(0, min(num_samples - n_anom, len(normal_idx)))
    if n_normal > 0:
        picked_norm = rng.choice(normal_idx, n_normal, replace=False)
    else:
        picked_norm = np.array([], dtype=int)

    # combine and shuffle
    selected = np.concatenate([picked_anom, picked_norm])
    rng.shuffle(selected)

    return selected

In [8]:
def select_top_ce_samples(ce_loss, y_true, n_samples=20, anomaly_ratio=0.5):
    """
    Pick the highest-loss samples separately among anomalies and normals.

    Parameters:
    - ce_loss: array-like of CE loss values
    - y_true: array-like of binary labels (1=anomaly, 0=normal)
    - n_samples: total number of samples to select
    - anomaly_ratio: share of the selection reserved for anomalies; the
      anomaly quota is int(n_samples * anomaly_ratio), normals get the rest

    Returns:
    - selected_indices: numpy array of indices (into ce_loss / y_true), the
      anomaly picks first, then the normal picks, each ordered by descending
      loss. A class with fewer members than its quota contributes all it has.
    """
    losses = np.asarray(ce_loss)
    labels = np.asarray(y_true)

    anomaly_quota = int(n_samples * anomaly_ratio)
    normal_quota = n_samples - anomaly_quota

    def _highest_loss(label_value, quota):
        # Indices of one class, ranked by descending loss, truncated to the quota.
        members = np.where(labels == label_value)[0]
        ranking = np.argsort(-losses[members])
        return members[ranking[:quota]]

    return np.concatenate([
        _highest_loss(1, anomaly_quota),
        _highest_loss(0, normal_quota),
    ])

In [9]:
def find_largest_gap(losses):
    """
    Locate the widest gap between consecutive values of `losses` once they
    are sorted in descending order.

    Returns
    -------
    (idx, value) : position, in the descending ordering, of the element just
        below the widest gap, and that element's value.
    """
    descending = list(losses)
    descending.sort(reverse=True)

    gaps = np.abs(np.diff(descending))
    # gaps[k] separates descending[k] and descending[k + 1]; step to the lower side.
    idx = np.argmax(gaps) + 1
    return idx, descending[idx]

In [10]:
# ----------------------------
# Step 1: Read and Process the Data
# ----------------------------
dataset = '0.099_noise.csv'
df = pd.read_csv("../data/%s" % (dataset))
# Parse the timestamps first so the sort below is chronological; sorting the raw
# column would order string timestamps lexicographically.
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df = df.sort_values(by='Timestamp')
# Process the 'noise' column:
# - If NaN, assume Normal (0).
# - Otherwise, treat True/1/'True'/'true' as anomaly (1); everything else as Normal (0).
df['noise'] = df['noise'].fillna(0).apply(lambda x: 1 if (x == True or x == 1 or x == 'True' or x=='true') else 0)
print(dataset)
# Calculate the cutoff time (e.g., the median of all timestamps)
# NOTE(review): cutoff_time is not referenced by any cell shown here; confirm it is still needed.
cutoff_time = df['Timestamp'].median()

anomaly_f1_list = []
anomaly_support_list = []
# Prefix lengths to evaluate: the prefix-th event is predicted from the prefix-1 events before it.
prefix_range = range(2, 16)

0.099_noise.csv


In [11]:
training_size = 0.8
print('Training window size: %s' % (training_size))
loss_prefix_dict =dict()
classification_result = dict()

# Samples for the second-stage anomaly classifier:
# training samples are pooled over all prefixes, test samples are kept per prefix.
all_x_detect_train = []
all_y_detect_train = []
all_x_detect_test  = dict()
all_y_detect_test  = dict()

# The per-case event sequences do not depend on the prefix length, so group the
# log once instead of once per prefix.
# NOTE(review): groupby orders the cases by 'Case ID', so the train/test split below
# follows Case ID order rather than time -- confirm this is intended.
case_sequences = []
for case_id, group in df.groupby('Case ID'):
    group = group.sort_index()  # assuming the order in the file is the event order
    case_sequences.append((group['Activity'].values, group['noise'].values))  # adjust 'Activity' if needed

for prefix in prefix_range:
    # Extract per case:
    # - The first (prefix-1) events (activities) as features.
    # - The prefix-th event's activity as the target.
    # - The prefix-th event's noise flag as the ground truth anomaly.
    case_features = []
    case_targets = []
    ground_truth_anomaly = []

    for events, noise_flags in case_sequences:
        if len(events) >= prefix:
            case_features.append(events[:prefix-1])
            case_targets.append(events[prefix-1])
            ground_truth_anomaly.append(noise_flags[prefix-1])

    # Convert to numpy arrays
    case_features = np.array(case_features)
    case_targets = np.array(case_targets)
    ground_truth_anomaly = np.array(ground_truth_anomaly)
    print("Total cases with at least %s events:" % (prefix), case_features.shape[0])

    n_cases = case_features.shape[0]
    split_index = int(training_size * n_cases)
    test_index = split_index
    X_train = case_features[:split_index]
    X_test = case_features[test_index:]
    y_train = case_targets[:split_index]
    y_test = case_targets[test_index:]
    gt_anomaly_train = ground_truth_anomaly[:split_index]
    gt_anomaly_test = ground_truth_anomaly[test_index:]
    print("Training cases:", X_train.shape[0], "Test cases:", X_test.shape[0])

    # ----------------------------
    # Step 2: Encode the Features and Target
    # ----------------------------
    encoder_features = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

    X_encoded = encoder_features.fit_transform(case_features)
    print("Encoded feature shape:", X_encoded.shape)

    # IMPORTANT: Fit LabelEncoder on the full set of target activities (all cases)
    target_encoder = LabelEncoder()
    target_encoder.fit(case_targets)
    y_encoded = target_encoder.transform(case_targets)

    # Train a RandomForest next-activity classifier with the training set.
    nap_x_train = X_encoded[:split_index]
    nap_y_train = y_encoded[:split_index]
    nap_x_test = X_encoded[test_index:]
    nap_y_test = y_encoded[test_index:]
    rf_model  = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(nap_x_train, nap_y_train)

    # The threshold is the largest gap in the training losses; cases above it are flagged.
    ce_loss = cross_entropy_loss(model=rf_model, x_test = nap_x_train, y_test = nap_y_train)
    gap_idx, cutoff = find_largest_gap(ce_loss)

    predicted_anomaly = (ce_loss > cutoff).astype(int)

    # Cases whose ground-truth label is handed to the detector ("expert" labels).
    # select_top_ce_samples(ce_loss, gt_anomaly_train, n_samples=20, anomaly_ratio=0.5)
    # is an alternative strategy; its result used to be computed here and then
    # overwritten, so only the sampling that is actually used is kept.
    expert_anomaly_indices = sample_with_min_anomalies(
            gt_labels=gt_anomaly_train,
            num_samples=20,
            min_anomalies=10,
            random_state=42
    )

    # Modify training set for anomaly detection classifier
    # One batched predict_proba call instead of one call per selected case.
    # NOTE(review): each prefix fits its own target_encoder and rf_model, so position k
    # of 'probability' refers to rf_model.classes_[k] of that prefix only -- confirm
    # that pooling these vectors across prefixes is intended.
    expert_probabilities = rf_model.predict_proba(nap_x_train[expert_anomaly_indices]).tolist()
    x_detect_train = []
    y_detect_train = []
    for pos, idx in enumerate(expert_anomaly_indices):
        new_training = dict()
        new_training['activity_labels'] = X_train[idx]
        new_training['target_labels'] = y_train[idx]
        new_training['probability'] = expert_probabilities[pos]
        # Despite the key name this is the 0/1 threshold flag, not the raw loss.
        new_training['ce_loss'] = predicted_anomaly[idx]

        x_detect_train.append(new_training)
        y_detect_train.append(gt_anomaly_train[idx])

    all_x_detect_train.extend(x_detect_train)
    all_y_detect_train.extend(y_detect_train)

    # Same threshold, now applied to the held-out cases.
    ce_loss = cross_entropy_loss(model=rf_model, x_test = nap_x_test, y_test = nap_y_test)
    predicted_anomaly = (ce_loss > cutoff).astype(int)

    # Modify test set for anomaly detection classifier
    test_probabilities = rf_model.predict_proba(nap_x_test).tolist()
    x_detect_test = []
    y_detect_test = []
    for idx in range(len(X_test)):
        new_test = dict()
        new_test['activity_labels'] = X_test[idx]
        new_test['target_labels'] = y_test[idx]
        new_test['probability'] = test_probabilities[idx]
        new_test['ce_loss'] = predicted_anomaly[idx]
        x_detect_test.append(new_test)
        y_detect_test.append(gt_anomaly_test[idx])
    all_x_detect_test[prefix]=x_detect_test
    all_y_detect_test[prefix]=y_detect_test
    

Training window size: 0.8
Total cases with at least 2 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 1)
Total cases with at least 3 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 19)
Total cases with at least 4 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 37)
Total cases with at least 5 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 55)
Total cases with at least 6 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 73)
Total cases with at least 7 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 91)
Total cases with at least 8 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 109)
Total cases with at least 9 events: 5000
Training cases: 4000 Test cases: 1000
Encoded feature shape: (5000, 127)
Total cases with at least 10 events: 5000
Training cases: 4000 Test c

In [14]:
# ----------------------------
# Build the training matrix for the anomaly detection classifier and fit it
# ----------------------------
# Activity prefixes have different lengths across prefixes; the DataFrame
# constructor pads the shorter rows with missing values before encoding.
detect_encoder_features = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
train_activity_df = pd.DataFrame([sample['activity_labels'] for sample in all_x_detect_train])
x_detect_train = pd.DataFrame(detect_encoder_features.fit_transform(train_activity_df))
x_detect_train.columns = ['a%s'%(col) for col in x_detect_train.columns.values]

# One-hot encode the observed next activity.
detect_target_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
train_target_df = pd.DataFrame([sample['target_labels'] for sample in all_x_detect_train],
                               columns=['target_labels'])
target_label = pd.DataFrame(detect_target_encoder.fit_transform(train_target_df))
x_detect_train = pd.concat([x_detect_train, target_label], axis=1)

# Probability vectors differ in length between prefixes: zero-pad them IN PLACE
# to the longest one (the evaluation cell relies on this padding and on max_prob_length).
max_prob_length = max([len(sample['probability']) for sample in all_x_detect_train])
for sample in all_x_detect_train:
    sample['probability'].extend([0] * (max_prob_length - len(sample['probability'])))

probability_df = pd.DataFrame([sample['probability'] for sample in all_x_detect_train],
                              columns=['p%s'%(k) for k in range(max_prob_length)])
x_detect_train = pd.concat([x_detect_train, probability_df], axis=1)
print(x_detect_train.shape)

# 0/1 threshold flag stored under the 'ce_loss' key.
flag_df = pd.DataFrame([sample['ce_loss'] for sample in all_x_detect_train], columns=['ce_loss'])
x_detect_train = pd.concat([x_detect_train, flag_df], axis=1)

# Column names must all be strings before the frame is handed to the classifier.
x_detect_train.columns = x_detect_train.columns.astype(str)
print(x_detect_train.shape)

anom_clf = XGBClassifier(objective='binary:logistic', n_estimators=5, learning_rate=0.01, eval_metric='logloss',
                                         random_state=42)
anom_clf.fit(x_detect_train, all_y_detect_train)

(280, 233)
(280, 234)


In [15]:
# Report on the very samples anom_clf was fitted on: this measures training fit,
# not performance on unseen cases.
train_predictions = anom_clf.predict(x_detect_train)
print(classification_report(y_true=all_y_detect_train, y_pred=train_predictions))

              precision    recall  f1-score   support

           0       0.82      0.89      0.85       140
           1       0.88      0.80      0.84       140

    accuracy                           0.85       280
   macro avg       0.85      0.85      0.85       280
weighted avg       0.85      0.85      0.85       280



In [16]:
# Width of the activity matrix the detector's feature encoder was fitted on;
# shorter test prefixes are right-padded with None up to this width.
original_detect_train_df = pd.DataFrame([i['activity_labels'] for i in all_x_detect_train])
n_activity_columns = len(original_detect_train_df.columns.values)

for prefix in all_x_detect_test.keys():
    test_samples = all_x_detect_test[prefix]

    # --- activity-prefix features ---
    test_df_prefix = []
    for sample in test_samples:
        s = sample['activity_labels'].tolist()
        s.extend([None] * (n_activity_columns - len(s)))
        test_df_prefix.append(s)

    test_df_prefix = pd.DataFrame(test_df_prefix)
    x_detect_test = detect_encoder_features.transform(test_df_prefix)
    x_detect_test = pd.DataFrame(x_detect_test)
    x_detect_test.columns = ['a%s'%(i) for i in x_detect_test.columns.values]

    # --- observed next activity ---
    # Use the same column name as at fit time; an unnamed frame triggers
    # scikit-learn's feature-name mismatch warning.
    target_label = detect_target_encoder.transform(
        pd.DataFrame([sample['target_labels'] for sample in test_samples], columns=['target_labels']))
    target_label = pd.DataFrame(target_label)
    x_detect_test = pd.concat([x_detect_test, target_label], axis=1)

    # --- next-activity probabilities, zero-padded in place to the training width ---
    for sample in test_samples:
        sample['probability'].extend([0] * (max_prob_length - len(sample['probability'])))

    x_detect_test = pd.concat([x_detect_test, pd.DataFrame([sample['probability'] for sample in test_samples],
             columns = ['p%s'%(i) for i in range(max_prob_length)])], axis=1)
    # --- 0/1 threshold flag stored under the 'ce_loss' key ---
    x_detect_test = pd.concat([x_detect_test, pd.DataFrame([sample['ce_loss'] for sample in test_samples], columns=['ce_loss'])], axis=1)
    x_detect_test.columns = x_detect_test.columns.astype(str)
    print(x_detect_test.shape)

    predicted_anomaly = anom_clf.predict(x_detect_test)
    anomaly_scores = anom_clf.predict_proba(x_detect_test)[:,1]
    gt_anomaly_test = all_y_detect_test[prefix]

    # ----------------------------
    # Step 5: Evaluate the Anomaly Detection
    # ----------------------------
    print("\n--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix %s ---" % prefix)
    classification = classification_report(gt_anomaly_test, predicted_anomaly, output_dict=True)
    f1 = classification.get('1', {}).get('f1-score', 0)
    support = classification.get('1', {}).get('support', 0)
    classification_result[prefix] = classification
    print(f"Classification Report: F1-score = {f1}, Support = {support}")
    # ROC AUC is undefined when only one class occurs in the ground truth;
    # record NaN instead of letting roc_auc_score raise.
    if len(set(gt_anomaly_test)) > 1:
        classification_result[prefix]['ROC AUC'] = roc_auc_score(gt_anomaly_test, anomaly_scores)
    else:
        classification_result[prefix]['ROC AUC'] = np.nan

# Flatten the per-prefix reports into one row per prefix. A class missing from a
# report is shown as all zeros, the same placeholder cleaning_cls_result uses.
empty_class_report = {'precision': 0, 'recall': 0, 'f1-score': 0, 'support': 0.0}
revised_cls_result = {}
for i in classification_result.keys():
    report = classification_result[i]
    normal_report = report.get('0', empty_class_report)
    anomaly_report = report.get('1', empty_class_report)
    macro_report = report['macro avg']

    revised_cls_result[i] = dict()
    revised_cls_result[i]['Normal precision'] = normal_report['precision']
    revised_cls_result[i]['Normal recall'] = normal_report['recall']
    revised_cls_result[i]['Normal f1-score'] = normal_report['f1-score']
    revised_cls_result[i]['Normal support'] = normal_report['support']

    revised_cls_result[i]['Anomal precision'] = anomaly_report['precision']
    revised_cls_result[i]['Anomal recall'] = anomaly_report['recall']
    revised_cls_result[i]['Anomal f1-score'] = anomaly_report['f1-score']
    revised_cls_result[i]['Anomal support'] = anomaly_report['support']

    revised_cls_result[i]['Macro precision'] = macro_report['precision']
    revised_cls_result[i]['Macro recall'] = macro_report['recall']
    revised_cls_result[i]['Macro f1-score'] = macro_report['f1-score']
    revised_cls_result[i]['ROC AUC'] = report['ROC AUC']


result_df = pd.DataFrame.from_dict(revised_cls_result).T
result_df.index = result_df.index.set_names(['Prefix length'])
result_df = result_df.reset_index(drop=False)
result_df



(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 2 ---
Classification Report: F1-score = 0.9852216748768473, Support = 103.0
(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 3 ---
Classification Report: F1-score = 0.88, Support = 92.0




(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 4 ---
Classification Report: F1-score = 0.9473684210526315, Support = 102.0
(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 5 ---
Classification Report: F1-score = 0.4595744680851064, Support = 111.0
(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 6 ---
Classification Report: F1-score = 0.6442953020134228, Support = 99.0
(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 7 ---
Classification Report: F1-score = 0.3277591973244147, Support = 116.0




(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 8 ---
Classification Report: F1-score = 0.2571428571428571, Support = 104.0
(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 9 ---
Classification Report: F1-score = 0.3973509933774834, Support = 91.0




(1000, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 10 ---
Classification Report: F1-score = 0.4110854503464203, Support = 93.0
(905, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 11 ---
Classification Report: F1-score = 0.27655310621242485, Support = 81.0




(803, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 12 ---
Classification Report: F1-score = 0.2903885480572597, Support = 71.0
(759, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 13 ---
Classification Report: F1-score = 0.391644908616188, Support = 76.0
(651, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 14 ---
Classification Report: F1-score = 0.34972677595628415, Support = 73.0
(510, 234)

--- Anomaly Detection (Dynamic Threshold) Classification Report for prefix 15 ---
Classification Report: F1-score = 0.2939297124600639, Support = 46.0




Unnamed: 0,Prefix length,Normal precision,Normal recall,Normal f1-score,Normal support,Anomal precision,Anomal recall,Anomal f1-score,Anomal support,Macro precision,Macro recall,Macro f1-score,ROC AUC
0,2,0.996667,1.0,0.998331,897.0,1.0,0.970874,0.985222,103.0,0.998333,0.985437,0.991776,0.985437
1,3,0.995516,0.977974,0.986667,908.0,0.814815,0.956522,0.88,92.0,0.905165,0.967248,0.933333,0.974263
2,4,0.996641,0.991091,0.993858,898.0,0.925234,0.970588,0.947368,102.0,0.960937,0.98084,0.970613,0.990234
3,5,0.99532,0.71766,0.833987,889.0,0.300836,0.972973,0.459574,111.0,0.648078,0.845317,0.646781,0.981916
4,6,0.996255,0.885683,0.93772,901.0,0.482412,0.969697,0.644295,99.0,0.739333,0.92769,0.791008,0.983133
5,7,0.965251,0.565611,0.713267,884.0,0.20332,0.844828,0.327759,116.0,0.584285,0.705219,0.520513,0.785131
6,8,0.941176,0.571429,0.711111,896.0,0.157895,0.692308,0.257143,104.0,0.549536,0.631868,0.484127,0.724964
7,9,0.998433,0.70077,0.823529,909.0,0.248619,0.989011,0.397351,91.0,0.623526,0.844891,0.61044,0.966676
8,10,0.993939,0.723264,0.837269,907.0,0.261765,0.956989,0.411085,93.0,0.627852,0.840126,0.624177,0.96575
9,11,0.975359,0.576456,0.724638,824.0,0.165072,0.851852,0.276553,81.0,0.570216,0.714154,0.500595,0.865801


In [20]:
# Inspection of the LAST prefix only: gt_anomaly_test and x_detect_test are the
# leftovers of the final iteration of the evaluation loop.
last_prefix_proba = anom_clf.predict_proba(x_detect_test)
print(roc_auc_score(gt_anomaly_test, last_prefix_proba[:,1]))
print(gt_anomaly_test)
last_prefix_proba

0.8859398425787106
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

array([[0.15      , 0.85      ],
       [0.43166667, 0.56833333],
       [0.473125  , 0.526875  ],
       ...,
       [0.755     , 0.245     ],
       [0.53928571, 0.46071429],
       [0.625     , 0.375     ]])