## Isolation Forest

This notebook trains and evaluates the Isolation Forest.

In [2]:
%reload_ext autoreload
%autoreload 2

import sys
sys.path.append('../src_jobs/')

In [3]:
import pickle
import warnings
from datetime import datetime
from itertools import repeat
import numpy as np
import torch
from artifact import Saw_centered
from data import CenteredArtifactDataset, CachedArtifactDataset
from pytorch_lightning.loggers import MLFlowLogger
from torch.utils.data import DataLoader
from sklearn.ensemble import IsolationForest


In [4]:
# stop warnings
torch.set_float32_matmul_precision("high")
warnings.filterwarnings("ignore", ".*does not have many workers.*")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
np.random.seed(42)

In [5]:
n_estimators = [1,5,10,50,100]
max_samples = 'auto'
contamination = 0.4
train_size = 20000
# width of window
width = 512
artifact = Saw_centered()
batch_size = 32  # 'values': [32, 64, 128]

train_datasets = ["CinCECGTorso"]
input_path = "../data/processed"

In [6]:
def load_series(names: list[str], split: str, path: str):
    series: list[np.ndarray] = list()
    counts: list[float] = list()
    for name in names:
        with open(f"{path}/{name}_{split}.pickle", "rb") as f:
            raw = [a for a in pickle.load(f) if len(a) > width]
            series.extend(np.array(a).astype(np.float32) for a in raw)
            counts.extend(repeat(1 / len(raw), len(raw)))
    counts = np.array(counts)
    return series, np.divide(counts, np.sum(counts))

In [7]:
train_data, train_weights = load_series(train_datasets, "TRAIN", str(input_path))
train_dataset = CenteredArtifactDataset(
    train_data,
    width=width,
    padding=64,
    artifact=artifact,
    weight=train_weights,
    p_has_artifact=contamination # proportion of artifacts in manually labelled data
)
train_loader = DataLoader(train_dataset, batch_size=batch_size)
# sanity check
batch = next(iter(train_loader))
print(batch["data"] + batch["artifact"])
X_train = batch["data"] + batch["artifact"]

tensor([[-1.1612, -1.1951, -1.2188,  ...,  0.6996,  0.6773,  0.6464],
        [ 0.5270,  0.5339,  0.5373,  ...,  0.1900,  0.1947,  0.1938],
        [-0.5745, -0.5766, -0.5679,  ..., -0.3190, -0.3128, -0.3073],
        ...,
        [ 1.1745,  1.1738,  1.1746,  ..., -1.3591, -1.3848, -1.3791],
        [ 0.7430,  0.7773,  0.7390,  ..., -0.2473, -0.1665, -0.1506],
        [ 1.2499,  1.2414,  1.2414,  ..., -0.1198, -0.1283, -0.1368]])


In [8]:
print("Creating Dataset")
for step in range(train_size-1):
    batch = next(iter(train_loader))
    X_train = torch.concat((X_train, batch["data"] + batch["artifact"]))

Creating Dataset


In [51]:
from pathlib import Path

test_file = Path(f"../data/val_IF_CinCECGT_Cont{contamination}_{width}.pkl")

# validation
if not test_file.exists():
    test_data, test_weights = load_series(train_datasets, split="VAL", path=input_path)
    test_gen = CenteredArtifactDataset(test_data,
                              width=width,
                              padding=64,
                              artifact=artifact,
                              weight=test_weights,
                              p_has_artifact=contamination)
    test = CachedArtifactDataset.generate(test_gen,
                                         n=2048,
                                         to=test_file)
else:
    test = CachedArtifactDataset(file=test_file)
    
test_loader = DataLoader(test, batch_size=batch_size)

In [52]:
it = iter(test_loader)
X_test = torch.Tensor()
Y_test = torch.Tensor()
print("Creating Val Dataset")
for step in range(len(test_loader)):
    batch = next(it)
    X_test = torch.concat((X_test, batch["data"] + batch["artifact"]))
    Y_test = torch.concat((Y_test, batch["label"]))

Creating Val Dataset


In [30]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix, fbeta_score

metrics = pd.DataFrame()
for n_est in n_estimators:
    # model
    model = IsolationForest(
        n_estimators=n_est,
        max_samples=max_samples,
        contamination=contamination, 
        random_state=42
    )

    model_name = f"{model.__class__.__name__}_{datetime.now().strftime('%d-%m-%Y_%H:%M:%S')}"

    model.fit(X_train)

    scores = pd.DataFrame()

    scores["labels"] = Y_test
    scores["scores"] = model.decision_function(X_test)
    scores["artifact"] = model.predict(X_test)

    predictions = (scores["artifact"] < 0).astype(int).tolist() # convert prediction to 0 - no artifact and 1 - artifact
    tn, fp, fn, tp = confusion_matrix(Y_test.tolist(), predictions).ravel()

    new_row = pd.DataFrame([{
        'name': model_name ,
        'n_estimators': n_est,
        'fbeta': fbeta_score(Y_test.tolist(), predictions, beta=0.5),
        'accuracy': accuracy_score(Y_test.tolist(), predictions),
        'precision': precision_score(Y_test.tolist(), predictions) ,
        'recall': recall_score(Y_test.tolist(), predictions),
        'tp': tp,
        'fp': fp,
        'fn': fn,
        'tn': tn,
    }])

    metrics = pd.concat([metrics,new_row], ignore_index=True)
    print(metrics)

# random guesses
scores["labels"] = Y_test
nums = np.random.choice([0, 1], size=Y_test.shape, p=[1-contamination, contamination])

tn, fp, fn, tp = confusion_matrix(Y_test.tolist(), nums).ravel()

new_row = pd.DataFrame([{
    'name': "random guessing",
    'n_estimators': n_est,
    'fbeta': fbeta_score(Y_test.tolist(), nums, beta=0.5),
    'accuracy': accuracy_score(Y_test.tolist(), nums),
    'precision': precision_score(Y_test.tolist(), nums) ,
    'recall': recall_score(Y_test.tolist(), nums),
    'tp': tp,
    'fp': fp,
    'fn': fn,
    'tn': tn,
}])
metrics = pd.concat([metrics,new_row], ignore_index=True)
print(metrics)

                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_02-06-2024_13:43:29            10  0.547743  0.560059   

   precision    recall   tp   fp   fn   tn  
0   0.452514  0.496324  405  490  411  742  
                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_02-06-2024_13:43:29            10  0.547743  0.560059   
1                      random guessing            10  0.498599  0.523926   

   precision    recall   tp   fp   fn   tn  
0   0.452514  0.496324  405  490  411  742  
1   0.396887  0.375000  306  465  510  767  


In [31]:
metrics

Unnamed: 0,name,n_estimators,fbeta,accuracy,precision,recall,tp,fp,fn,tn
0,IsolationForest_02-06-2024_13:43:29,10,0.547743,0.560059,0.452514,0.496324,405,490,411,742
1,random guessing,10,0.498599,0.523926,0.396887,0.375,306,465,510,767


In [35]:
import pandas as pd

real_data_df = pd.read_pickle('../data/real/normalized_deviation_updated_TEST.pickle') 
ground_truth = pd.read_csv('../data/real/gt_changes_only_relabeled_200K.csv')

In [36]:
ground_truth.drop(columns=["Unnamed: 0"], inplace=True)
width = 512
dist = 512//2

In [37]:
train_index = int(494 *0.8)
train_index

395

In [38]:
X_train = []
Y_train = list()
print("Creating Real Dataset")
for index, row in ground_truth[:train_index].iterrows():
    X_train.append(np.array(real_data_df[0][int(row["position"]-dist) : int(row["position"]+dist)]))
    Y_train = Y_train + [row["gt"]]

Creating Real Dataset


In [39]:
X_train = np.array(X_train)
X_train.shape

(395, 512)

In [40]:
X_test = []
Y_test = list()
print("Creating Real Dataset")
for index, row in ground_truth[train_index:494].iterrows():
    X_test.append(np.array(real_data_df[0][int(row["position"]-dist) : int(row["position"]+dist)]))
    Y_test = Y_test + [row["gt"]]

Creating Real Dataset


In [41]:
n_estimators = [1,5,10,50,100, 200, 500]
max_samples = 'auto'
contamination = 0.4

In [42]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix, fbeta_score

metrics = pd.DataFrame()
for n_est in n_estimators:
    # model
    model = IsolationForest(
        n_estimators=n_est,
        max_samples=max_samples,
        contamination=contamination, 
        random_state=42,
        bootstrap=True
    )

    model_name = f"{model.__class__.__name__}_{datetime.now().strftime('%d-%m-%Y_%H:%M:%S')}"

    model.fit(X_train)

    scores = pd.DataFrame()

    scores["labels"] = Y_test
    scores["scores"] = model.decision_function(X_test)
    scores["artifact"] = model.predict(X_test)

    predictions = (scores["artifact"] < 0).astype(int).tolist() # convert prediction to 0 - no artifact and 1 - artifact
    tn, fp, fn, tp = confusion_matrix(Y_test, predictions).ravel()

    new_row = pd.DataFrame([{
        'name': model_name ,
        'n_estimators': n_est,
        'fbeta': fbeta_score(Y_test, predictions, beta=0.5),
        'accuracy': accuracy_score(Y_test, predictions),
        'precision': precision_score(Y_test, predictions) ,
        'recall': recall_score(Y_test, predictions),
        'tp': tp,
        'fp': fp,
        'fn': fn,
        'tn': tn,
    }])

    metrics = pd.concat([metrics,new_row], ignore_index=True)
    print(metrics)

# random guesses
scores["labels"] = Y_test
nums = np.random.choice([0, 1], size=len(Y_test), p=[1-contamination, contamination])

tn, fp, fn, tp = confusion_matrix(Y_test, nums).ravel()

new_row = pd.DataFrame([{
    'name': "random guessing",
    'n_estimators': n_est,
    'fbeta': fbeta_score(Y_test, nums, beta=0.5),
    'accuracy': accuracy_score(Y_test, nums),
    'precision': precision_score(Y_test, nums) ,
    'recall': recall_score(Y_test, nums),
    'tp': tp,
    'fp': fp,
    'fn': fn,
    'tn': tn,
}])
metrics = pd.concat([metrics,new_row], ignore_index=True)
print(metrics)

                                  name  n_estimators   fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:09:42             1  0.3125  0.525253   

   precision    recall  tp  fp  fn  tn  
0   0.288889  0.464286  13  32  15  39  
                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:09:42             1  0.312500  0.525253   
1  IsolationForest_03-06-2024_14:09:42             5  0.273438  0.424242   

   precision    recall  tp  fp  fn  tn  
0   0.288889  0.464286  13  32  15  39  
1   0.245614  0.500000  14  43  14  28  
                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:09:42             1  0.312500  0.525253   
1  IsolationForest_03-06-2024_14:09:42             5  0.273438  0.424242   
2  IsolationForest_03-06-2024_14:09:42            10  0.329861  0.444444   

   precision    recall  tp  fp  fn  tn  
0   0.288889  0.464286  13  32  15  39  
1   0.245614  0.500000  14  43

In [43]:
metrics

Unnamed: 0,name,n_estimators,fbeta,accuracy,precision,recall,tp,fp,fn,tn
0,IsolationForest_03-06-2024_14:09:42,1,0.3125,0.525253,0.288889,0.464286,13,32,15,39
1,IsolationForest_03-06-2024_14:09:42,5,0.273438,0.424242,0.245614,0.5,14,43,14,28
2,IsolationForest_03-06-2024_14:09:42,10,0.329861,0.444444,0.292308,0.678571,19,46,9,25
3,IsolationForest_03-06-2024_14:09:42,50,0.348101,0.434343,0.305556,0.785714,22,50,6,21
4,IsolationForest_03-06-2024_14:09:42,100,0.335366,0.40404,0.293333,0.785714,22,53,6,18
5,IsolationForest_03-06-2024_14:09:42,200,0.346386,0.414141,0.302632,0.821429,23,53,5,18
6,IsolationForest_03-06-2024_14:09:43,500,0.346386,0.414141,0.302632,0.821429,23,53,5,18
7,random guessing,500,0.231481,0.444444,0.212766,0.357143,10,37,18,34


## Mask Approach

In [16]:
from data import TestArtifactDataset
from pathlib import Path

test_width = 512

test_file = Path(f"../data/test_real_{test_width}.pkl")
start_index = 13100
final_index = 145355 # end of last annotated artifact

n = 130 # 13100 + 130*1024 = 146220 - till there we have labels
# test_file = Path(f"../data/test_real{test_width}.pkl")
# test_dataset = ["normalized_deviation_updated"]
if not test_file.exists():
    real_data_df = pd.read_pickle('../data/real/normalized_deviation_updated_TEST.pickle') 
    real_data = real_data_df[0]
    ground_truth = pd.read_csv('../data/real/gt_realData.csv')
    test_ds = TestArtifactDataset.generate(
        start_index=start_index,
        labels=ground_truth,
        width=test_width,
        n=n,
        data=real_data,
        to=Path(f"../data/test_real_{test_width}.pkl"),
    )
else:
    test_ds = TestArtifactDataset(file=test_file)

In [17]:
test_index = int(len(test_ds) * 0.2)
test_index

26

In [18]:
X_train = np.array(test_ds[test_index]["data"])
for index in range(test_index+1, len(test_ds)):
    X_train = np.vstack([X_train, np.array(test_ds[index]["data"])])

In [19]:
X_test = np.array(test_ds[0]["data"])
Y_test = np.array(test_ds[0]["mask"])
for index in range(1, test_index):
    X_test = np.vstack([X_test, np.array(test_ds[index]["data"])])
    Y_test = np.vstack([Y_test, np.array(test_ds[index]["mask"])])

In [20]:
X_train = np.concatenate(X_train).reshape(-1,1)
X_test = np.concatenate(X_test).reshape(-1,1)
Y_test = np.concatenate(Y_test).reshape(-1,1).squeeze().astype(int)

In [21]:
n_estimators = [1,5,10,50,100, 200, 500, 1000, 5000, 10000]
max_samples = 'auto'
contamination = 0.03

In [22]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix, fbeta_score

metrics = pd.DataFrame()
for n_est in n_estimators:
    # model
    model = IsolationForest(
        n_estimators=n_est,
        max_samples=max_samples,
        contamination=contamination, 
        random_state=42,
        bootstrap=True
    )

    model_name = f"{model.__class__.__name__}_{datetime.now().strftime('%d-%m-%Y_%H:%M:%S')}"

    model.fit(X_train)

    scores = pd.DataFrame()

    scores = model.decision_function(X_test)
    predictions = model.predict(X_test)

    predictions = (predictions < 0).astype(int).tolist() # convert prediction to 0 - no artifact and 1 - artifact
    tn, fp, fn, tp = confusion_matrix(Y_test, predictions).ravel()

    new_row = pd.DataFrame([{
        'name': model_name ,
        'n_estimators': n_est,
        'fbeta': fbeta_score(Y_test, predictions, beta=0.5),
        'accuracy': accuracy_score(Y_test, predictions),
        'precision': precision_score(Y_test, predictions) ,
        'recall': recall_score(Y_test, predictions),
        'tp': tp,
        'fp': fp,
        'fn': fn,
        'tn': tn,
    }])

    metrics = pd.concat([metrics,new_row], ignore_index=True)
    print(metrics)

# random guesses
nums = np.random.choice([0, 1], size=len(Y_test), p=[1-contamination, contamination])

tn, fp, fn, tp = confusion_matrix(Y_test, nums).ravel()

new_row = pd.DataFrame([{
    'name': "random guessing",
    'n_estimators': n_est,
    'fbeta': fbeta_score(Y_test, nums, beta=0.5),
    'accuracy': accuracy_score(Y_test, nums),
    'precision': precision_score(Y_test, nums) ,
    'recall': recall_score(Y_test, nums),
    'tp': tp,
    'fp': fp,
    'fn': fn,
    'tn': tn,
}])
metrics = pd.concat([metrics,new_row], ignore_index=True)
print(metrics)

                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:04:42             1  0.142162    0.9375   

   precision    recall  tp   fp   fn     tn  
0   0.134991  0.180523  76  487  345  12404  
                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:04:42             1  0.142162  0.937500   
1  IsolationForest_03-06-2024_14:04:42             5  0.211773  0.958984   

   precision    recall  tp   fp   fn     tn  
0   0.134991  0.180523  76  487  345  12404  
1   0.242798  0.140143  59  184  362  12707  
                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:04:42             1  0.142162  0.937500   
1  IsolationForest_03-06-2024_14:04:42             5  0.211773  0.958984   
2  IsolationForest_03-06-2024_14:04:42            10  0.291529  0.967172   

   precision    recall  tp   fp   fn     tn  
0   0.134991  0.180523  76  487  345 

In [23]:
metrics

Unnamed: 0,name,n_estimators,fbeta,accuracy,precision,recall,tp,fp,fn,tn
0,IsolationForest_03-06-2024_14:04:42,1,0.142162,0.9375,0.134991,0.180523,76,487,345,12404
1,IsolationForest_03-06-2024_14:04:42,5,0.211773,0.958984,0.242798,0.140143,59,184,362,12707
2,IsolationForest_03-06-2024_14:04:42,10,0.291529,0.967172,0.434426,0.125891,53,69,368,12822
3,IsolationForest_03-06-2024_14:04:42,50,0.245629,0.96259,0.302564,0.140143,59,136,362,12755
4,IsolationForest_03-06-2024_14:04:42,100,0.242272,0.962515,0.298969,0.137767,58,136,363,12755
5,IsolationForest_03-06-2024_14:04:42,200,0.251443,0.962665,0.308081,0.144893,61,137,360,12754
6,IsolationForest_03-06-2024_14:04:43,500,0.243902,0.962665,0.302083,0.137767,58,134,363,12757
7,IsolationForest_03-06-2024_14:04:44,1000,0.247276,0.96274,0.305699,0.140143,59,134,362,12757
8,IsolationForest_03-06-2024_14:04:46,5000,0.230179,0.962365,0.287234,0.128266,54,134,367,12757
9,IsolationForest_03-06-2024_14:04:59,10000,0.232854,0.962365,0.289474,0.130641,55,135,366,12756


In [5]:
from data import RealisticArtifactDataset
from artifact import Saw

train_data, train_weights = load_series(train_datasets, "TRAIN", str(input_path))
train_dataset = RealisticArtifactDataset(
    train_data,
    width=width,
    padding=64,
    artifact=Saw(),
    weight=train_weights,
)
train_loader = DataLoader(train_dataset, batch_size=1)
# sanity check


In [6]:
batch = next(iter(train_loader))
print(batch["data"] + batch["artifact"])
X_train = np.array(batch["data"] + batch["artifact"])

tensor([[ 4.8957e-01,  4.8844e-01,  4.9020e-01,  4.9099e-01,  4.8579e-01,
          4.8851e-01,  5.0405e-01,  5.1061e-01,  4.9701e-01,  4.8198e-01,
          4.7000e-01,  4.5321e-01,  4.4568e-01,  4.4822e-01,  4.3916e-01,
          4.2469e-01,  4.3091e-01,  4.4674e-01,  4.4763e-01,  4.4565e-01,
          4.4713e-01,  4.3663e-01,  4.2401e-01,  4.3227e-01,  4.5066e-01,
          4.5064e-01,  4.3506e-01,  4.2435e-01,  4.2209e-01,  4.2524e-01,
          4.3673e-01,  4.6028e-01,  4.8680e-01,  5.0391e-01,  5.1232e-01,
          5.1369e-01,  5.0606e-01,  4.9213e-01,  4.7869e-01,  4.6598e-01,
          4.5153e-01,  4.4344e-01,  4.4482e-01,  4.4704e-01,  4.4577e-01,
          4.4452e-01,  4.4647e-01,  4.4653e-01,  4.4371e-01,  4.4580e-01,
          4.5638e-01,  4.7211e-01,  4.8547e-01,  4.9270e-01,  4.9028e-01,
          4.7752e-01,  4.6555e-01,  4.6396e-01,  4.7374e-01,  4.8763e-01,
          4.9448e-01,  4.8744e-01,  4.7210e-01,  4.6809e-01,  4.8423e-01,
          5.0523e-01,  5.1098e-01,  4.

In [7]:
from pathlib import Path

test_file = Path(f"../data/val_IF_CinCECGT_mask_{width}.pkl")

# validation
if not test_file.exists():
    test_data, test_weights = load_series(train_datasets, split="VAL", path=input_path)
    test_gen = RealisticArtifactDataset(test_data,
                              width=width,
                              padding=64,
                              artifact=Saw(),
                              weight=test_weights,
                              )
    test = CachedArtifactDataset.generate(test_gen,
                                         n=2048,
                                         to=test_file)
else:
    test = CachedArtifactDataset(file=test_file)
    
test_loader = DataLoader(test, batch_size=batch_size)

In [8]:
print("Creating Dataset")
for step in range(train_size-1):
    batch = next(iter(train_loader))
    X_train = np.vstack([X_train, np.array(batch["data"] + batch["artifact"])])

Creating Dataset


In [9]:
it = iter(test_loader)
batch = next(it)
X_test = np.array(batch["data"] + batch["artifact"])
Y_test = np.array(batch["mask"])
print("Creating Val Dataset")
for step in range(1,len(test_loader)):
    batch = next(it)
    X_test = np.vstack([X_test, np.array(batch["data"] + batch["artifact"])])
    Y_test = np.vstack([Y_test, np.array(batch["mask"])])

Creating Val Dataset


In [10]:
X_train = np.concatenate(X_train).reshape(-1,1)
X_test = np.concatenate(X_test).reshape(-1,1)
Y_test = np.concatenate(Y_test).reshape(-1,1).squeeze().astype(int)

In [11]:
n_estimators = [1,5,10,50,100, 200]
max_samples = 'auto'
contamination = 0.03

In [14]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, mean_squared_error, confusion_matrix, fbeta_score

metrics = pd.DataFrame()
for n_est in n_estimators:
    # model
    model = IsolationForest(
        n_estimators=n_est,
        max_samples=max_samples,
        contamination=contamination, 
        random_state=42,
        bootstrap=True
    )

    model_name = f"{model.__class__.__name__}_{datetime.now().strftime('%d-%m-%Y_%H:%M:%S')}"

    model.fit(X_train)

    scores = pd.DataFrame()

    scores = model.decision_function(X_test)
    predictions = model.predict(X_test)

    predictions = (predictions < 0).astype(int).tolist() # convert prediction to 0 - no artifact and 1 - artifact
    tn, fp, fn, tp = confusion_matrix(Y_test, predictions).ravel()

    new_row = pd.DataFrame([{
        'name': model_name ,
        'n_estimators': n_est,
        'fbeta': fbeta_score(Y_test, predictions, beta=0.5),
        'accuracy': accuracy_score(Y_test, predictions),
        'precision': precision_score(Y_test, predictions) ,
        'recall': recall_score(Y_test, predictions),
        'tp': tp,
        'fp': fp,
        'fn': fn,
        'tn': tn,
    }])

    metrics = pd.concat([metrics,new_row], ignore_index=True)
    print(metrics)

# random guesses
nums = np.random.choice([0, 1], size=len(Y_test), p=[1-contamination, contamination])

tn, fp, fn, tp = confusion_matrix(Y_test, nums).ravel()

new_row = pd.DataFrame([{
    'name': "random guessing",
    'n_estimators': n_est,
    'fbeta': fbeta_score(Y_test, nums, beta=0.5),
    'accuracy': accuracy_score(Y_test, nums),
    'precision': precision_score(Y_test, nums) ,
    'recall': recall_score(Y_test, nums),
    'tp': tp,
    'fp': fp,
    'fn': fn,
    'tn': tn,
}])
metrics = pd.concat([metrics,new_row], ignore_index=True)
print(metrics)

                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:02:36             1  0.139741  0.943154   

   precision    recall    tp  fp     fn      tn  
0   0.997425  0.031476  1937   5  59602  987032  
                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:02:36             1  0.139741  0.943154   
1  IsolationForest_03-06-2024_14:02:38             5  0.355097  0.939067   

   precision    recall     tp     fp     fn      tn  
0   0.997425  0.031476   1937      5  59602  987032  
1   0.454188  0.189620  11669  14023  49870  973014  
                                  name  n_estimators     fbeta  accuracy  \
0  IsolationForest_03-06-2024_14:02:36             1  0.139741  0.943154   
1  IsolationForest_03-06-2024_14:02:38             5  0.355097  0.939067   
2  IsolationForest_03-06-2024_14:02:42            10  0.370379  0.937515   

   precision    recall     tp     fp     fn      tn

In [15]:
metrics

Unnamed: 0,name,n_estimators,fbeta,accuracy,precision,recall,tp,fp,fn,tn
0,IsolationForest_03-06-2024_14:02:36,1,0.139741,0.943154,0.997425,0.031476,1937,5,59602,987032
1,IsolationForest_03-06-2024_14:02:38,5,0.355097,0.939067,0.454188,0.18962,11669,14023,49870,973014
2,IsolationForest_03-06-2024_14:02:42,10,0.370379,0.937515,0.438089,0.228879,14085,18066,47454,968971
3,IsolationForest_03-06-2024_14:02:47,50,0.34412,0.935371,0.404782,0.215148,13240,19469,48299,967568
4,IsolationForest_03-06-2024_14:03:04,100,0.343871,0.935266,0.403695,0.215896,13286,19625,48253,967412
5,IsolationForest_03-06-2024_14:03:34,200,0.34827,0.935568,0.408646,0.218902,13471,19494,48068,967543
6,random guessing,200,0.050989,0.914972,0.060753,0.031037,1910,29529,59629,957508
