In [None]:
import warnings
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

# Suppress every warning for the rest of the session (no category or module
# filter is given). NOTE(review): this also hides deprecation and numerical
# warnings raised by the libraries used below - consider narrowing the filter.
warnings.filterwarnings("ignore")
#plt.style.use('dark_background')

In [None]:
from utils import datasets
from utils.utils import *
from utils.distanceforest import DistanceIsolationForest
from utils.forests import ExtendedIsolationForest, IsolationForest, ExtendedIsolationForest

In [None]:
def plt_scatter_predictions(model, points=None, adaptive_range=False, resolution=50, bounds=(-0.9, 0.9)):
    """Draw a heatmap of ``model.predict`` over a square 2-D grid with pyplot.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X)``. It is called once per grid row with
        an array of ``resolution`` ``(x, y)`` pairs and must return one value
        per pair.
    points : array-like, optional
        2-D array whose columns 0 and 1 are x/y coordinates and whose column 2
        is a label. Rows labelled 1 are drawn in 'firebrick', rows labelled 0
        in 'midnightblue'. Nothing is scattered when ``None``.
    adaptive_range : bool
        If True the colour scale follows the computed values and a colourbar
        is added; otherwise the scale is pinned to [0.25, 0.75] and no
        colourbar is drawn.
    resolution : int
        Number of grid samples along each axis.
    bounds : tuple of float
        ``(low, high)`` limits applied to both axes of the grid and of the
        image extent. Defaults to ``(-0.9, 0.9)``.

    Returns
    -------
    None
        The function only draws through the ``plt`` state machine.
    """
    low, high = bounds
    grid = np.linspace(low, high, resolution)

    # Row j of the heatmap holds the predictions for y = grid[j]; together
    # with origin="lower" this puts increasing y towards the top of the image.
    heatmap = np.array([
        model.predict(np.array([(x, y) for x in grid]))
        for y in grid
    ])

    extent = (low, high, low, high)
    if adaptive_range:
        plt.imshow(heatmap, extent=extent, origin="lower", cmap="coolwarm")
        plt.colorbar()
    else:
        plt.imshow(heatmap, extent=extent, vmin=0.25, vmax=0.75, origin="lower", cmap="coolwarm")

    if points is not None:
        # Accept nested lists as well as arrays.
        points = np.asarray(points)
        x, y, l = points[:, 0], points[:, 1], points[:, 2]
        plt.scatter(x[l == 1], y[l == 1], s=20, facecolors='firebrick', edgecolors='w')
        plt.scatter(x[l == 0], y[l == 0], s=20, facecolors='midnightblue', edgecolors='w')
    plt.xticks([])
    plt.yticks([])

In [None]:
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score
from joblib import Parallel, delayed

In [None]:
def perform_run(model, data, labels):
    """Fit ``model`` on ``data`` and score its predictions on that same data.

    Returns whatever ``average_precision_score(labels, predictions)`` yields,
    where ``predictions`` is ``model.predict(data)`` taken after fitting.
    """
    model.fit(data)
    predictions = model.predict(data)
    return average_precision_score(labels, predictions)

# Benchmark configuration: the three forest variants under comparison, all
# built with the same ensemble size and sub-sample size, and one colour each.
models = [
    DistanceIsolationForest(n_estimators=128, max_samples=512),
    IsolationForest(n_estimators=128, max_samples=512),
    ExtendedIsolationForest(n_estimators=128, max_samples=512)
]
colors = ["tab:orange","tab:blue","tab:green"]

# Number of fit/score repetitions averaged for each (dataset, model) pair.
n_runs = 1

# Materialise the names once so they can also label the x axis afterwards.
dataset_names = list(datasets.datasets_names_short)
for i,dataset_name in enumerate(tqdm(dataset_names)):
    data, labels = datasets.load_dataset(dataset_name)
    for model, color in zip(models, colors):
        scores = [perform_run(model, data, labels) for _ in range(n_runs)]
        # Give each model a legend label only on its first point; labels that
        # start with an underscore are skipped by the automatic legend.
        legend_label = type(model).__name__ if i == 0 else "_nolegend_"
        plt.plot(i, np.mean(scores), "o", color=color, label=legend_label)

# Make the figure self-explanatory: dataset names on x, metric on y, and a
# legend mapping colours to model classes.
plt.xticks(range(len(dataset_names)), [str(name) for name in dataset_names], rotation=90)
plt.ylabel("Average precision")
plt.legend()

In [None]:
# Two-panel figure for a small DistanceIsolationForest.
# NOTE(review): `data` and `labels` are not defined in this cell; they are the
# values left behind by the last iteration of the benchmark loop above.
plt.figure(figsize=(16, 6), dpi=80)

model = DistanceIsolationForest(n_estimators=10, max_samples=1024)
model.fit(data)
pred = model.predict(data)

# Left panel: prediction heatmap with a data-driven colour scale.
plt.subplot(1, 2, 1)
plt_scatter_predictions(model, adaptive_range=True, resolution=100)

# Right panel: per-sample predictions, label-0 samples at x=0 and label-1
# samples at x=1.
plt.subplot(1, 2, 2)
scores_label0 = pred[labels == 0]
scores_label1 = pred[labels == 1]
plt.scatter(np.zeros_like(scores_label0), scores_label0)
plt.scatter(np.ones_like(scores_label1), scores_label1)