In [1]:
from pycaret.datasets import get_data
data = get_data('anomaly')

Unnamed: 0,Col1,Col2,Col3,Col4,Col5,Col6,Col7,Col8,Col9,Col10
0,0.263995,0.764929,0.138424,0.935242,0.605867,0.51879,0.912225,0.608234,0.723782,0.733591
1,0.546092,0.653975,0.065575,0.227772,0.845269,0.837066,0.272379,0.331679,0.429297,0.367422
2,0.336714,0.538842,0.192801,0.553563,0.074515,0.332993,0.365792,0.861309,0.899017,0.0886
3,0.092108,0.995017,0.014465,0.176371,0.24153,0.514724,0.562208,0.158963,0.073715,0.208463
4,0.325261,0.805968,0.957033,0.331665,0.307923,0.355315,0.501899,0.558449,0.885169,0.182754


In [5]:

from pycaret.datasets import get_data
from pycaret.anomaly import *
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

data = get_data('anomaly')

exp = setup(data, session_id=123)

models = [
    'abod', 'cluster', 'cof', 'iforest', 'histogram',
    'knn', 'lof', 'svm', 'pca', 'mcd', 'sod', 'sos'
]

anomaly_counts = {}
model_params = {}
silhouette_scores = {}

for model_name in models:
    model = create_model(model_name)

    results = assign_model(model)

    anomaly_count = results['Anomaly'].sum()
    anomaly_counts[model_name] = anomaly_count

    params = model.get_params()
    model_params[model_name] = params

    clean_data = results[results['Anomaly'] == 0].drop(columns='Anomaly')

    kmeans = KMeans(n_clusters=3, random_state=123)
    kmeans.fit(clean_data)

    score = silhouette_score(clean_data, kmeans.labels_)
    silhouette_scores[model_name] = score

for model_name in anomaly_counts:
    print(f"Model: {model_name}")
    print(f"Anomaly Count: {anomaly_counts[model_name]}")
    print(f"Parameters: {model_params[model_name]}")
    print(f"Silhouette Score: {silhouette_scores[model_name]}\n")

Unnamed: 0,Col1,Col2,Col3,Col4,Col5,Col6,Col7,Col8,Col9,Col10
0,0.263995,0.764929,0.138424,0.935242,0.605867,0.51879,0.912225,0.608234,0.723782,0.733591
1,0.546092,0.653975,0.065575,0.227772,0.845269,0.837066,0.272379,0.331679,0.429297,0.367422
2,0.336714,0.538842,0.192801,0.553563,0.074515,0.332993,0.365792,0.861309,0.899017,0.0886
3,0.092108,0.995017,0.014465,0.176371,0.24153,0.514724,0.562208,0.158963,0.073715,0.208463
4,0.325261,0.805968,0.957033,0.331665,0.307923,0.355315,0.501899,0.558449,0.885169,0.182754


Unnamed: 0,Description,Value
0,Session id,123
1,Original data shape,"(1000, 10)"
2,Transformed data shape,"(1000, 10)"
3,Numeric features,10
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

Model: abod
Anomaly Count: 50
Parameters: {'contamination': 0.05, 'method': 'fast', 'n_neighbors': 5}
Silhouette Score: 0.655828089476082

Model: cluster
Anomaly Count: 50
Parameters: {'alpha': 0.9, 'beta': 5, 'check_estimator': False, 'clustering_estimator': None, 'contamination': 0.05, 'n_clusters': 8, 'n_jobs': None, 'random_state': 123, 'use_weights': False}
Silhouette Score: 0.23359422286441145

Model: cof
Anomaly Count: 50
Parameters: {'contamination': 0.05, 'method': 'fast', 'n_neighbors': 20}
Silhouette Score: 0.2364043609408421

Model: iforest
Anomaly Count: 50
Parameters: {'behaviour': 'new', 'bootstrap': False, 'contamination': 0.05, 'max_features': 1.0, 'max_samples': 'auto', 'n_estimators': 100, 'n_jobs': -1, 'random_state': 123, 'verbose': 0}
Silhouette Score: 0.2346580827955606

Model: histogram
Anomaly Count: 50
Parameters: {'alpha': 0.1, 'contamination': 0.05, 'n_bins': 10, 'tol': 0.5}
Silhouette Score: 0.3811381413217788

Model: knn
Anomaly Count: 50
Parameters: {'alg