In [11]:
pip install scikit-learn-extra

Note: you may need to restart the kernel to use updated packages.


In [12]:
import numpy as np
from sklearn_extra.cluster import KMedoids
from sklearn.metrics import silhouette_score
from tabulate import tabulate
from sklearn.metrics import pairwise_distances
import os

In [13]:
# Directory that holds the feature files. Set the FEATURES_DIR environment variable to
# run this notebook on another machine; the fallback is the original local path.
DATA_DIR = os.environ.get('FEATURES_DIR', 'C:\\Users\\yamin\\OneDrive\\Desktop\\Features')
# The loading cell opens its input by bare file name, so the working directory must point here.
os.chdir(DATA_DIR)

In [14]:
# Parse the comma-separated numeric text file into a NumPy array
source_path = '11.txt'
with open(source_path, mode="r") as handle:
    data = np.loadtxt(handle, delimiter=",")

In [15]:
# Keep at most the first 1000 rows (all columns) for the analysis
row_limit = 1000
data = data[:row_limit, :]

In [16]:
# Column 0 is taken as the label; every remaining column is a feature
labels, features = data[:, 0], data[:, 1:]

In [20]:
# Candidate cluster counts to compare.
k_values = range(3, 6)

# Fixed seed: with init="random" the starting medoids are drawn at random, so without a
# seed every re-run of this cell can report different clusters, scores and anomaly counts.
RANDOM_STATE = 42

# One unfitted KMedoids model per candidate K.
k_medoids_models = [
    KMedoids(n_clusters=k, init="random", metric="euclidean", random_state=RANDOM_STATE)
    for k in k_values
]

table_data = []
# (predicted_labels, medoids) for each K, in the same order as k_values.
predicted_labels_and_medoids = []
for k_medoids in k_medoids_models:
    k_medoids.fit(features)
    predicted_labels = k_medoids.labels_
    medoids = k_medoids.cluster_centers_
    predicted_labels_and_medoids.append((predicted_labels, medoids))

    silhouette_coef = silhouette_score(features, predicted_labels)

    # Distance from every sample to every medoid: one row per sample, one column per medoid.
    # Computed once and reused for both the inertia and the anomaly check.
    distances = k_medoids.transform(features)

    # Inertia: total distance from each sample to its closest medoid.
    inertia = distances.min(axis=1).sum()

    # Distance from each sample to the medoid it was actually assigned to. Selecting by
    # label keeps distances to the *other* medoids out of the anomaly statistics.
    assigned_distances = distances[np.arange(distances.shape[0]), predicted_labels]

    # Anomaly threshold: mean + 2 standard deviations of the sample-to-own-medoid distances.
    threshold = assigned_distances.mean() + 2 * assigned_distances.std()

    # Row indices of samples lying farther from their medoid than the threshold.
    # np.where returns a tuple with one index array per dimension, so take element 0
    # and count its entries; len() of the tuple would only report the number of dimensions.
    anomalies = np.where(assigned_distances > threshold)[0]
    anomalies_count = anomalies.size

    print(f"K = {k_medoids.n_clusters}, Silhouette Coefficient: {silhouette_coef}, Inertia: {inertia}")
    #print(f"Predicted Labels: {predicted_labels}")
    print(f"Medoids: {medoids}")
    print(f"Anomalies: {anomalies_count}")
    print()
    

K = 3, Silhouette Coefficient: 0.785482658216711, Inertia: 1.1783316353258282e+16
Medoids: [[3.80116e+05 8.04530e+12 1.30108e+05 6.73698e+11 2.26656e+02 1.44612e+05
  3.32987e+05 6.17815e+12 9.34586e+04 3.10725e+11 1.09916e+03 6.96814e+07
  8.50156e+02 4.08252e+07 2.69655e+05 0.00000e+00 2.76465e+05 0.00000e+00
  9.68000e+02 0.00000e+00 2.45913e+05 0.00000e+00 2.55399e+05 0.00000e+00
  4.15000e+02 0.00000e+00 3.84000e+02 0.00000e+00]
 [0.00000e+00 0.00000e+00 1.64000e+02 1.28000e+03 0.00000e+00 0.00000e+00
  0.00000e+00 0.00000e+00 6.66667e+01 8.88889e+02 0.00000e+00 0.00000e+00
  2.33333e+00 5.55556e-01 3.61667e+02 6.54014e+05 1.23333e+02 3.04222e+03
  1.60333e+02 1.28534e+05 2.04667e+02 2.09442e+05 6.66667e+01 8.88889e+02
  5.16667e+00 1.33472e+02 1.66667e+00 5.55556e-01]
 [7.18000e+02 0.00000e+00 2.70000e+02 9.00000e+02 6.00000e+00 0.00000e+00
  4.90000e+02 0.00000e+00 0.00000e+00 0.00000e+00 4.00000e+00 0.00000e+00
  4.50000e+00 2.50000e-01 1.09278e+06 1.36310e+13 5.53307e+04 2.790