In [33]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.datasets import load_iris

In [5]:
dataset = load_iris()
iris_x = pd.DataFrame(dataset.data, columns=dataset.feature_names)
iris_y = pd.DataFrame(dataset.target, columns=["target"])
iris_y.target.value_counts().sort_index()

In [83]:
from sklearn.preprocessing import RobustScaler

In [84]:
sc = RobustScaler()
iris_x_scaled = sc.fit_transform(iris_x)

In [28]:
from sklearn.cluster import AffinityPropagation

In [164]:
model = AffinityPropagation(preference=-50,
                        damping=0.5,
                        affinity='euclidean',
                        max_iter=5000,
                        convergence_iter=2500,
                        verbose=False).fit(iris_x_scaled)
cluster_centers_indices = model.cluster_centers_indices_
labels = model.labels_
no_clusters = len(cluster_centers_indices)
no_clusters

3

In [165]:
model.n_iter_

2534

In [166]:
cluster, counts = np.unique(labels, return_counts=True)
dict(zip(unique, counts))

{0: 49, 1: 54, 2: 47}

In [167]:
actual, counts = iris_y.target.unique(), iris_y.target.value_counts().sort_index().values
dict(zip(actual, counts))

{0: 50, 1: 50, 2: 50}

In [119]:
from sklearn.metrics import accuracy_score, recall_score, silhouette_score, homogeneity_score, completeness_score, v_measure_score, adjusted_mutual_info_score

In [168]:
acc = round(accuracy_score(iris_y.target, labels),3)
rec = round(recall_score(iris_y.target, labels,average="macro"),3)
sil = round(silhouette_score(iris_x_scaled, labels,metric='sqeuclidean'),3)
homg = round(homogeneity_score(iris_y.target, labels),3)
comp = round(completeness_score(iris_y.target, labels),3)
vms = round(v_measure_score(iris_y.target, labels),3)
mis = round(adjusted_mutual_info_score(iris_y.target, labels),3)

In [169]:
print("The accuracy score for the model is: {}".format(acc))
print("The recall score for the model is: {}".format(rec))
print("The silhouette score for the model is: {}".format(sil))
print("The homogeneity score for the model is: {}".format(homg))
print("The completeness score for the model is: {}".format(homg))
print("The V-measure score for the model is: {}".format(vms))
print("The Adjusted Mutual Information score for the model is: {}".format(mis))

The accuracy score for the model is: 0.813
The recall score for the model is: 0.813
The silhouette score for the model is: 0.628
The homogeneity score for the model is: 0.616
The completeness score for the model is: 0.616
The V-measure score for the model is: 0.617
The Adjusted Mutual Information score for the model is: 0.612


https://www.geeksforgeeks.org/ml-v-measure-for-evaluating-clustering-performance/

 - __Search for best damping parameter__

In [170]:
damping = np.linspace(0.50, 1.0, 5, endpoint=False)
acc_score = []
vms_score = []
for i in damping:
    model = AffinityPropagation(preference=-50,
                        damping=round(i,1),
                        affinity='euclidean',
                        max_iter=5000,
                        convergence_iter=2500,
                        verbose=False).fit(iris_x_scaled)
    cluster_centers_indices = model.cluster_centers_indices_
    labels = model.labels_
    no_clusters = len(cluster_centers_indices)
    acc = round(accuracy_score(iris_y.target, labels),3)
    vms = round(v_measure_score(iris_y.target, labels),3)
    acc_score.append(acc)
    vms_score.append(vms)

In [171]:
acc_score

[0.813, 0.773, 0.66, 0.66, 0.66]

In [172]:
vms_score

[0.617, 0.603, 0.692, 0.692, 0.692]