# Affinity Propagation

In [24]:
import numpy as np
import json
from sklearn import metrics
from sklearn.cluster import AffinityPropagation
from sklearn.datasets import make_blobs
import random

# Pool of given names that generate_name() samples from.
# NOTE: seven names (William, Isabella, Benjamin, Lucas, Alexander, Jacob,
# Avery) appear twice, so random.choice() returns them twice as often as
# the others.
first_names = [
    "Liam", "Olivia", "Noah", "Emma", "William", "Ava",
    "James", "Isabella", "Benjamin", "Sophia", "Lucas", "Mia",
    "Henry", "Charlotte", "Alexander", "Amelia", "Jacob", "Harper",
    "Michael", "Evelyn", "Daniel", "Abigail", "Matthew", "Emily",
    "Ethan", "Elizabeth", "Alexander", "Avery", "Jayden", "Sofia",
    "Sebastian", "Ella", "Jacob", "Aria", "William", "Lily",
    "Oliver", "Chloe", "Benjamin", "Isabella", "Elijah", "Avery",
    "Lucas",
]

# South-East Asian and Asia-Pacific country names.
# The export step below indexes this list by cluster label, so the order of
# the entries determines which country each cluster is tagged with.
country_names = [
    # South-East Asia
    "Brunei", "Cambodia", "East Timor", "Indonesia",
    "Laos", "Malaysia", "Myanmar", "Philippines",
    "Singapore", "Thailand", "Vietnam",
    # Asia-Pacific / Oceania
    "Australia", "Fiji", "Kiribati", "Marshall Islands",
    "Micronesia", "Nauru", "New Zealand", "Palau",
    "Papua New Guinea", "Samoa",
]

def generate_name():
    """Return one entry of the module-level ``first_names`` list.

    The entry is drawn with ``random.choice``, so the result depends on the
    state of Python's global ``random`` generator.
    """
    picked = random.choice(first_names)
    return picked

# Seed Python's global RNG so the names drawn by generate_name() are the same
# on every run (make_blobs and AffinityPropagation are already seeded through
# their random_state arguments).
random.seed(0)

# Generate sample data in 3D: 300 points scattered around three blob centres.
centers = [[7, 7, 7], [-7, -7, -7], [7, -7, 7]]
X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=2, random_state=0, n_features=3)

# Compute Affinity Propagation
# NOTE(review): the recorded output of this cell reports 17 estimated clusters
# for 3 generated blobs (completeness 0.396); preference=-50 may be too high
# for data at this scale -- confirm the over-segmentation is intended.
af = AffinityPropagation(preference=-50, random_state=0, damping=0.5).fit(X)
cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_

n_clusters_ = len(cluster_centers_indices)

# Clustering quality: the first five scores compare against the generating
# blob labels, the silhouette coefficient only uses the data itself.
print("Estimated number of clusters: %d" % n_clusters_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric="sqeuclidean"))

# Row indices of the exemplar points as plain ints in a set, so the
# per-point membership test below is O(1) instead of a scan over an ndarray.
exemplar_indices = set(cluster_centers_indices.tolist())

# Prepare data for export: one record per sample plus the model parameters.
model_data = {
    "points": [
        {
            "name": generate_name(),
            # Wrap around the country list so a run that finds more clusters
            # than there are countries does not raise IndexError.
            "country": country_names[int(label) % len(country_names)],
            # Euclidean distance from the point to its cluster's exemplar,
            # cast to a built-in float like the coordinates below.
            "distance": float(np.linalg.norm(point - af.cluster_centers_[label])),
            "x": float(point[0]),
            "y": float(point[1]),
            "z": float(point[2]),
            "cluster": int(label),
            "isExemplar": i in exemplar_indices
        }
        for i, (point, label) in enumerate(zip(X, labels))
    ],
    "parameters": {
        "preference": af.preference,
        "damping": af.damping,
        "n_clusters": n_clusters_
    },
}

# Export to JSON (the target directory must already exist).
with open('./public/api/aprop/model_data_3d.json', 'w') as json_file:
    json.dump(model_data, json_file, indent=4)

Estimated number of clusters: 17
Homogeneity: 1.000
Completeness: 0.396
V-measure: 0.567
Adjusted Rand Index: 0.237
Adjusted Mutual Information: 0.554
Silhouette Coefficient: 0.412


# KMeans Clustering

In [29]:
import numpy as np
import json
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import random

# Pool of given names that generate_name() samples from.
# NOTE: seven names (William, Isabella, Benjamin, Lucas, Alexander, Jacob,
# Avery) appear twice, so random.choice() returns them twice as often as
# the others.
first_names = [
    "Liam", "Olivia", "Noah", "Emma", "William", "Ava",
    "James", "Isabella", "Benjamin", "Sophia", "Lucas", "Mia",
    "Henry", "Charlotte", "Alexander", "Amelia", "Jacob", "Harper",
    "Michael", "Evelyn", "Daniel", "Abigail", "Matthew", "Emily",
    "Ethan", "Elizabeth", "Alexander", "Avery", "Jayden", "Sofia",
    "Sebastian", "Ella", "Jacob", "Aria", "William", "Lily",
    "Oliver", "Chloe", "Benjamin", "Isabella", "Elijah", "Avery",
    "Lucas",
]

# South-East Asian and Asia-Pacific country names.
# The export step below draws one of these at random for every point.
country_names = [
    # South-East Asia
    "Brunei", "Cambodia", "East Timor", "Indonesia",
    "Laos", "Malaysia", "Myanmar", "Philippines",
    "Singapore", "Thailand", "Vietnam",
    # Asia-Pacific / Oceania
    "Australia", "Fiji", "Kiribati", "Marshall Islands",
    "Micronesia", "Nauru", "New Zealand", "Palau",
    "Papua New Guinea", "Samoa",
]

def generate_name():
    """Return one entry of the module-level ``first_names`` list.

    The entry is drawn with ``random.choice``, so the result depends on the
    state of Python's global ``random`` generator.
    """
    picked = random.choice(first_names)
    return picked

# Seed Python's global RNG so the randomly drawn names and countries are the
# same on every run (make_blobs and KMeans are already seeded through their
# random_state arguments).
random.seed(0)

# Generate sample data in 3D: 100 points scattered around five blob centres.
# make_blobs takes the dimensionality from explicit centres, so n_features is
# set to 3 to agree with them (and with the x/y/z export below).
centers = [[6, 6, 6], [-6, -6, -6], [6, -6, 6], [-6, 6, -6], [6, 6, -6]]
X, labels_true = make_blobs(n_samples=100, centers=centers, cluster_std=2, random_state=0, n_features=3)

# Compute K-Means clustering
# init and n_init are passed explicitly so the values exported under
# "parameters" are the ones actually used, independent of the library's
# version-specific defaults.
km = KMeans(n_clusters=5, init="k-means++", n_init=10, random_state=0).fit(X)
labels = km.labels_
cluster_centers = km.cluster_centers_

# Clustering quality: the first five scores compare against the generating
# blob labels, the silhouette coefficient only uses the data itself.
print("Number of clusters: %d" % km.n_clusters)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels, metric="sqeuclidean"))

# K-Means centroids are not data points, so the "exemplar" of a cluster is
# taken to be the sample closest to its centroid. Computed once here rather
# than once per exported point.
exemplar_indices = {
    int(np.argmin(np.linalg.norm(X - center, axis=1)))
    for center in cluster_centers
}

# Prepare data for export: one record per sample plus the model parameters.
model_data = {
    "points": [
        {
            "name": generate_name(),
            "country": random.choice(country_names),
            # Euclidean distance from the point to its cluster centroid,
            # cast to a built-in float like the coordinates below.
            "distance": float(np.linalg.norm(point - cluster_centers[label])),
            "x": float(point[0]),
            "y": float(point[1]),
            "z": float(point[2]),
            "cluster": int(label),
            "isExemplar": i in exemplar_indices
        }
        for i, (point, label) in enumerate(zip(X, labels))
    ],
    "parameters": {
        "n_clusters": km.n_clusters,
        "init": km.init,
        "n_init": km.n_init
    },
}

# Export to JSON (the target directory must already exist).
with open('./public/api/kmeans/model_data_3d.json', 'w') as json_file:
    json.dump(model_data, json_file, indent=4)


Number of clusters: 5
Homogeneity: 1.000
Completeness: 1.000
V-measure: 1.000
Adjusted Rand Index: 1.000
Adjusted Mutual Information: 1.000
Silhouette Coefficient: 0.876
