In [None]:
import numpy as np
import requests
import pandas as pd
import matplotlib.pyplot as plt
import genieclust
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import adjusted_mutual_info_score, adjusted_rand_score, v_measure_score

# **<span style="color:#3c1518">2D Dataset</span>**

In [None]:
# Base URL under which the benchmark data and reference-label files are fetched.
github = "https://raw.githubusercontent.com/gagolews/clustering-data-v1/master/"

dataset = "fcps/engytime" # Change dataset here 
data_url = github + dataset + ".data.gz"
labels_url = github + dataset + ".labels0.gz"

# Download the feature matrix. `raise_for_status()` aborts on HTTP errors so that
# an error page is never written to disk and mistaken for a gzip archive; the
# timeout keeps a stalled connection from hanging the cell indefinitely.
response = requests.get(data_url, timeout=30)
response.raise_for_status()
with open("dataset.gz", "wb") as data_file:
    # The context manager guarantees the file is flushed and closed before
    # np.loadtxt reads it back below.
    data_file.write(response.content)

# Download the reference labels the same way.
response = requests.get(labels_url, timeout=30)
response.raise_for_status()
with open("labels.gz", "wb") as labels_file:
    labels_file.write(response.content)

# np.loadtxt is given the .gz paths directly; ndmin=2 keeps X two-dimensional.
X = np.loadtxt("dataset.gz", ndmin=2)
# 1 is subtracted from every loaded label (presumably the files use 1-based
# labels and this makes them 0-based -- confirm against the dataset docs).
y = np.loadtxt("labels.gz", dtype=np.intc) - 1
# Number of distinct reference labels; used as the target number of clusters.
n_clusters = len(np.unique(y))

# One row per clustering algorithm, holding its external validity scores.
metrics = pd.DataFrame(columns=['Clustering Algorithm', 'ARI', 'AMI', 'V-measure'])

## <span style="color:#69140e">K-Means</span>

In [None]:
clStr = 'K-Means'
# Use the cluster count derived from the reference labels instead of a
# hard-coded 2, so that switching `dataset` in the loading cell keeps the
# comparison against `y` meaningful. random_state=0 fixes the initialisation.
cl = KMeans(n_clusters=n_clusters, random_state=0)
# Convert the features to a float DataFrame (re-running on an existing
# DataFrame is harmless, so the cell stays re-runnable).
X = pd.DataFrame(X).astype(float)
cl.fit(X)
prediction = cl.predict(X)

# Score the predicted partition against the reference labels.
ari = adjusted_rand_score(y, prediction)
ami = adjusted_mutual_info_score(y, prediction)
v_measure = v_measure_score(y, prediction)

# Upsert this algorithm's row: overwrite the existing row when the cell is
# re-run, otherwise append a new row at the end of the table.
existing_rows = metrics[metrics['Clustering Algorithm'] == clStr].index.to_list()
index = existing_rows[0] if existing_rows else len(metrics)
metrics.loc[index] = [clStr, ari, ami, v_measure]

# Reference partition.
genieclust.plots.plot_scatter(X, labels=y)
plt.title("True Clusters")
plt.axis("equal")
plt.show()

# Partition found by K-Means, drawn with the same axis scaling for comparison.
genieclust.plots.plot_scatter(X, labels=prediction)
plt.title("Predicted Clusters") 
plt.axis("equal")
plt.show()

## <span style="color:#69140e">HAC</span>