### Necessary Imports

In [1]:
#datasets
from sklearn import datasets
#preprocessing
from sklearn.preprocessing import StandardScaler
#clustering
from sklearn.cluster import KMeans, AgglomerativeClustering
#metrics
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import homogeneity_completeness_v_measure as hcv
#visualization
import matplotlib.pyplot as plt

### Load the dataset

In [2]:
# Load scikit-learn's bundled breast-cancer dataset; the cells below read its
# `data` (feature matrix) and `target` (class labels) attributes.
bcw = datasets.load_breast_cancer()

### Separate the dataset into features and classes (X and y)

In [3]:
# Features go into X and the ground-truth classes into y. The classes are only
# used afterwards to score the clusterings, never to fit them.
X, y = bcw.data, bcw.target

### K-Means Clustering

In [4]:
# Baseline K-Means run on the features currently held in X. n_clusters is left
# at scikit-learn's default here; a later cell sets it explicitly.
# K-Means chooses its initial centroids at random, so the seed is pinned to
# keep the labels (and the scores printed further down) reproducible after a
# kernel restart.
kmc = KMeans(random_state=42)
# fit_predict fits the model and returns the cluster index of every row of X.
y_pred = kmc.fit_predict(X)

### Agglomerative Clustering

In [5]:
# Baseline agglomerative (hierarchical) clustering on X, with the estimator
# left entirely at its default settings.
agg_clus = AgglomerativeClustering().fit(X)
# After fitting, labels_ holds the cluster index assigned to each row of X.
y_pred_agg = agg_clus.labels_

### Quantifying the results

In [6]:
def quantify_clustering(clustering, y_pred, y):
    """Print external validation scores for one clustering result.

    Parameters
    ----------
    clustering : str
        Label printed in the header line to identify the result.
    y_pred : array-like
        Cluster index assigned to each sample.
    y : array-like
        Ground-truth class of each sample.

    Returns
    -------
    None
        The scores are printed, rounded to three decimals, and followed by a
        line of 30 asterisks; nothing is returned.
    """
    print("Scores for ", clustering)

    # The adjusted Rand score is symmetric in its two arguments, so passing
    # the predictions first does not affect the value.
    ari = adjusted_rand_score(y_pred, y)
    print("Adjusted rand score: ", round(ari, 3))

    # hcv returns (homogeneity, completeness, v-measure), in that order.
    homogeneity, completeness, v_measure = hcv(y, y_pred)
    named_scores = (
        ('homogeneity', homogeneity),
        ('completeness', completeness),
        ('vmeasure', v_measure),
    )
    for name, value in named_scores:
        print(name, "score: ", round(value, 3))

    print("*" * 30)
    
# Score both baseline clusterings against the true classes.
for title, labels in (
    ('KMeans Clustering', y_pred),
    ('Agglomerative Clustering', y_pred_agg),
):
    quantify_clustering(title, labels, y)

Scores for  KMeans Clustering
Adjusted rand score:  0.233
homogeneity score:  0.63
completeness score:  0.244
vmeasure score:  0.352
******************************
Scores for  Agglomerative Clustering
Adjusted rand score:  0.287
homogeneity score:  0.262
completeness score:  0.408
vmeasure score:  0.319
******************************


### Improving the results

### Scaling the dataset

In [7]:
# Standardise every feature to zero mean and unit variance.
# The scaler is fitted on the raw matrix (bcw.data) rather than on X itself,
# so the result does not depend on what X currently holds and re-running this
# cell always yields the same X.
scaler = StandardScaler()
# X is rebound to the scaled features; every clustering cell below reads X.
X = scaler.fit_transform(bcw.data)

### Changing the KMeans parameters

In [8]:
# K-Means with the number of clusters set explicitly to 2.
# The seed is pinned because K-Means starts from randomly chosen centroids;
# without it the labels and the reported scores can differ between runs.
kmc2 = KMeans(n_clusters=2, random_state=42)
# fit_predict fits the model and returns the cluster index of every row of X.
y_pred_adjusted = kmc2.fit_predict(X)

### Fitting Agglomerative Clustering again with the scaled data

In [10]:
# Repeat the default agglomerative clustering on X as it stands now, i.e.
# after the scaling cell above has replaced it with standardised features.
agg_clus = AgglomerativeClustering().fit(X)
# After fitting, labels_ holds the cluster index assigned to each row of X.
y_pred_agg_adjusted = agg_clus.labels_

### Results after making changes

In [11]:
# Score both re-fitted clusterings against the true classes.
for title, labels in (
    ('KMeans Clustering', y_pred_adjusted),
    ('Agglomerative Clustering', y_pred_agg_adjusted),
):
    quantify_clustering(title, labels, y)

Scores for  KMeans Clustering
Adjusted rand score:  0.671
homogeneity score:  0.544
completeness score:  0.565
vmeasure score:  0.555
******************************
Scores for  Agglomerative Clustering
Adjusted rand score:  0.575
homogeneity score:  0.446
completeness score:  0.468
vmeasure score:  0.457
******************************


In [25]:
# NOTE(review): `accuracies` is never filled by this loop. It is kept in case
# code outside this cell reads it; confirm and remove if it is unused.
accuracies = []
# Sweep the number of clusters from 1 to 9 and print the scores for each value.
for i in range(1, 10):
    # Sweep-local names are used on purpose: assigning to kmc2 and
    # y_pred_adjusted here would replace the k=2 model and labels from the
    # earlier cell with the k=9 ones, so re-running the results cell afterwards
    # would report the wrong clustering.
    # The seed is pinned so the sweep gives the same scores on every run.
    kmc_sweep = KMeans(n_clusters=i, random_state=42)
    y_pred_sweep = kmc_sweep.fit_predict(X)
    quantify_clustering('k ='+str(i), y_pred_sweep, y)

Scores for  k =1
Adjusted rand score:  0.0
homogeneity score:  0.0
completeness score:  1.0
vmeasure score:  0.0
******************************
Scores for  k =2
Adjusted rand score:  0.671
homogeneity score:  0.544
completeness score:  0.565
vmeasure score:  0.555
******************************
Scores for  k =3
Adjusted rand score:  0.511
homogeneity score:  0.503
completeness score:  0.364
vmeasure score:  0.422
******************************
Scores for  k =4
Adjusted rand score:  0.609
homogeneity score:  0.646
completeness score:  0.403
vmeasure score:  0.496
******************************
Scores for  k =5
Adjusted rand score:  0.334
homogeneity score:  0.647
completeness score:  0.293
vmeasure score:  0.403
******************************
Scores for  k =6
Adjusted rand score:  0.332
homogeneity score:  0.686
completeness score:  0.294
vmeasure score:  0.412
******************************
Scores for  k =7
Adjusted rand score:  0.258
homogeneity score:  0.704
completeness score:  0.26