## 0.0 Imports

In [1]:
import json
import os

import pandas as pd
from sklearn.cluster import KMeans, AffinityPropagation
from sklearn.metrics import silhouette_score

## 0.1 Loading Dataset

In [2]:
home = os.path.dirname(os.getcwd())

data_path = os.path.join(home, 'data', 'X_dataset.csv')

data = pd.read_csv(data_path)



In [3]:
data.head()

Unnamed: 0,alcohol,malic_acid,ash,ash_alcanity,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280,proline
0,1.518613,0.1917,0.232053,-1.169593,1.913905,0.627586,0.57384,-0.659563,1.224884,0.251717,0.455285,0.970696,0.561341
1,0.24629,0.205534,-0.827996,-2.490847,0.018145,0.575862,0.510549,-0.820719,-0.544721,-0.293321,0.463415,0.78022,0.550642
2,0.196879,0.320158,1.109334,-0.268738,0.088358,0.627586,0.611814,-0.498407,2.135968,0.26902,0.447154,0.695971,0.646933
3,1.69155,0.23913,0.487926,-0.809251,0.930918,0.989655,0.664557,-0.981875,1.032155,1.186068,0.308943,0.798535,0.857347
4,0.2957,0.365613,1.840403,0.451946,1.281985,0.627586,0.495781,0.226796,0.401404,-0.319276,0.455285,0.608059,0.325963


## 0.2 Help Function

In [4]:
def classifier_evaluetion(model, param):

    model = model(**param)

    labels = model.fit_predict(data)
    
    metric = get_metric(data,labels)

    result = pd.DataFrame({
        'name': model.__class__.__name__,
        'silhouette_score': metric,
        'param': json.dumps(param)
    },index=[0])

    return result


def get_metric(data,labels):
    metric = silhouette_score(data, labels)
    return metric


# 1.0 K-Means

In [5]:
result_km = pd.DataFrame()

for i in range(2, 20, 1):
    # Define
    model = KMeans
    param= {
        'n_clusters': i
    }

    result = classifier_evaluetion(model, param)

    result_km = pd.concat([result_km, result]).reset_index(drop=True)

In [6]:
result_km

Unnamed: 0,name,silhouette_score,param
0,KMeans,0.214245,"{""n_clusters"": 2}"
1,KMeans,0.233105,"{""n_clusters"": 3}"
2,KMeans,0.224985,"{""n_clusters"": 4}"
3,KMeans,0.20853,"{""n_clusters"": 5}"
4,KMeans,0.16812,"{""n_clusters"": 6}"
5,KMeans,0.196706,"{""n_clusters"": 7}"
6,KMeans,0.18531,"{""n_clusters"": 8}"
7,KMeans,0.176401,"{""n_clusters"": 9}"
8,KMeans,0.175874,"{""n_clusters"": 10}"
9,KMeans,0.171391,"{""n_clusters"": 11}"


# 2.0 Affinity Propagation

In [None]:
result_af = pd.DataFrame()

for i in range(-1, -20, -1):
    # Define
    model = AffinityPropagation
    param= {
        'preference': i
    }

    
    result = classifier_evaluetion(model, param)

    result_af = pd.concat([result_af, result]).reset_index(drop=True)

In [8]:
result_af

Unnamed: 0,name,silhouette_score,param
0,AffinityPropagation,0.050588,"{""preference"": -1}"
1,AffinityPropagation,0.129858,"{""preference"": -2}"
2,AffinityPropagation,0.149879,"{""preference"": -3}"
3,AffinityPropagation,0.156879,"{""preference"": -4}"
4,AffinityPropagation,0.159334,"{""preference"": -5}"
5,AffinityPropagation,0.164841,"{""preference"": -6}"
6,AffinityPropagation,0.173583,"{""preference"": -7}"
7,AffinityPropagation,0.168146,"{""preference"": -8}"
8,AffinityPropagation,0.170898,"{""preference"": -9}"
9,AffinityPropagation,0.171268,"{""preference"": -10}"
