## Agglomerative Clustering

In [176]:
import pandas as pd
import numpy as np
from sklearn import datasets
from scipy.spatial.distance import cdist
import math

# Sentinel "infinitely far" distance.  It seeds the running minimum for
# single linkage and masks the diagonal of the distance matrix, so it must
# compare greater than every real distance.  A true infinity guarantees that;
# a finite placeholder such as 999 stops working as soon as the data contain
# distances of that size or larger.
INF = float("inf")

# Euclidean distance between two data points.
def euclidian_distance(arr1, arr2):
    """Return the Euclidean (L2) distance between ``arr1`` and ``arr2``.

    Both arguments are indexable sequences of numbers.  Only the first
    ``len(arr1)`` coordinates are compared, so ``arr2`` must be at least as
    long as ``arr1``; a shorter ``arr2`` raises ``IndexError``.
    """
    # Coordinates are read by position so the length handling described
    # above stays exactly as documented.
    squared_diffs = (pow(arr1[k] - arr2[k], 2) for k in range(len(arr1)))
    return math.sqrt(sum(squared_diffs))

# Distance between two clusters for the point-wise linkage modes.
def generate_distance_minmax(cluster1, cluster2, mode):
    """Return the linkage distance between two clusters.

    Args:
        cluster1: Sequence of points (each an indexable sequence of numbers).
        cluster2: Sequence of points of the same dimensionality.
        mode: ``"single"`` (smallest pairwise distance), ``"complete"``
            (largest pairwise distance) or ``"average"`` (mean of all
            pairwise distances).

    Returns:
        The linkage distance.  If either cluster is empty, ``"single"``
        returns the module-level ``INF`` sentinel and ``"complete"``
        returns ``0``.

    Raises:
        ValueError: If ``mode`` is not one of the three supported names
            (previously such a mode silently produced a distance of 0).
        ZeroDivisionError: If ``mode`` is ``"average"`` and either cluster
            is empty.
    """
    if mode not in ("single", "complete", "average"):
        raise ValueError(
            "mode must be 'single', 'complete' or 'average', got %r" % (mode,)
        )

    # Lazily yields every pairwise distance, cluster1-major, so nothing is
    # materialised and "average" adds the terms in a fixed order.
    distances = (
        euclidian_distance(point1, point2)
        for point1 in cluster1
        for point2 in cluster2
    )

    if mode == "single":
        # Taking min() over the real distances means the result is never
        # capped by the sentinel; INF is only the answer for "no pairs".
        return min(distances, default=INF)
    if mode == "complete":
        return max(distances, default=0)
    return sum(distances) / (len(cluster1) * len(cluster2))

# Mean (centroid) of the members of a cluster.
def get_mean_cluster(cluster):
    """Return the coordinate-wise mean of the points in ``cluster``.

    The points are summed along axis 0 (i.e. across members) and the sums
    are divided by the number of members using true division.
    """
    member_count = len(cluster)
    coordinate_sums = np.sum(cluster, axis=0)
    return np.true_divide(coordinate_sums, member_count)

# Build the cluster-to-cluster distance matrix for the requested mode.
def generate_distance_matrix(clusterList, mode):
    """Return the square matrix of distances between every pair of clusters.

    Args:
        clusterList: List of clusters; each cluster is a list of points.
        mode: ``"average_group"`` measures the Euclidean distance between
            the cluster means; any other value is forwarded to
            ``generate_distance_minmax``.

    Returns:
        A ``len(clusterList) x len(clusterList)`` NumPy array whose entry
        ``[i][j]`` is the distance from cluster ``i`` to cluster ``j``.  The
        diagonal holds the module-level ``INF`` sentinel so that a cluster
        is not selected as its own nearest neighbour.
    """
    size = len(clusterList)
    distMatrix = np.empty((size, size))

    if mode == "average_group":
        # Every mean is needed for a whole row and a whole column, so work
        # it out once per cluster instead of twice per matrix cell.
        centroids = [get_mean_cluster(cluster) for cluster in clusterList]

        def pair_distance(i, j):
            return euclidian_distance(centroids[i], centroids[j])
    else:
        def pair_distance(i, j):
            return generate_distance_minmax(clusterList[i], clusterList[j], mode)

    for i in range(size):
        for j in range(size):
            distMatrix[i][j] = INF if i == j else pair_distance(i, j)

    return distMatrix

# Initialise the cluster list: every data point starts as its own cluster.
def init_cluster(dataset):
    """Return one single-member cluster per row of ``dataset``.

    Each row is copied into a plain ``list`` and wrapped in its own
    one-element list, so the result has the shape
    ``[[row0], [row1], ...]``.
    """
    # Rows are fetched by position, matching len()/[] access on the input.
    return [[list(dataset[row])] for row in range(len(dataset))]

# Merge one cluster into another, in place.
def update_cluster(clusterList, indexCluster1, indexCluster2):
    """Move every member of cluster ``indexCluster2`` into ``indexCluster1``.

    ``clusterList`` is modified in place: the members of the second cluster
    are appended to the first one and the second cluster is then removed
    from the list.  Nothing is returned.

    Raises:
        ValueError: If both indices are equal (a cluster cannot be merged
            with itself).
        IndexError: If either index is out of range.
    """
    if indexCluster1 == indexCluster2:
        raise ValueError("cannot merge a cluster with itself")

    # Copy the members across *before* removing the source cluster.  Removing
    # first would shift every later position down by one, so a target index
    # greater than the source index would then point at the wrong cluster.
    clusterList[indexCluster1].extend(clusterList[indexCluster2])
    del clusterList[indexCluster2]
    

# Returns the list of clusters produced by agglomerative clustering;
# each cluster is a list of the data points that belong to it.
def agglomerative_clustering(dataset, numCluster, mode):
    """Cluster ``dataset`` bottom-up until ``numCluster`` clusters remain.

    Every point starts as its own cluster; the two closest clusters are then
    merged repeatedly.

    Args:
        dataset: Indexable collection of data points.
        numCluster: Number of clusters to stop at.
        mode: Linkage used to measure cluster distance: ``"single"``,
            ``"complete"``, ``"average"`` or ``"average_group"``.

    Returns:
        A list of clusters, each a list of points (points are plain lists).

    Raises:
        ValueError: If ``numCluster`` is neither equal to the number of data
            points nor in the range ``1 <= numCluster < number of points``.
            Such a target can never be reached by merging.
    """
    clusterList = init_cluster(dataset)

    pointCount = len(clusterList)
    if numCluster != pointCount and not 1 <= numCluster < pointCount:
        raise ValueError(
            "numCluster must be between 1 and %d (the number of data points), "
            "got %r" % (pointCount, numCluster)
        )

    while len(clusterList) > numCluster:
        # Distance matrix for the current clusters under the chosen linkage.
        distMatrix = generate_distance_matrix(clusterList, mode)

        # Position of the first (row-major) occurrence of the smallest entry.
        row, col = np.unravel_index(np.argmin(distMatrix), distMatrix.shape)

        # The minimum can sit below the diagonal (row > col) when the matrix
        # is not exactly symmetric -- e.g. average linkage adds the same
        # pairwise distances in a different order for (i, j) and (j, i), so
        # the two entries may differ by rounding.  Always merging the
        # higher-indexed cluster into the lower-indexed one keeps the target
        # position valid after the other cluster is removed.
        keep, drop = (row, col) if row < col else (col, row)

        update_cluster(clusterList, keep, drop)

    return clusterList

def print_cluster(clusterList):
    """Print, for every cluster, its index, its member count and its members."""
    for position, members in enumerate(clusterList):
        print(f"cluster: {position}")
        print(f"jumlah anggota: {len(members)}")
        print(members)

# Cluster label for every data point, in the same order as the dataset.
def get_target_cluster(dataset, numCluster, mode):
    """Return the cluster index assigned to each row of ``dataset``.

    The data are clustered with ``agglomerative_clustering`` and every row
    is then looked up, by value, among the resulting clusters.

    Args:
        dataset: Indexable collection of data points whose coordinates are
            hashable values (e.g. numbers).
        numCluster: Number of clusters to produce.
        mode: Linkage mode, forwarded to ``agglomerative_clustering``.

    Returns:
        A list with one entry per row: the index of the cluster containing
        that row, or ``-1`` if the row is found in no cluster.  Rows with
        identical values always receive the same label; if equal points
        occur in several clusters, the highest cluster index is used.
    """
    clusterList = agglomerative_clustering(dataset, numCluster, mode)

    # A single pass over the clusters builds a point -> label table, which
    # replaces scanning every cluster again for every row.  Later clusters
    # overwrite earlier ones, so the highest matching index wins.
    labelByPoint = {}
    for label, cluster in enumerate(clusterList):
        for point in cluster:
            labelByPoint[tuple(point)] = label

    return [
        labelByPoint.get(tuple(dataset[i]), -1)
        for i in range(len(dataset))
    ]

# Load the Iris dataset and use its `data` array as the clustering input.
iris = datasets.load_iris()
dataset = iris.data

### Single Linkage

In [177]:
# Cluster the data into 3 groups with single linkage and print each cluster.
clusterList = agglomerative_clustering(dataset, 3, "single")
print_cluster(clusterList)

# Per-point cluster labels, in dataset order, followed by their count.
# NOTE: get_target_cluster() runs the clustering again internally.
print("\nTarget List")
target = get_target_cluster(dataset, 3, "single")
print(target)
print(len(target))

cluster: 0
jumlah anggota: 50
[[5.1, 3.5, 1.4, 0.2], [5.1, 3.5, 1.4, 0.3], [5.0, 3.5, 1.3, 0.3], [5.0, 3.6, 1.4, 0.2], [4.9, 3.6, 1.4, 0.1], [5.0, 3.4, 1.5, 0.2], [5.1, 3.4, 1.5, 0.2], [5.0, 3.3, 1.4, 0.2], [5.2, 3.5, 1.5, 0.2], [5.2, 3.4, 1.4, 0.2], [5.0, 3.2, 1.2, 0.2], [5.4, 3.7, 1.5, 0.2], [5.3, 3.7, 1.5, 0.2], [5.1, 3.3, 1.7, 0.5], [5.0, 3.4, 1.6, 0.4], [5.0, 3.5, 1.6, 0.6], [4.9, 3.0, 1.4, 0.2], [4.9, 3.1, 1.5, 0.1], [4.9, 3.1, 1.5, 0.2], [4.8, 3.0, 1.4, 0.3], [4.8, 3.0, 1.4, 0.1], [4.7, 3.2, 1.6, 0.2], [4.8, 3.1, 1.6, 0.2], [5.0, 3.0, 1.6, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [4.6, 3.2, 1.4, 0.2], [4.4, 2.9, 1.4, 0.2], [4.4, 3.0, 1.3, 0.2], [4.4, 3.2, 1.3, 0.2], [4.8, 3.4, 1.6, 0.2], [4.6, 3.4, 1.4, 0.3], [5.1, 3.8, 1.5, 0.3], [5.1, 3.7, 1.5, 0.4], [5.1, 3.8, 1.6, 0.2], [4.3, 3.0, 1.1, 0.1], [4.8, 3.4, 1.9, 0.2], [5.5, 3.5, 1.3, 0.2], [5.4, 3.4, 1.7, 0.2], [5.4, 3.4, 1.5, 0.4], [5.4, 3.9, 1.7, 0.4], [5.7, 3.8, 1.7, 0.3], [5.4, 3.9, 1.3, 0.4], [5.2, 4.1, 1.5, 0.1], [

### Complete Linkage

In [178]:
# Cluster the data into 3 groups with complete linkage and print each cluster.
clusterList = agglomerative_clustering(dataset, 3, "complete")
print_cluster(clusterList)

# Per-point cluster labels, in dataset order.
# NOTE: get_target_cluster() runs the clustering again internally.
print("\nTarget List")
target = get_target_cluster(dataset, 3, "complete")
print(target)

cluster: 0
jumlah anggota: 50
[[5.1, 3.5, 1.4, 0.2], [5.1, 3.5, 1.4, 0.3], [5.0, 3.5, 1.3, 0.3], [5.0, 3.4, 1.5, 0.2], [5.1, 3.4, 1.5, 0.2], [5.0, 3.3, 1.4, 0.2], [5.2, 3.5, 1.5, 0.2], [5.2, 3.4, 1.4, 0.2], [5.0, 3.6, 1.4, 0.2], [4.9, 3.6, 1.4, 0.1], [5.0, 3.2, 1.2, 0.2], [4.9, 3.0, 1.4, 0.2], [4.8, 3.0, 1.4, 0.3], [4.8, 3.0, 1.4, 0.1], [4.9, 3.1, 1.5, 0.1], [4.9, 3.1, 1.5, 0.2], [5.0, 3.0, 1.6, 0.2], [4.7, 3.2, 1.6, 0.2], [4.8, 3.1, 1.6, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [4.6, 3.2, 1.4, 0.2], [4.4, 3.2, 1.3, 0.2], [4.6, 3.4, 1.4, 0.3], [4.4, 2.9, 1.4, 0.2], [4.4, 3.0, 1.3, 0.2], [4.3, 3.0, 1.1, 0.1], [4.6, 3.6, 1.0, 0.2], [4.5, 2.3, 1.3, 0.3], [5.4, 3.9, 1.7, 0.4], [5.7, 3.8, 1.7, 0.3], [5.4, 3.7, 1.5, 0.2], [5.3, 3.7, 1.5, 0.2], [5.5, 3.5, 1.3, 0.2], [5.4, 3.4, 1.7, 0.2], [5.4, 3.4, 1.5, 0.4], [4.8, 3.4, 1.6, 0.2], [4.8, 3.4, 1.9, 0.2], [5.1, 3.3, 1.7, 0.5], [5.0, 3.4, 1.6, 0.4], [5.0, 3.5, 1.6, 0.6], [5.1, 3.8, 1.5, 0.3], [5.1, 3.7, 1.5, 0.4], [5.1, 3.8, 1.6, 0.2], [

### Average Linkage

In [179]:
# Cluster the data into 3 groups with average linkage and print each cluster.
clusterList = agglomerative_clustering(dataset, 3, "average")
print_cluster(clusterList)

# Per-point cluster labels, in dataset order.
# NOTE: get_target_cluster() runs the clustering again internally.
print("\nTarget List")
target = get_target_cluster(dataset, 3, "average")
print(target)

cluster: 0
jumlah anggota: 50
[[5.4, 3.7, 1.5, 0.2], [5.3, 3.7, 1.5, 0.2], [5.1, 3.8, 1.5, 0.3], [5.1, 3.7, 1.5, 0.4], [5.1, 3.8, 1.6, 0.2], [5.4, 3.9, 1.3, 0.4], [5.4, 3.4, 1.7, 0.2], [5.4, 3.4, 1.5, 0.4], [5.5, 3.5, 1.3, 0.2], [5.1, 3.3, 1.7, 0.5], [5.0, 3.4, 1.6, 0.4], [5.0, 3.5, 1.6, 0.6], [5.2, 4.1, 1.5, 0.1], [5.5, 4.2, 1.4, 0.2], [4.6, 3.4, 1.4, 0.3], [5.0, 3.6, 1.4, 0.2], [4.9, 3.6, 1.4, 0.1], [4.8, 3.4, 1.6, 0.2], [4.8, 3.4, 1.9, 0.2], [5.4, 3.9, 1.7, 0.4], [5.1, 3.5, 1.4, 0.2], [5.1, 3.5, 1.4, 0.3], [5.0, 3.5, 1.3, 0.3], [5.2, 3.5, 1.5, 0.2], [5.2, 3.4, 1.4, 0.2], [5.0, 3.4, 1.5, 0.2], [5.1, 3.4, 1.5, 0.2], [5.0, 3.3, 1.4, 0.2], [5.1, 3.8, 1.9, 0.4], [5.8, 4.0, 1.2, 0.2], [5.7, 4.4, 1.5, 0.4], [5.7, 3.8, 1.7, 0.3], [4.9, 3.0, 1.4, 0.2], [4.8, 3.0, 1.4, 0.3], [4.8, 3.0, 1.4, 0.1], [4.9, 3.1, 1.5, 0.1], [4.9, 3.1, 1.5, 0.2], [5.0, 3.0, 1.6, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [4.6, 3.2, 1.4, 0.2], [4.7, 3.2, 1.6, 0.2], [4.8, 3.1, 1.6, 0.2], [5.0, 3.2, 1.2, 0.2], [

### Average Group Linkage

In [180]:
# Cluster the data into 3 groups with average-group linkage (distance between
# cluster means) and print each cluster.
clusterList = agglomerative_clustering(dataset, 3, "average_group")
print_cluster(clusterList)

# Per-point cluster labels, in dataset order.
# NOTE: get_target_cluster() runs the clustering again internally.
print("\nTarget List")
target = get_target_cluster(dataset, 3, "average_group")
print(target)

cluster: 0
jumlah anggota: 50
[[5.1, 3.5, 1.4, 0.2], [5.1, 3.5, 1.4, 0.3], [5.2, 3.5, 1.5, 0.2], [5.2, 3.4, 1.4, 0.2], [5.0, 3.4, 1.5, 0.2], [5.1, 3.4, 1.5, 0.2], [5.0, 3.3, 1.4, 0.2], [5.0, 3.5, 1.3, 0.3], [5.0, 3.6, 1.4, 0.2], [4.9, 3.6, 1.4, 0.1], [5.4, 3.7, 1.5, 0.2], [5.3, 3.7, 1.5, 0.2], [5.1, 3.8, 1.5, 0.3], [5.1, 3.7, 1.5, 0.4], [5.1, 3.8, 1.6, 0.2], [5.4, 3.4, 1.7, 0.2], [5.4, 3.4, 1.5, 0.4], [5.5, 3.5, 1.3, 0.2], [5.1, 3.3, 1.7, 0.5], [5.0, 3.4, 1.6, 0.4], [5.0, 3.5, 1.6, 0.6], [5.1, 3.8, 1.9, 0.4], [4.9, 3.0, 1.4, 0.2], [4.8, 3.0, 1.4, 0.3], [4.8, 3.0, 1.4, 0.1], [4.9, 3.1, 1.5, 0.1], [4.9, 3.1, 1.5, 0.2], [5.0, 3.0, 1.6, 0.2], [4.7, 3.2, 1.6, 0.2], [4.8, 3.1, 1.6, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [4.6, 3.2, 1.4, 0.2], [4.6, 3.4, 1.4, 0.3], [5.0, 3.2, 1.2, 0.2], [4.8, 3.4, 1.6, 0.2], [4.8, 3.4, 1.9, 0.2], [4.4, 2.9, 1.4, 0.2], [4.4, 3.0, 1.3, 0.2], [4.4, 3.2, 1.3, 0.2], [4.3, 3.0, 1.1, 0.1], [4.6, 3.6, 1.0, 0.2], [5.4, 3.9, 1.7, 0.4], [5.7, 3.8, 1.7, 0.3], [