In [16]:
import numpy as np
import pandas as pd


def zero_list(size):
    """Return a new list containing `size` integer zeros."""
    return [0] * size


def zero_matrix(row, col):
    """Return a `row` x `col` grid of zeros as a list of separate row lists."""
    grid = []
    for _ in range(row):
        # A fresh list per row, so writing to one row never affects another.
        grid.append([0] * col)
    return grid


def rand_list_total_val(size, total):
    """Return `size` random non-negative floats whose sum is `total`.

    One sample is drawn from a flat Dirichlet distribution (all concentration
    parameters equal to 1), which sums to 1, and is then scaled by `total`.

    Previously `total` was forwarded as the Dirichlet `size` argument (the
    number of samples to draw), so the returned list always summed to 1,
    a non-integer `total` raised, and `total=0` failed with IndexError.

    Args:
        size: number of values to generate.
        total: value the generated numbers add up to (int or float).

    Returns:
        A list of `size` Python floats.
    """
    weights = np.random.dirichlet(np.ones(size))
    return (weights * total).tolist()


def rand_matrix(row, col, maxValuePerRow=1):
    """Build `row` random lists of length `col`.

    Each row comes from its own call to
    `rand_list_total_val(col, maxValuePerRow)`.
    """
    matrix = []
    for _ in range(row):
        matrix.append(rand_list_total_val(col, maxValuePerRow))
    return matrix


In [17]:
def count_miu_exp(rand_member):
    """Raise every membership value to the power EXPONENT and total each column.

    Reads the module-level names CLUSTER_COUNT and EXPONENT.

    Args:
        rand_member: membership matrix (list of rows). The width of the first
            row decides how many columns are processed in every row.

    Returns:
        (miu_exp, miu_exp_col_total): the powered matrix, and a list of
        CLUSTER_COUNT running column totals.
    """
    width = len(rand_member[0])
    column_totals = [0] * CLUSTER_COUNT

    powered = []
    for memberships in rand_member:
        powered_row = [0] * width
        for c in range(width):
            value = memberships[c] ** EXPONENT
            powered_row[c] = value
            column_totals[c] = column_totals[c] + value
        powered.append(powered_row)

    return powered, column_totals


In [18]:
def count_data_times_miu_exp(data, miu_exp):
    """Multiply every feature value by the point's powered membership, per cluster.

    Reads the module-level name CLUSTER_COUNT to size both results.

    Args:
        data: list of records; index 0 of each record is skipped and the
            remaining entries are treated as numeric features.
        miu_exp: matrix indexed as miu_exp[point][cluster].

    Returns:
        (data_miu, data_miu_col_total) where
        data_miu[cluster][point][feature] is feature * miu_exp[point][cluster]
        and data_miu_col_total[cluster][feature] is the sum of those products
        over all points.
    """
    n_points = len(data)
    n_features = len(data[0]) - 1

    weighted = [
        [[0] * n_features for _ in range(n_points)]
        for _ in range(CLUSTER_COUNT)
    ]
    weighted_sums = [[0] * n_features for _ in range(CLUSTER_COUNT)]

    for cluster in range(len(miu_exp[0])):
        for point, record in enumerate(data):
            for feature in range(n_features):
                # feature + 1 skips the leading label column of the record
                product = record[feature + 1] * miu_exp[point][cluster]
                weighted[cluster][point][feature] = product
                weighted_sums[cluster][feature] = (
                    weighted_sums[cluster][feature] + product
                )

    return weighted, weighted_sums


In [19]:
def count_cluster_center(data_miu_col_total, miu_exp_col_total):
    """Compute cluster centres as weighted feature sums divided by weight totals.

    The result shape is taken from the arguments themselves. The function used
    to size its result from the notebook-level names `data` and CLUSTER_COUNT,
    which made it fail (or return a wrongly shaped matrix) whenever those
    globals were unbound or out of step with the arguments.

    Args:
        data_miu_col_total: one row per cluster holding the per-feature sums of
            feature * powered membership.
        miu_exp_col_total: per-cluster sum of powered memberships; entry i is
            the divisor for row i.

    Returns:
        A list with one row per cluster, where
        result[i][j] == data_miu_col_total[i][j] / miu_exp_col_total[i].

    Raises:
        ZeroDivisionError: if a cluster's membership total is zero.
        IndexError: if miu_exp_col_total has fewer entries than
            data_miu_col_total has rows.
    """
    cluster_center = []
    for i, weighted_sums in enumerate(data_miu_col_total):
        weight_total = miu_exp_col_total[i]
        cluster_center.append([value / weight_total for value in weighted_sums])

    return cluster_center


In [20]:
def count_obj_function(data, cluster_center, miu_exponent):
    """Compute each point's contribution to the objective function, per cluster.

    For point j and cluster i (cc = cluster_center; index 0 of a data row is
    the label and is skipped):

        obj_function[j][i] =
            ((data[j][1] - cc[i][0]) ** 2
             + (data[j][2] - cc[i][1]) ** 2
             + ...
             + (data[j][n] - cc[i][n - 1]) ** 2) * miu_exponent[j][i]

                        obj_function
    |  cluster 1  |  cluster 2  |    ....     |  cluster c  |
    ---------------------------------------------------------
    |   [0][0]    |   [0][1]    |    ....     |  [0][c-1]   |
    |   [1][0]    |   [1][1]    |    ....     |  [1][c-1]   |
    |    ....     |    ....     |    ....     |    ....     |

    The distance term is always the squared difference. It was previously
    raised to EXPONENT (the membership fuzzifier), which only coincides with
    the squared distance when EXPONENT == 2 and can even cancel to zero or go
    negative for odd exponents.

    Args:
        data: list of records; entries after index 0 are numeric features.
        cluster_center: one row of feature coordinates per cluster.
        miu_exponent: powered memberships indexed as [point][cluster]; one row
            is processed per entry of this matrix.

    Returns:
        A matrix with len(data) rows and len(cluster_center) columns.
    """
    cluster_total = len(cluster_center)
    obj_function = [[0] * cluster_total for _ in range(len(data))]

    for j in range(len(miu_exponent)):
        features = data[j][1:]
        for i, center in enumerate(cluster_center):
            squared_distance = 0.0
            for k, value in enumerate(features):
                squared_distance += (value - center[k]) ** 2
            obj_function[j][i] = squared_distance * miu_exponent[j][i]

    return obj_function


In [21]:
def count_obj_function_total(obj_function):
    """Sum the objective matrix overall and per row.

    Returns:
        (all_total, row_total): the grand total as a float, and a list with
        the running sum of each row.
    """
    all_total = 0.0
    row_total = []

    for row in obj_function:
        subtotal = 0
        for value in row:
            # Accumulate element by element so the float rounding order is
            # the same for the row sum and for the grand total.
            subtotal = subtotal + value
            all_total += value
        row_total.append(subtotal)

    return all_total, row_total


In [22]:
def count_deviation(obj_function_total, last_obj_function_total):
    """Return the absolute difference between the two objective totals."""
    change = obj_function_total - last_obj_function_total
    return abs(change)


In [23]:
def count_partition_u(data, cluster_center):
    """Compute unnormalised membership weights of every point for every cluster.

    For each point and cluster the squared distance d2 between the point's
    features (everything after index 0 of the record) and the cluster centre
    is computed, and the weight is d2 ** (-1 / (EXPONENT - 1)).

    Fixes relative to the earlier version:
      * the power was hard-coded as -1 / (2 - 1), i.e. only valid for
        EXPONENT == 2; it now follows the module-level EXPONENT;
      * coordinate differences are always squared instead of being raised to
        EXPONENT;
      * a point lying exactly on a centre no longer raises ZeroDivisionError:
        it gets weight 1.0 for every coincident centre and 0.0 elsewhere;
      * a stray no-op expression statement was removed.
    For EXPONENT == 2 and non-zero distances the results are unchanged.

    Args:
        data: list of records; entries after index 0 are numeric features.
        cluster_center: one row of feature coordinates per cluster.

    Returns:
        (partition_u, partition_u_total_row): a len(data) x len(cluster_center)
        weight matrix, and the sum of each of its rows.

    Raises:
        ValueError: if EXPONENT is not greater than 1 (the power would be
            undefined or of the wrong sign).
    """
    if EXPONENT <= 1:
        raise ValueError("EXPONENT must be greater than 1")
    power = -1 / (EXPONENT - 1)

    partition_u = []
    partition_u_total_row = []

    for data_row in data:
        features = data_row[1:]

        squared_distances = []
        for cluster_row in cluster_center:
            total = 0.0
            for k, data_col in enumerate(features):
                total += (data_col - cluster_row[k]) ** 2
            squared_distances.append(total)

        if 0.0 in squared_distances:
            # The point sits exactly on at least one centre; 0 ** negative is
            # undefined, so give the coincident centre(s) all of the weight.
            weights = [1.0 if d == 0.0 else 0.0 for d in squared_distances]
        else:
            weights = [d ** power for d in squared_distances]

        row_total = 0
        for weight in weights:
            row_total = row_total + weight

        partition_u.append(weights)
        partition_u_total_row.append(row_total)

    return partition_u, partition_u_total_row


In [24]:
def generate_new_member(partition_u, partition_u_total_row):
    """Normalise each partition row by its row total.

    Args:
        partition_u: matrix of weights; the first row's length sets the width
            of every output row.
        partition_u_total_row: divisor for each row of `partition_u`.

    Returns:
        A new matrix where entry [i][j] is
        partition_u[i][j] / partition_u_total_row[i].
    """
    width = len(partition_u[0])

    new_member = []
    for row_idx, weights in enumerate(partition_u):
        shares = [0] * width
        for col_idx, weight in enumerate(weights):
            shares[col_idx] = weight / partition_u_total_row[row_idx]
        new_member.append(shares)

    return new_member


In [25]:
def pick_cluster_from_partition(matrix_partition_u):
    """Pick, for every row of the partition matrix, the cluster with the largest value.

    Reads the module-level `data` for the record names (index 0 of each
    record) and for the number of result rows.

    Returns:
        A list with len(data) entries. Entry i, for each row i of
        `matrix_partition_u`, is the tuple
        (name, best cluster value, 1-based best cluster index); entries with
        no matching partition row stay as [0, 0, 0]. When several columns
        share the largest value the last of them is reported, and the
        reported value never drops below the starting value of -1.
    """
    BEST_CLUSTER_ROW = 3
    # name, best cluster value, best cluster index

    best_clusters = [[0] * BEST_CLUSTER_ROW for _ in range(len(data))]

    for row_idx, partitions in enumerate(matrix_partition_u):
        best_val = -1
        best_index = 0
        for col_idx, partition in enumerate(partitions):
            if partition > best_val:
                best_val = partition
            if best_val == partition:
                best_index = col_idx

        best_clusters[row_idx] = (data[row_idx][0], best_val, best_index + 1)

    return best_clusters


In [26]:
"""
Program starts here
"""

# Example dataset taken from
# https://www.youtube.com/watch?v=7b7vVhSaFFs&t=962s
#
# NOTE: the next cell assigns CLUSTER_COUNT, EXPONENT, MAX_ITERATION and
# EPSILON again, and the run cell selects skripsi_data / skripsi_member, so
# example_data and example_member are not used unless that cell is edited.

# Number of clusters; the helper functions use it to size per-cluster lists.
CLUSTER_COUNT = 3
# Power applied to the membership values (see count_miu_exp).
EXPONENT = 2
# Upper bound on the number of passes of the main loop.
MAX_ITERATION = 100
# The main loop stops once the objective total changes by at most this much.
EPSILON = 10 ** -4

# One record per row: a label at index 0 followed by the numeric features
# (the helper functions skip index 0 and use it only as the record name).
example_data = [
    ['A', 1, 3],
    ['B', 3, 3],
    ['C', 4, 3],
    ['D', 5, 3],
    ['E', 1, 2],
    ['F', 4, 2],
    ['G', 1, 1],
    ['H', 2, 1],
    ['I', 5, 2],
    ['J', 2, 4]
]

# Starting membership matrix for example_data: one row per record above,
# one column per cluster.
example_member = [
    [0.29, 0.50, 0.21],
    [0.74, 0.11, 0.14],
    [0.02, 0.82, 0.15],
    [0.56, 0.02, 0.41],
    [0.23, 0.15, 0.62],
    [0.26, 0.52, 0.22],
    [0.68, 0.31, 0.01],
    [0.85, 0.12, 0.03],
    [0.19, 0.39, 0.42],
    [0.46, 0.34, 0.19]
]


In [27]:
# Dataset taken from
# http://repository.ub.ac.id/id/eprint/3800/41/Skripsi%20Full%20Eka.pdf

# These assignments rebind the constants set in the previous cell; only
# EPSILON differs (0.01 here instead of 10 ** -4).
CLUSTER_COUNT = 3
EXPONENT = 2
MAX_ITERATION = 100
EPSILON = 0.01

# One record per row: a name at index 0 followed by six numeric values.
# The DataFrame built below labels these columns
# 'Nama', 'ISAT', 'PLN', 'SF', 'TSEL', 'TRI', 'XL'.
skripsi_data = [
    ['A ADMIN', 9, 4, 0, 32, 7, 23],
    ['ACA CELL', 8, 1, 1, 28, 3, 18],
    ['ADE CELL', 21, 23, 36, 33, 0, 15],
    ['AMANDA 1', 105, 14, 1, 52, 6, 9],
    ['AMANDA 2', 34, 4, 4, 16, 0, 9],
    ['ANAS RULI', 87, 13, 6, 72, 2, 3],
    ['ANOENG CELL', 54, 11, 2, 16, 0, 0],
    ['AREMA CELL', 37, 29, 0, 79, 1, 74],
    ['ARF DION', 52, 6, 2, 78, 7, 18],
    ['ARLIS CELL', 8, 2, 0, 44, 0, 10],
    ['ASSYIFA CELL', 24, 18, 2, 116, 4, 43],
    ['BANG IPUL', 32, 2, 5, 12, 2, 3],
    ['BELLA', 3, 1, 1, 40, 12, 11],
    ['BELLA CELL', 6, 0, 0, 31, 6, 7],
    ['BIMA CELL', 47, 8, 1, 98, 0, 59],
    ['BU JUNIK', 28, 2, 0, 101, 1, 22],
    ['CITRA CELL', 39, 9, 0, 6, 9, 10],
    ['DENIS CELL', 34, 39, 0, 149, 4, 91],
    ['DESY CELL', 5, 6, 0, 73, 5, 6],
    ['DIDIK P', 29, 28, 1, 78, 1, 34],
    ['DIEZERT', 7, 1, 0, 16, 12, 14],
    ['DIFA CELL', 2, 0, 0, 3, 0, 0],
    ['ELSA CELL', 7, 0, 0, 27, 0, 0],
    ['F12 COM', 6, 2, 0, 17, 1, 13],
    ['FANI CELL', 24, 1, 7, 61, 1, 25],
    ['HENY PUPUT CELL', 3, 4, 0, 36, 0, 2],
    ['HUTAMA CELL', 7, 1, 2, 16, 0, 9],
    ['ILLA CELL', 25, 0, 0, 21, 1, 4],
    ['INDRA PULSA', 84, 32, 9, 256, 16, 72],
    ['JONS CELL', 85, 28, 0, 203, 18, 44],
    ['KIBAL CELL', 35, 33, 16, 64, 12, 41],
    ['LIA CELL', 23, 18, 12, 57, 5, 7],
    ['LISA CELL', 12, 29, 0, 37, 1, 13],
    ['NING CELL', 94, 17, 4, 226, 21, 34],
    ['NOVINDA', 30, 1, 7, 45, 3, 51],
    ['NURHAYATI', 24, 16, 1, 158, 9, 19],
    ['PUTRI CELL', 83, 19, 0, 30, 1, 16],
    ['RITA CELL', 8, 2, 4, 19, 1, 10],
    ['SAIFUL ARIFIN', 0, 1, 0, 0, 1, 2],
    ['SUKESI', 9, 0, 0, 17, 1, 0],
    ['SULIKAH', 44, 1, 3, 18, 3, 19],
    ['SYIFA CELL', 1, 2, 1, 24, 0, 8],
    ['TOMEN CELL', 42, 15, 2, 40, 2, 5],
    ['YANTO', 9, 3, 0, 46, 3, 7],
    ['YAZID', 6, 7, 0, 30, 2, 11],
    ['YENIS', 3, 0, 0, 2, 0, 9],
    ['YULIA', 3, 0, 0, 5, 0, 5],
    ['YUYUN CELL', 60, 24, 0, 100, 23, 15],
]

# Tabular view of the dataset for display: the name column plus the six
# numeric columns of every record.
data_df = pd.DataFrame(skripsi_data, columns=[
                       'Nama', 'ISAT', 'PLN', 'SF', 'TSEL', 'TRI', 'XL'])

# Show every record once. The cell previously evaluated the same head() call
# twice; only the last expression of a cell is displayed, so the first call
# was a discarded no-op.
data_df.head(len(skripsi_data))


Unnamed: 0,Nama,ISAT,PLN,SF,TSEL,TRI,XL
0,A ADMIN,9,4,0,32,7,23
1,ACA CELL,8,1,1,28,3,18
2,ADE CELL,21,23,36,33,0,15
3,AMANDA 1,105,14,1,52,6,9
4,AMANDA 2,34,4,4,16,0,9
5,ANAS RULI,87,13,6,72,2,3
6,ANOENG CELL,54,11,2,16,0,0
7,AREMA CELL,37,29,0,79,1,74
8,ARF DION,52,6,2,78,7,18
9,ARLIS CELL,8,2,0,44,0,10


In [28]:
# Starting membership matrix used with skripsi_data in the run cell: one row
# per record and one column per cluster (three columns).
# NOTE(review): the rows look like they are meant to sum to 1 up to 4-decimal
# rounding (e.g. 0.1818 + 0.3636 + 0.4545 = 0.9999) - confirm against the
# source document.
skripsi_member = [
    [0.1818, 0.3636, 0.4545],
    [0.2619, 0.4762, 0.2619],
    [0.3261, 0.4348, 0.2391],
    [0.5000, 0.3056, 0.1944],
    [0.2414, 0.6897, 0.069],
    [0.6667, 0.1429, 0.1905],
    [0.8333, 0.0833, 0.0833],
    [0.1053, 0.3684, 0.5263],
    [0.4800, 0.4800, 0.0400],
    [0.2308, 0.2821, 0.4872],
    [0.1795, 0.4872, 0.3333],
    [0.3333, 0.6111, 0.0556],
    [0.6154, 0.3462, 0.0385],
    [0.5556, 0.3056, 0.1389],
    [0.1379, 0.5172, 0.3448],
    [0.8, 0.05, 0.15],
    [0.1818, 0.7727, 0.0455],
    [0.5263, 0.0789, 0.3947],
    [0.4857, 0.2857, 0.2286],
    [0.2903, 0.1935, 0.5161],
    [0.4688, 0.25, 0.2813],
    [0.303, 0.4242, 0.2727],
    [0.7727, 0.1818, 0.0455],
    [0.5333, 0.4, 0.0667],
    [0.25, 0.5, 0.25],
    [0.4286, 0.1429, 0.4286],
    [0.6129, 0.2258, 0.1613],
    [0.1026, 0.4615, 0.4359],
    [0.2432, 0.2703, 0.4865],
    [0.3409, 0.2045, 0.4545],
    [0.0667, 0.2667, 0.6667],
    [0.4318, 0.4545, 0.1136],
    [0.5333, 0.3, 0.1667],
    [0.3, 0.425, 0.275],
    [0.1212, 0.3939, 0.4848],
    [0.4, 0.32, 0.28],
    [0.2821, 0.4615, 0.2564],
    [0.4082, 0.3265, 0.2653],
    [0.383, 0.2128, 0.4043],
    [0.4878, 0.4146, 0.0976],
    [0.2979, 0.383, 0.3191],
    [0.3, 0.3, 0.4],
    [0.5429, 0.2, 0.2571],
    [0.25, 0.3, 0.45],
    [0.1563, 0.5938, 0.25],
    [0.2963, 0.5556, 0.1481],
    [0.4167, 0.2222, 0.3611],
    [0.3, 0.375, 0.325]
]


In [29]:
# Select the dataset and the starting membership matrix for this run.
# `data` must be bound at module level before the loop: helper functions such
# as pick_cluster_from_partition read it as a global rather than a parameter.
data = skripsi_data
member = skripsi_member

# Objective total of the previous pass. It starts at 0.0, so the deviation
# computed on the first pass equals the first objective total itself.
last_obj_function_total = 0.0
total_iteration = 0

for i in range(MAX_ITERATION):
    total_iteration = i + 1  # 1-based count of passes started so far

    # Cluster centres from the current memberships: powered memberships,
    # membership-weighted feature sums, then their ratio.
    miu_exp, miu_exp_col_total = count_miu_exp(member)
    data_miu, data_miu_col_total = count_data_times_miu_exp(data, miu_exp)
    cluster_center = count_cluster_center(
        data_miu_col_total, miu_exp_col_total)

    # Objective value for the current centres and memberships.
    obj_function = count_obj_function(data, cluster_center, miu_exp)
    obj_function_total, obj_function_row_total = count_obj_function_total(
        obj_function)

    # Stop once the objective changed by at most EPSILON since the last pass.
    # The break happens before the membership update below, so on exit
    # `member` and `matrix_partition_u` still hold the values computed in the
    # previous pass (and `matrix_partition_u` is never assigned if the break
    # fires on the very first pass).
    deviation = count_deviation(obj_function_total, last_obj_function_total)
    if deviation <= EPSILON:
        break

    last_obj_function_total = obj_function_total

    # Membership update: per-cluster weights for every record, normalised by
    # each record's row total.
    matrix_partition_u, matrix_partition_u_total_row = count_partition_u(
        data, cluster_center)
    member = generate_new_member(
        matrix_partition_u, matrix_partition_u_total_row)


In [30]:
# Pick the strongest cluster per record from the partition weights left by the
# loop cell. `matrix_partition_u` holds the unnormalised weights returned by
# count_partition_u (not the normalised `member` matrix), so 'cluster_value'
# is reported on that scale.
best_clusters = pick_cluster_from_partition(matrix_partition_u)
df = pd.DataFrame(best_clusters, columns=[
                  'name', 'cluster_value', 'cluster_index'])

# Number of loop passes that were started before the loop ended.
print(f'total iteration: {total_iteration}')
# 48 is the number of records in skripsi_data, so every row is shown.
df.head(48)


total iteration: 21


Unnamed: 0,name,cluster_value,cluster_index
0,A ADMIN,0.003255,2
1,ACA CELL,0.006911,2
2,ADE CELL,0.000591,2
3,AMANDA 1,0.00019,1
4,AMANDA 2,0.00217,2
5,ANAS RULI,0.000347,1
6,ANOENG CELL,0.000564,2
7,AREMA CELL,0.000476,1
8,ARF DION,0.002692,1
9,ARLIS CELL,0.002214,2
