In [64]:
import numpy as np
import pandas as pd
import csv
import math
import random

In [65]:
def zero_list(size):
    return [0 for x in range(size)]


def zero_matrix(row, col):
    return [[0 for _ in range(col)] for _ in range(row)]


def rand_list_total_val(size, total):
    rand_list = (np.random.dirichlet(np.ones(size), size=total)).tolist()[0]
    return [x for x in rand_list]


def rand_matrix(row, col, maxValuePerRow=1):
    return [rand_list_total_val(col, maxValuePerRow) for _ in range(row)]


In [66]:
def count_miu_exp(rand_member):
    global CLUSTER_COUNT, EXPONENT

    row_len = len(rand_member)
    col_len = len(rand_member[0])

    miu_exp = zero_matrix(row_len, col_len)
    miu_exp_col_total = zero_list(CLUSTER_COUNT)

    for i in range(row_len):
        for j in range(col_len):
            miu_exp[i][j] = rand_member[i][j] ** EXPONENT
            miu_exp_col_total[j] = miu_exp_col_total[j] + miu_exp[i][j]

    return miu_exp, miu_exp_col_total

In [67]:
def count_data_times_miu_exp(data, miu_exp):
    global CLUSTER_COUNT

    data_row_len = len(data)
    data_col_len = len(data[0]) - 1

    data_miu = [
        zero_matrix(data_row_len, data_col_len) for _ in range(CLUSTER_COUNT)
    ]

    data_miu_col_total = zero_matrix(CLUSTER_COUNT, data_col_len)

    for i in range(len(miu_exp[0])):
        for j in range(data_row_len):
            for k in range(data_col_len):
                data_miu[i][j][k] = data[j][k + 1] * miu_exp[j][i]
                data_miu_col_total[i][k] = data_miu_col_total[i][k] + \
                    data_miu[i][j][k]

    return data_miu, data_miu_col_total

In [68]:
def count_cluster_center(data_miu_col_total, miu_exp_col_total):
    global data, CLUSTER_COUNT

    col_len = len(data[0]) - 1
    cluster_center = zero_matrix(CLUSTER_COUNT, col_len)

    for i in range(len(data_miu_col_total)):
        for j in range(len(data_miu_col_total[0])):
            cluster_center[i][j] = data_miu_col_total[i][j] / \
                miu_exp_col_total[i]

    return cluster_center

In [69]:
def count_obj_function(data, cluster_center, miu_exponent):
    global CLUSTER_COUNT
    obj_function = zero_matrix(len(data), CLUSTER_COUNT)

    for i in range(len(cluster_center)):
        for j in range(len(miu_exponent)):
            total = 0.0
            for k in range(1, len(data[0])):
                total += (data[j][k] - cluster_center[i][k - 1]) ** EXPONENT
            obj_function[j][i] = total * miu_exponent[j][i]

    return obj_function

In [70]:
def count_obj_function_total(obj_function):
    obj_func_len = len(obj_function)

    all_total = 0.0
    row_total = zero_list(obj_func_len)

    for i in range(obj_func_len):
        for col in obj_function[i]:
            row_total[i] = row_total[i] + col
            all_total += col

    return all_total, row_total

In [71]:
def count_deviation(obj_function_total, last_obj_function_total):
    return abs(obj_function_total - last_obj_function_total)

In [72]:
def count_partition_u(data, cluster_center):
    global CLUSTER_COUNT, EXPONENT
    PARTITION_CONSTANT = (-1 / (2 - 1))

    partition_u = zero_matrix(len(data), CLUSTER_COUNT)
    partition_u_total_row = zero_list(len(data))

    for i, cluster_row in enumerate(cluster_center):
        for j, data_row in enumerate(data):
            total = 0.0
            for k, data_col in enumerate(data_row[1:]):
                total += (data_col - cluster_row[k]) ** EXPONENT

            partition_u[j][i] = total ** PARTITION_CONSTANT
            partition_u_total_row[j] = partition_u_total_row[j] + \
                partition_u[j][i]
            partition_u[j][i]

    return partition_u, partition_u_total_row

In [73]:
def generate_new_member(partition_u, partition_u_total_row):
    new_member = zero_matrix(len(partition_u), len(partition_u[0]))

    for i, row in enumerate(partition_u):
        for j, col in enumerate(row):
            new_member[i][j] = col / partition_u_total_row[i]

    return new_member

In [74]:
def pick_cluster_from_partition(matrix_partition_u):
    BEST_CLUSTER_ROW = 2
    # name, best cluster value, best cluster index

    best_clusters = zero_matrix(len(data), BEST_CLUSTER_ROW)

    for i in range(len(matrix_partition_u)):
        best_index = 0
        best_val = -1
        for j, partition in enumerate(matrix_partition_u[i]):
            best_val = max(best_val, partition)
            best_index = j if best_val == partition else best_index

        best_clusters[i] = (data[i][0], best_val, best_index + 1)

    return best_clusters

In [75]:
CLUSTER_COUNT = 2
EXPONENT = 2
MAX_ITERATION = 100
EPSILON = 0.0001

In [76]:
file = open("data skripsiFIX.csv")
datax = csv.reader(file,delimiter=",")
datax = list(datax)
datax.pop(0)
data_df = pd.DataFrame(datax, columns=[
                       'Nama', 'Penghasilan', 'papan', 'sumber air minum', 'WC'])
data_df.head(48)

Unnamed: 0,Nama,Penghasilan,papan,sumber air minum,WC
0,SUWARDI,1.0,0.0,0,0
1,SUPARNI,1.0,0.6,0,0
2,WAGIMAN,1.0,0.6,1,0
3,HARJO SARNO,0.5,0.0,1,0
4,AHMAD SUPARNO,0.0,0.0,0,0
5,ASMO RIMIN,1.0,1.0,1,1
6,PAIMAN,1.0,0.0,1,0
7,SUMARTO WARNO,0.5,0.6,0,1
8,KARNI,1.0,0.0,1,0
9,ASMO SUPAR,0.5,0.6,1,0


In [77]:
file = open("data skripsiFIX1.csv")
skripsi_data = csv.reader(file,delimiter=",")
skripsi_data = list(skripsi_data)


In [78]:
skripsi_data = [x for x in skripsi_data[1:]]


In [79]:
skripsi_data = np.asarray(skripsi_data,dtype=float)
skripsi_data

array([[1. , 0. , 0. , 0. ],
       [1. , 0.6, 0. , 0. ],
       [1. , 0.6, 1. , 0. ],
       [0.5, 0. , 1. , 0. ],
       [0. , 0. , 0. , 0. ],
       [1. , 1. , 1. , 1. ],
       [1. , 0. , 1. , 0. ],
       [0.5, 0.6, 0. , 1. ],
       [1. , 0. , 1. , 0. ],
       [0.5, 0.6, 1. , 0. ],
       [0.5, 1. , 1. , 1. ],
       [0. , 0. , 0. , 0. ],
       [1. , 0.6, 0. , 0. ],
       [1. , 1. , 0. , 1. ],
       [0. , 0. , 0. , 0. ],
       [0.5, 0. , 0. , 0. ],
       [0.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [1. , 0. , 0. , 0. ],
       [0. , 0.6, 0. , 0. ],
       [1. , 0.6, 1. , 0. ],
       [0.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [1. , 0.6, 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [1. , 0.6, 0. , 0. ],
       [1. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. ],
       [0. , 0. , 1. , 0. ],
       [1. , 0

In [80]:
def matrikskeanggotaan():
    matriksu = list()
    for i in range(len(skripsi_data)):
        randomnumber = [random.random() for x in range(CLUSTER_COUNT)]
        jumlah = sum(randomnumber)
        temp = list()
        for x in randomnumber:
            temp.append(round(x/jumlah,3))
        matriksu.append(temp)
    return matriksu

In [81]:
skripsi_member = matrikskeanggotaan()

In [82]:
skripsi_member

[[0.015, 0.985],
 [0.543, 0.457],
 [0.215, 0.785],
 [0.731, 0.269],
 [0.318, 0.682],
 [0.358, 0.642],
 [0.46, 0.54],
 [0.111, 0.889],
 [0.59, 0.41],
 [0.902, 0.098],
 [0.525, 0.475],
 [0.8, 0.2],
 [0.481, 0.519],
 [0.818, 0.182],
 [0.508, 0.492],
 [0.53, 0.47],
 [0.516, 0.484],
 [0.771, 0.229],
 [0.4, 0.6],
 [0.446, 0.554],
 [0.489, 0.511],
 [0.448, 0.552],
 [0.371, 0.629],
 [0.852, 0.148],
 [0.69, 0.31],
 [0.865, 0.135],
 [0.756, 0.244],
 [0.567, 0.433],
 [0.618, 0.382],
 [0.547, 0.453],
 [0.533, 0.467],
 [0.621, 0.379],
 [0.456, 0.544],
 [0.762, 0.238],
 [0.367, 0.633],
 [0.57, 0.43],
 [0.288, 0.712],
 [0.269, 0.731],
 [0.214, 0.786],
 [0.0, 1.0],
 [0.657, 0.343],
 [0.458, 0.542],
 [0.288, 0.712],
 [0.609, 0.391],
 [0.468, 0.532],
 [0.423, 0.577],
 [0.365, 0.635],
 [0.214, 0.786],
 [0.047, 0.953],
 [0.502, 0.498],
 [0.61, 0.39],
 [0.681, 0.319],
 [0.312, 0.688],
 [0.591, 0.409],
 [0.494, 0.506],
 [0.296, 0.704],
 [1.0, 0.0],
 [0.82, 0.18],
 [0.862, 0.138],
 [0.03, 0.97],
 [0.918, 0.0

In [83]:
data = skripsi_data
member = skripsi_member

last_obj_function_total = 0.0
total_iteration = 0

for i in range(MAX_ITERATION):
    total_iteration = i + 1

    miu_exp, miu_exp_col_total = count_miu_exp(member)
    data_miu, data_miu_col_total = count_data_times_miu_exp(data, miu_exp)
    cluster_center = count_cluster_center(
        data_miu_col_total, miu_exp_col_total)

    obj_function = count_obj_function(data, cluster_center, miu_exp)
    obj_function_total, obj_function_row_total = count_obj_function_total(
        obj_function)

    deviation = count_deviation(obj_function_total, last_obj_function_total)
    if deviation <= EPSILON:
        break

    last_obj_function_total = obj_function_total

    matrix_partition_u, matrix_partition_u_total_row = count_partition_u(
        data, cluster_center)
    member = generate_new_member(
        matrix_partition_u, matrix_partition_u_total_row)

In [84]:
best_clusters = pick_cluster_from_partition(matrix_partition_u)
df = pd.DataFrame(best_clusters, columns=[
                  'name', 'cluster_value', 'cluster_index'])

print(f'total iteration: {total_iteration}')
df.head(50)

total iteration: 14


Unnamed: 0,name,cluster_value,cluster_index
0,1.0,65.605604,1
1,1.0,4.304835,1
2,1.0,9.448862,2
3,0.5,5.489341,2
4,0.0,65.605604,1
5,1.0,0.884132,2
6,1.0,5.489341,2
7,0.5,0.855319,1
8,1.0,5.489341,2
9,0.5,9.448862,2


In [63]:
df.tail(50)

Unnamed: 0,name,cluster_value,cluster_index
65,0.0,5.581896,1
66,1.0,64.548527,2
67,0.0,9.478796,1
68,0.0,4.322272,2
69,0.0,64.548527,2
70,0.0,64.548527,2
71,0.0,64.548527,2
72,0.5,64.548527,2
73,0.5,64.548527,2
74,0.5,4.322272,2
