# Fuzzy C-Means Clustering

In [103]:
# Library imports
import numpy as np
import pandas as pd
import math
from functools import reduce

In [18]:
# Data imports
# Load the four CSV datasets. header=None tells pandas the files have no
# header line, so the first line is read as data and the columns get integer
# labels. Only data1 and data2 are used in the part of the file shown here.
data1 = pd.read_csv("data/data1.csv", header=None)
data2 = pd.read_csv("data/data2.csv", header=None)
data3 = pd.read_csv("data/data3.csv", header=None)
data4 = pd.read_csv("data/data4.csv", header=None)

In [99]:
## Definition of constants
# Fuzzifier exponent: membership values are raised to this power, and
# 1 / (CONST_M - 1) is the exponent applied to squared distances when the
# memberships are computed.
CONST_M = 1.4
# Number of membership/centroid update rounds performed by fuzzy_C_means.
CONST_CLUSTERING_ITERATION_NUMBER = 10

In [109]:
## Definition of general functions

def distance(point1, point2, to_power2=False):
    """Return the Euclidean distance between two points of equal length.

    Both points are indexed with the integers 0 .. len(point1) - 1. When
    `to_power2` is true the squared distance is returned and the square root
    is skipped. An AssertionError is raised if the lengths differ.
    """
    dimensions = len(point1)
    assert dimensions == len(point2), "Points dimentions are different."

    squared_total = 0
    for axis in range(dimensions):
        gap = point1[axis] - point2[axis]
        squared_total += gap ** 2

    if to_power2:
        return squared_total
    return math.sqrt(squared_total)


def choose_random(data, number_of_samples):
    """Draw `number_of_samples` random rows from `data`.

    The rows are picked with `data.sample`; every column except the last one
    is kept in the returned frame.
    """
    picked_rows = data.sample(n=number_of_samples)
    every_row = slice(None)
    all_but_last_column = slice(None, -1)
    return picked_rows.iloc[every_row, all_but_last_column]

def product_with_tuple(input_scalar, input_tuple):
    """Scale every element of `input_tuple` by `input_scalar`.

    Returns the scaled values as a new tuple.
    """
    scaled = (element * input_scalar for element in input_tuple)
    return tuple(scaled)
    
def sum_of_tuples(tuple1, tuple2):
    """Add two tuples element by element.

    Pairs are formed with `zip`, so the result is as long as the shorter input.
    """
    totals = []
    for left, right in zip(tuple1, tuple2):
        totals.append(left + right)
    return tuple(totals)
    
    
def update_cluster_values(data, centroids):
    """Recompute the fuzzy membership of every row of `data`, in place.

    For each row, the last column is dropped and the remaining values are
    treated as a point. With d_j the distance from the point to centroid j,
    the weight of centroid j is

        1 / (d_j ** 2) ** (1 / (CONST_M - 1))

    and the membership is that weight divided by the sum of the weights of
    all centroids, so the memberships of one point add up to 1. The resulting
    list (one value per centroid, in `centroids` order) is written to the
    row's 'fuzzy_cluster' cell.

    A point that coincides with one or more centroids (or lies so close that
    the powered distance underflows to 0) would make the formula divide by
    zero. Such a point shares its full membership equally among those
    centroids and gets 0.0 for every other centroid.

    Args:
        data: DataFrame whose last column is 'fuzzy_cluster'; it is modified.
        centroids: sequence of center points with the same number of
            coordinates as a row without its last column.
    """
    exponent = 1 / (CONST_M - 1)

    for data_index, data_row in data.iterrows():
        point = tuple(data_row[:-1])  # cut the last column and convert to tuple

        # Each powered squared distance feeds both the numerator and the
        # normalising sum, so compute it once per centroid.
        powered_distances = [
            distance(point, center, to_power2=True) ** exponent
            for center in centroids
        ]

        on_centroid = [powered == 0 for powered in powered_distances]
        if any(on_centroid):
            # Degenerate case: avoid 1 / 0 and split the membership evenly
            # between the centroids the point sits on.
            share = 1 / sum(on_centroid)
            belonging_value_to_clusters = [
                share if hit else 0.0 for hit in on_centroid
            ]
        else:
            inverse_weights = [1 / powered for powered in powered_distances]
            distance_sum_inverse = sum(inverse_weights)
            belonging_value_to_clusters = [
                weight / distance_sum_inverse for weight in inverse_weights
            ]

        data.at[data_index, 'fuzzy_cluster'] = belonging_value_to_clusters
        
    
def calculate_and_get_new_centers(data, centroids):
    """Compute one updated center for every entry of `centroids`.

    For center j, each row is weighted by its j-th 'fuzzy_cluster' value
    raised to CONST_M, divided by the sum of those powers over all rows. The
    new center is the sum of the rows' coordinates (every column but the
    last) scaled by these weights. Only the number of centroids matters; their
    current coordinates are not read.

    Returns the new centers as a list of tuples, in `centroids` order.
    """
    updated_centers = []

    for position, _ in enumerate(centroids):
        rows = [row for _, row in data.iterrows()]

        # Membership of each row in this center, raised to the fuzzifier.
        powered = [row['fuzzy_cluster'][position] ** CONST_M for row in rows]
        normaliser = reduce(lambda running, value: running + value, powered, 0)

        weighted_points = [
            product_with_tuple(weight / normaliser, row[:-1])
            for row, weight in zip(rows, powered)
        ]

        updated_centers.append(reduce(sum_of_tuples, weighted_points))

    return updated_centers
        
def get_cost(data, centroids):
    """Return the clustering cost of `data` for the given `centroids`.

    The cost is the sum, over every row and every centroid, of the row's
    membership in that centroid raised to CONST_M times the squared distance
    between the row (without its last column) and the centroid.
    """
    total = 0
    for _, row in data.iterrows():
        for position, centroid in enumerate(centroids):
            weight = row['fuzzy_cluster'][position] ** CONST_M
            squared_gap = distance(row[:-1], centroid, to_power2=True)
            total += weight * squared_gap
    return total


In [110]:
def fuzzy_C_means(input_data, clusters_number):
    """Run fuzzy C-means on `input_data` and print the outcome.

    A 'fuzzy_cluster' column is added to `input_data`, `clusters_number`
    random rows are taken as the starting centroids, and those rows are left
    out of the clustered data. Memberships and centroids are then refreshed
    CONST_CLUSTERING_ITERATION_NUMBER times, after which the final centroids
    and the clustered frame are printed. Nothing is returned.
    """
    # One placeholder cell per row, to be filled with membership lists.
    input_data['fuzzy_cluster'] = [[]] * len(input_data)

    seed_rows = choose_random(input_data, clusters_number)
    centroids = [tuple(seed) for _, seed in seed_rows.iterrows()]

    # Delete the selected rows: a point has zero distance to itself, which
    # adds additional complexity to the calculations in the functions.
    remaining = input_data.drop(seed_rows.index)

    for _ in range(CONST_CLUSTERING_ITERATION_NUMBER):
        update_cluster_values(remaining, centroids)
        centroids = calculate_and_get_new_centers(remaining, centroids)

    print(centroids)
    print(remaining)

    
# Cluster the first dataset into 3 groups and the second into 2; each call
# prints its final centroids followed by the clustered frame.
fuzzy_C_means(data1, 3)
fuzzy_C_means(data2, 2)

[(207.1838192132928, 235.1024549737849), (457.36715906671986, 363.0208639548901), (480.33103368177, 115.60007771522862)]
              0           1                                      fuzzy_cluster
0    232.061376  359.002631  [0.9422358553890704, 0.05181475061331284, 0.00...
1    174.883332  310.063063  [0.9975885225712041, 0.0018326894759332574, 0....
2    229.630561  295.897638  [0.9980918324747375, 0.0015037036984425964, 0....
3    186.194687  288.962504  [0.999495951407451, 0.00036550869525729624, 0....
4    198.785931  286.676203  [0.9996283450450494, 0.0002717356675042768, 9....
..          ...         ...                                                ...
490  574.853762  167.548293  [0.0019930386014336116, 0.02393293919269849, 0...
491  568.949373  170.252091  [0.0018244537177110577, 0.02265906212606718, 0...
492  581.590653  141.458266  [0.001462666432135216, 0.012001817127226002, 0...
493  518.868724  153.304693  [0.00013831663865015775, 0.0009783594925629972...
494  558.4