# Fuzzy C-Means Clustering

In [6]:
# Library imports
import numpy as np
import pandas as pd
import math

In [18]:
# Data imports
data1 = pd.read_csv("data/data1.csv", header=None)
data2 = pd.read_csv("data/data2.csv", header=None)
data3 = pd.read_csv("data/data3.csv", header=None)
data4 = pd.read_csv("data/data4.csv", header=None)

In [8]:
## Definition of constants
CONST_M = 1.4
CONST_CLUSTERING_ITERATION_NUMBER = 100

In [77]:
## Definition of general fuctions

def distance(point1, point2, to_power2=False):
    assert len(point1) == len(point2), "Points dimentions are different."
    
    dist = 0;
    for i in range(len(point1)):
        dist = dist + (point1[i] - point2[i])**2
        
    return dist if to_power2 else math.sqrt(dist)


def choose_random(data, number_of_samples):
    sample_dataframe = data.sample(n = number_of_samples)
    return sample_dataframe.iloc[:,:-1] # cut the last column
    
def update_cluster_values(data, centroids):
    for data_index, data_row in data.iterrows():
        point = tuple(data_row[:-1]) # cut the last column and convert to tuple
                
        distance_sum_inverse = 0
        for center_tuple in centroids:
            distance_sum_inverse = distance_sum_inverse + (1 / (distance(point,center_tuple,to_power2=True))**(1/(CONST_M-1)) )
            
        belonging_value_to_clusters = []
        for center in centroids:
            numerator = 1 / (distance(point,center,to_power2=True))**(1/(CONST_M-1))    
            belonging_value_to_clusters.append(numerator/distance_sum_inverse)
      
        data.at[data_index,'fuzzy_cluster'] = belonging_value_to_clusters
        
    
def update_centers(data, centroids):
    pass

In [78]:
def fuzzy_C_means(input_data, clusters_number):
    
    centroids = [ tuple(d[1]) for d in choose_random(input_data, clusters_number).iterrows()]
    input_data['fuzzy_cluster'] = [[]] * len(input_data)
        
    for iteration in range(CONST_CLUSTERING_ITERATION_NUMBER):
        update_cluster_values(input_data, centroids)
        update_centers(input_data, centroids)
        
    print(input_data)
        
        
fuzzy_C_means(data1, 2)

              0           1                              fuzzy_cluster
0    232.061376  359.002631  [0.9404177479242578, 0.05958225207574213]
1    174.883332  310.063063                                         []
2    229.630561  295.897638                                         []
3    186.194687  288.962504                                         []
4    198.785931  286.676203                                         []
..          ...         ...                                        ...
490  574.853762  167.548293                                         []
491  568.949373  170.252091                                         []
492  581.590653  141.458266                                         []
493  518.868724  153.304693                                         []
494  558.484666  141.331815                                         []

[495 rows x 3 columns]
