<a href="https://colab.research.google.com/github/anu-shree-anil/Machine-learning-Algorithms/blob/main/FCM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import math
from statistics import mean
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, classification_report
from sklearn.metrics import silhouette_samples, silhouette_score


In [2]:
#loading the data
iris=datasets.load_iris()
print(iris.DESCR)
df=pd.DataFrame(data=np.c_[iris['data'],iris['target']],columns=iris['feature_names']+['target'])
X=pd.DataFrame(iris.data)
print("ORIGINAL DATA:")
print(df)

#normalize the dataset
for column in X.columns:
    X[column] = (X[column] - X[column].min()) / (X[column].max() - X[column].min()) 

df=X.copy()

print("Dataset without the labels: \n",df)
df=df.to_numpy()

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [3]:
def centroids(dataset,k):

  number_of_rows = dataset.shape[0]
  random_indices = np.random.choice(number_of_rows, size=k, replace=False)
  centroids = dataset[random_indices, :]

  return centroids 

def euclidean(x1,x2):
    square = np.square(x1 - x2)
    # Get the sum of the square
    sum_square = np.sum(square)
    distance = np.sqrt(sum_square)

    return distance

def sse_score(dataset,distance_data,cluster):
   sse=0
   for j in range(len(cluster)):
      error=0
      for i in range(len(distance_data)):
          if distance_data[i][-1]==j+1:
            error=error+euclidean(dataset[i],cluster[j])
      sse=sse+error      

   return sse

def distance_chart(dataset,cluster):
    distance_data=[]
    for i in range(len(dataset)):
      l=[]
      for j in range(len(cluster)):
        l.append(euclidean(dataset[i],cluster[j]))
    
      l.append(l.index(min(l))+1)
      distance_data.append(l)
    
    return distance_data

def labels(dataset,cluster):
    
    label=[]
    for i in range(len(dataset)):
      l=[]
      for j in range(len(cluster)):
        l.append(euclidean(dataset[i],cluster[j]))
    
      label.append(l.index(min(l))+1)
    
    return label

def FCM(dataset,k,epochs):
  epoch=0
  cluster=np.array(centroids(dataset,k))
  converged=True
  partition_matrix=[[0.1]*k]*len(dataset)

  # for i in range(len(dataset)):
  #   for j in range(k):
  #     partition_matrix[i][j]=1/k

  # to ensure that sum of all memership values  of a particilar pattern must be one
  for i in range(len(dataset)):
    sum = 0
    for j in range(k-1):
      partition_matrix[i][j] = random.uniform(0, 1-sum)
      sum += partition_matrix[i][j]
    partition_matrix[i][k-1] = 1 - sum


  m=2
  
  while converged:
    
    #updating the clusters
    print(cluster)
    new_cluster=[]
    new_cluster=cluster.copy()
    temp=[]
    for j in range(k):
      c=0
      d=0
      for i in range(len(dataset)):
        c+=(((partition_matrix[i][j])**m)*dataset[i])
        d+=(partition_matrix[i][j]**m)
      temp.append(c/d) 
    new_cluster=np.array(temp) 
    print(new_cluster) 
    

    for i in range(len(dataset)):
      for j in range(k):
        p=0
        for l in range(k):
          p=p+((euclidean(dataset[i],cluster[j])/euclidean(dataset[i],cluster[l]))**(2/(m-1)))
          partition_matrix[i][j]=1/p  
  
    
    if((new_cluster==cluster).all() or epoch>epochs):
        converged=False

  
    for j in range(k):
      cluster[j]=new_cluster[j].copy()
 
    epoch=epoch+1  
    
    
  return cluster,partition_matrix

fcm=FCM(df,3,300)
distance_data=distance_chart(df,fcm[0])
sse=sse_score(df,distance_data,fcm[0])

print("Updated partition matrix: ",fcm[1])
print("SSE: ",sse)

label=labels(df,fcm[0])
silhouette_avg = silhouette_score(df,label)
print("Silhouette Score: ",silhouette_avg)

[[0.19444444 0.54166667 0.06779661 0.04166667]
 [0.61111111 0.41666667 0.81355932 0.875     ]
 [0.94444444 0.25       1.         0.91666667]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.42



[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]
 [0.4287037  0.44055556 0.46745763 0.45805556]]
[[0.42

In [4]:
#using inbuilt functions
from sklearn.cluster import KMeans
Kmean = KMeans(n_clusters=3)
Kmean.fit(df)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=3, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [5]:
print(Kmean.inertia_)
cluster_labels = Kmean.fit_predict(df)
silhouette_avg = silhouette_score(df, cluster_labels)
print("Silhouette Score: ",silhouette_avg)

6.982216473785234
Silhouette Score:  0.5047687565398588
