In [1]:
import numpy as np
import pandas as pd
import random
from sklearn.metrics import davies_bouldin_score,silhouette_score
import tensorflow as tf
from scipy.spatial import distance

In [3]:
# Colab-only setup: mount Google Drive at /content/drive so the dataset CSVs
# read later in this notebook (under '/content/drive/My Drive/...') are reachable.
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
def initialization(rows, c, state):
  """Draw a random initial fuzzy membership matrix.

  Every row is sampled from a flat Dirichlet distribution, so its entries are
  non-negative and sum to 1.

  Args:
    rows: number of data points (rows of the returned matrix).
    c: number of clusters (columns of the returned matrix).
    state: seed for the NumPy generator (an int accepted by
      np.random.RandomState, or None for a non-deterministic draw). The same
      seed always yields the same matrix.

  Returns:
    Float array of shape (rows, c).
  """
  # The sample is drawn by NumPy, so the seed must go to a NumPy generator:
  # seeding the stdlib `random` module does not influence np.random at all.
  # A private RandomState also avoids touching NumPy's global random state.
  rng = np.random.RandomState(state)
  return rng.dirichlet(np.ones(c), size=rows)

In [5]:
def calc_centroid(X,wt,c,m,rows,features):
  """Compute the fuzzy c-means cluster centroids.

  Centroid j is the weighted mean of the data points, using the membership
  values raised to the fuzzifier as weights:

      centroid[j] = sum_i(wt[i, j]**m * X[i]) / sum_i(wt[i, j]**m)

  Args:
    X: 2-D array of data points, one row per point.
    wt: membership matrix; wt[i, j] is the weight of point i in cluster j.
    c: number of clusters.
    m: fuzzifier exponent applied to the memberships.
    rows: number of data points to use (the first `rows` rows of X and wt).
    features: number of feature columns to use.

  Returns:
    Float array of shape (c, features) holding one centroid per row.
  """
  points = np.asarray(X, dtype=float)[:rows, :features]
  # Numerator and denominator are taken over the same `rows` samples so the
  # result is a true weighted mean even if wt has more rows than `rows`.
  weights = np.power(np.asarray(wt, dtype=float)[:rows, :c], m)   # (rows, c)
  numerators = weights.T @ points                                 # (c, features)
  denominators = weights.sum(axis=0)                              # (c,)
  return numerators / denominators[:, None]
      

In [6]:
def calc_mvm(X,c,rows,m,centroids,wt):
  """Recompute the membership value matrix in place.

  For each point i and cluster j the membership is

      wt[i, j] = (1 / d_ij)**(2/(m-1)) / sum_k (1 / d_ik)**(2/(m-1))

  where d_ij is the Euclidean distance between point i and centroid j.

  Args:
    X: 2-D array of data points, one row per point.
    c: number of clusters.
    rows: number of data points to update.
    m: fuzzifier exponent; must not be 1 (the exponent 2/(m-1) is undefined).
    centroids: 2-D array with one centroid per row.
    wt: membership matrix; its first `rows` x `c` entries are overwritten.

  Returns:
    None. The result is written into `wt`.
  """
  points = np.asarray(X, dtype=float)[:rows]
  centers = np.asarray(centroids, dtype=float)[:c]

  # One vectorised pass per cluster keeps the temporary memory at
  # O(rows * features) and computes every distance only once.
  dist = np.empty((points.shape[0], centers.shape[0]))
  for j in range(centers.shape[0]):
    dist[:, j] = np.linalg.norm(points - centers[j], axis=1)

  exponent = 2/(m-1)
  coincident = dist == 0
  # Substitute a dummy distance where d == 0 so the division below cannot
  # produce inf/inf = NaN; those rows are overwritten afterwards.
  inverse = np.power(1.0 / np.where(coincident, 1.0, dist), exponent)
  memberships = inverse / inverse.sum(axis=1, keepdims=True)

  # A point sitting exactly on one or more centroids belongs entirely to
  # them (shared equally) and has zero membership everywhere else.
  on_centroid = coincident.any(axis=1)
  if on_centroid.any():
    hits = coincident[on_centroid]
    memberships[on_centroid] = hits / hits.sum(axis=1, keepdims=True)

  wt[:rows, :c] = memberships

In [14]:
def fuzzycmeans(X,c,iterations,m,state,tol=0.0001):
  """Cluster X with fuzzy c-means and return the membership matrix.

  Starting from a random membership matrix, the routine alternates between
  recomputing the memberships from the current centroids and recomputing the
  centroids from the memberships. It stops early once the centroids move by
  no more than `tol`.

  Args:
    X: 2-D array-like of data points, one row per point.
    c: number of clusters.
    iterations: iteration budget; at most `iterations - 1` update steps are
      run after the initial centroid computation.
    m: fuzzifier exponent (must not be 1).
    state: seed forwarded to `initialization`.
    tol: convergence threshold on the Frobenius norm of the centroid shift
      between two consecutive steps.

  Returns:
    Array of shape (rows, c); entry [i, j] is the membership of point i in
    cluster j.
  """
  X = np.asarray(X)          # the helpers index X as X[i, :], so a list is not enough
  rows, features = X.shape   # number of data points, number of dimensions
  wt = initialization(rows,c,state)
  old_centroids = calc_centroid(X,wt,c,m,rows,features)
  for _ in range(iterations-1):
    calc_mvm(X,c,rows,m,old_centroids,wt)   # updates wt in place
    new_centroids = calc_centroid(X,wt,c,m,rows,features)
    # np.linalg.norm on a 2-D array is the Frobenius norm, i.e. the square
    # root of the summed squared centroid displacements.
    if np.linalg.norm(new_centroids-old_centroids) <= tol:
      return wt
    old_centroids = new_centroids
  return wt


In [15]:
# Benchmark sweep: run fuzzy c-means on every dataset for each cluster count
# and record the silhouette (sv) and Davies-Bouldin (db) scores of the
# hardened (argmax) partition. Row = dataset index, column = cluster count.
N_DATASETS = 56
CLUSTER_COUNTS = range(2, 11)
DATA_DIR = '/content/drive/My Drive/DM_assignment/'

# No tf.device(...) wrapper here: everything below is NumPy / pandas /
# scikit-learn code, which a TensorFlow device scope does not affect.
# Cells start as NaN so an interrupted or skipped entry is not mistaken for a
# genuine score of 0.
sv = np.full((N_DATASETS, len(CLUSTER_COUNTS)), np.nan)
db = np.full((N_DATASETS, len(CLUSTER_COUNTS)), np.nan)
for i in range(1, N_DATASETS + 1):
  print(i)
  df = pd.read_csv(DATA_DIR + str(i) + '.csv', header=None)
  X = np.array(df.iloc[:, 0:-1])   # every column except the last one
  for j in CLUSTER_COUNTS:
    wt = fuzzycmeans(X, j, 300, 2, 0)
    labels = np.argmax(wt, axis=1)
    # Both metrics need between 2 and n_samples - 1 distinct labels and raise
    # otherwise; argmax can collapse clusters, so leave such cells as NaN.
    n_labels = len(np.unique(labels))
    if not 2 <= n_labels <= len(X) - 1:
      continue
    sv[i-1, j-2] = silhouette_score(X, labels)
    db[i-1, j-2] = davies_bouldin_score(X, labels)


1
2
3
4
5
6
7
8
9


In [None]:
# Write the silhouette (sv) and Davies-Bouldin (db) score tables to Drive as CSV.
df1 = pd.DataFrame(sv)
df2 = pd.DataFrame(db)
# NOTE(review): the target directory is 'DM_assignment_' (trailing underscore)
# while the datasets are read from 'DM_assignment' - confirm this is intended.
for frame, target in (
    (df1, '/content/drive/My Drive/DM_assignment_/svfcmeans.csv'),
    (df2, '/content/drive/My Drive/DM_assignment_/dbfcmeans.csv'),
):
  frame.to_csv(target)
