In [0]:
import csv
import urllib2
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
# Print floats in fixed-point form (no scientific "e" notation) with up to 8 decimals
np.set_printoptions(precision=8,suppress=True,linewidth=150)


In [0]:
def clean_data(data, label_col=4):
    """
    Turn raw CSV rows into numeric feature vectors.

    Parameters
    ----------
    data : list of list of str
        Rows as produced by ``csv.reader``. The input is not modified.
    label_col : int, optional
        Position of the non-numeric column (the class name) that is dropped
        from every row. Defaults to 4.

    Returns
    -------
    datac : list of 1d float arrays
        One array per non-empty input row, in input order.

    Raises
    ------
    IndexError
        If a non-empty row has no column at ``label_col``.
    ValueError
        If a remaining field cannot be converted to float.
    """
    datac = []
    for row in data:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline
            # at the end of the file); skip those instead of blindly
            # discarding whatever happens to be the last row.
            continue
        features = list(row)  # copy so the caller's rows stay intact
        del features[label_col]
        datac.append(np.array(features, dtype=float))
    return datac


def get_data(url):
    """
    Download a CSV resource and return its rows.

    Parameters
    ----------
    url : str
        Location of the CSV file; anything accepted by ``urlopen``.

    Returns
    -------
    data : list of list of str
        One list of fields per line. Blank lines come back as empty lists,
        exactly as ``csv.reader`` reports them.
    """
    # Imported locally so the function works on both Python 3 and Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen

    response = urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the connection, even when the read fails.
        response.close()

    # Python 3 delivers bytes, but csv.reader needs text.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8')

    # Keep the line terminators so quoted fields spanning lines stay intact.
    return list(csv.reader(raw.splitlines(True)))


def init_particles(data, clusters, particles, seed=None):
    """
    Build the initial swarm by picking data points as cluster centers.

    Parameters
    ----------
    data : 2d array (N x Q)
        Data points to draw the centers from.
    clusters : int
        Number of centers per particle.
    particles : int
        Number of particles (candidate center sets) in the swarm.
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` is used so the
        swarm is reproducible. When None (default) the global NumPy random
        state is used.

    Returns
    -------
    swarm : 3d array (particles x clusters x Q)
        Initial centers of every particle.

    Raises
    ------
    ValueError
        If ``data`` is empty while ``clusters`` is positive.
    """
    data = np.asarray(data)
    rand = np.random if seed is None else np.random.RandomState(seed)
    n_points = len(data)

    # Two centers starting on the very same data point would receive identical
    # memberships and updates forever, i.e. silently merge into one cluster.
    # Draw distinct indices whenever there are enough points to do so.
    # (Duplicate rows in `data` can still yield equal centers.)
    with_replacement = clusters > n_points

    swarm = []
    for _particle in range(particles):
        picks = rand.choice(n_points, size=clusters, replace=with_replacement)
        swarm.append(data[picks])
    return np.array(swarm)

    
  

In [0]:
def distance(data, centers, metric='euclidean'):
    """
    Pairwise distance between every data point and every cluster center.

    Parameters
    ----------
    data : 2d array (N x Q)
        The N data points, each with Q features.
    centers : 2d array (C x Q)
        The C cluster centers, each with Q features.
    metric : string
        Forwarded unchanged to ``scipy.spatial.distance.cdist``;
        'euclidean' unless the caller says otherwise.

    Returns
    -------
    dist : 2d array (N x C)
        Entry (i, j) is the distance, under ``metric``, from point i to
        center j.
    """
    pairwise = cdist(data, centers, metric=metric)
    return pairwise

def cal_distance_mat(data,particles):
    """
    Distance matrix of the whole swarm.

    Parameters
    ----------
    data : 2d array (N x Q)
        The N data points.
    particles : 3d array (P x C x Q)
        P particles, each holding C cluster centers.

    Returns
    -------
    dist : 3d array (P x N x C)
        For every particle, the distance from each data point to each of
        that particle's centers, shifted by a negligible positive offset.
    """
    n_particles, n_clusters = particles.shape[0], particles.shape[1]
    dist = np.zeros((n_particles, data.shape[0], n_clusters))
    for idx, centers in enumerate(particles):
        # the tiny offset keeps later distance ratios from dividing by zero
        dist[idx] = distance(data, centers) + .00000001
    return dist
        

In [0]:
def cal_membership_mat(dist, particles, m=2):
    """
    Fuzzy membership matrix of the whole swarm.

    For particle p, data point i and cluster j the membership is

        u[p, i, j] = 1 / sum_k (dist[p, i, j] / dist[p, i, k]) ** (2 / (m - 1))

    Parameters
    ----------
    dist : 3d array (P x N x C)
        Strictly positive distances from every data point to every center
        of every particle.
    particles : 3d array (P x C x Q)
        Kept for backward compatibility; the result is derived from
        ``dist`` alone.
    m : number, optional
        Fuzzifier, must differ from 1 (default 2).

    Returns
    -------
    mem_mat : 3d array (P x N x C)
        Memberships; for every particle and data point they sum to 1 over
        the clusters.

    Raises
    ------
    ZeroDivisionError
        If ``m`` equals 1.
    """
    # 2.0 forces true division: on Python 2 an integer m >= 4 made 2/(m-1)
    # truncate to 0, which turned every membership into 1/C.
    exp = 2.0 / (m - 1)
    dist = np.asarray(dist, dtype=float)
    # ratio[p, i, j, k] = dist[p, i, j] / dist[p, i, k]
    ratio = dist[:, :, :, np.newaxis] / dist[:, :, np.newaxis, :]
    return 1.0 / np.power(ratio, exp).sum(axis=3)

In [0]:
def update_centers(mem_mat,data,m=2):
    """
    Recompute every particle's cluster centers from its memberships.

    Each center is the mean of the data points weighted by membership
    raised to the power ``m``:

        center[p, j] = sum_k u[p, k, j]**m * data[k] / sum_k u[p, k, j]**m

    Parameters
    ----------
    mem_mat : 3d array (P x N x C)
        Membership of each of the N data points in each of the C clusters,
        for each of the P particles.
    data : 2d array (N x D)
        The data points.
    m : number, optional
        Fuzzifier used as the exponent of the memberships (default 2).

    Returns
    -------
    new_centers : 3d array (P x C x D)
        Updated centers of every particle.
    """
    weights = np.power(np.asarray(mem_mat, dtype=float), m)   # P x N x C
    # Weighted sum of the data points for every (particle, cluster) pair.
    num = np.einsum('pnc,nd->pcd', weights, np.asarray(data))  # P x C x D
    denom = weights.sum(axis=1)                                # P x C
    return num / denom[:, :, np.newaxis]
    
  

In [0]:
def jmeasure(dist, mem_mat, m=2):
    """
    Objective value of every particle.

    For particle p the value is the sum over all data points i and
    clusters j of ``mem_mat[p, i, j]**m * dist[p, i, j]**2``.

    Parameters
    ----------
    dist : 3d array (P x N x C)
        Distances from every data point to every center of every particle.
    mem_mat : 3d array (P x N x C)
        Matching memberships.
    m : number, optional
        Fuzzifier used as the exponent of the memberships (default 2).

    Returns
    -------
    jm : 1d array (P)
        One objective value per particle.
    """
    dist = np.asarray(dist, dtype=float)
    weights = np.power(np.asarray(mem_mat, dtype=float), m)
    # Collapse the data-point and cluster axes, keeping one value per particle.
    return np.sum(weights * np.square(dist), axis=(1, 2))


In [0]:
def get_labels(cluster_membership):
    """
    Hard-assign every entry to its strongest cluster.

    Iterates over ``cluster_membership`` and, for each element, takes the
    position of its largest value (``np.argmax``).

    Returns
    -------
    labels : list
        One index per element of ``cluster_membership``, in order.
    """
    return [np.argmax(row) for row in cluster_membership]

In [218]:
!pip install scikit-fuzzy

import skfuzzy as fuzzy

def lib_fcm(data,cluster_count,m):
    """
    Reference fuzzy c-means run using scikit-fuzzy.

    Calls ``fuzzy.cmeans`` on the transpose of ``data`` with
    ``cluster_count`` clusters, fuzzifier ``m``, ``error=0.001``,
    ``maxiter=1000`` and no explicit initialisation or seed.

    Returns
    -------
    tuple
        ``(cntr, u.T, jm)``: the first, the transposed second and the fifth
        of the seven values returned by ``fuzzy.cmeans``. Presumably these
        are the centers, the membership matrix and the objective history;
        confirm against the scikit-fuzzy documentation.
    """
    result = fuzzy.cmeans(
        data=data.T, c=cluster_count, m=m, error=0.001, maxiter=1000, init=None, seed=None)
    # cmeans hands back seven values; only three of them are passed on.
    cntr, u, u0, d, jm, p, fpc = result
    return cntr, u.T, jm



In [212]:
# Experiment configuration.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
m = 2                 # fuzzifier passed to the membership/center/objective helpers
cluster_count = 3     # centers per particle
particle_count = 20   # candidate center sets in the swarm
SEED = 42             # fixed so a fresh "Restart & Run All" rebuilds the same swarm

# Download the raw rows, strip the label column and stack into one float array.
raw_rows = get_data(url)
data = np.array(clean_data(raw_rows))

# Seed the global NumPy generator right before the only stochastic step.
np.random.seed(SEED)
particles = init_particles(data,cluster_count,particle_count)

print(particles.tolist())

[[[5.6, 2.9, 3.6, 1.3], [4.9, 3.1, 1.5, 0.1], [5.8, 2.7, 5.1, 1.9]], [[5.1, 3.4, 1.5, 0.2], [6.4, 3.1, 5.5, 1.8], [6.5, 3.0, 5.8, 2.2]], [[5.1, 3.4, 1.5, 0.2], [5.5, 2.4, 3.7, 1.0], [5.5, 2.6, 4.4, 1.2]], [[4.8, 3.4, 1.9, 0.2], [4.4, 3.2, 1.3, 0.2], [6.5, 3.0, 5.5, 1.8]], [[7.6, 3.0, 6.6, 2.1], [6.1, 3.0, 4.9, 1.8], [5.6, 2.9, 3.6, 1.3]], [[6.0, 3.4, 4.5, 1.6], [5.0, 3.4, 1.6, 0.4], [4.6, 3.2, 1.4, 0.2]], [[5.7, 2.9, 4.2, 1.3], [5.6, 2.9, 3.6, 1.3], [6.0, 2.7, 5.1, 1.6]], [[5.1, 3.5, 1.4, 0.2], [4.6, 3.1, 1.5, 0.2], [5.5, 4.2, 1.4, 0.2]], [[6.4, 3.2, 4.5, 1.5], [5.5, 2.3, 4.0, 1.3], [5.1, 3.5, 1.4, 0.3]], [[4.8, 3.0, 1.4, 0.3], [6.2, 2.8, 4.8, 1.8], [6.7, 3.3, 5.7, 2.5]], [[5.0, 3.6, 1.4, 0.2], [5.0, 3.6, 1.4, 0.2], [5.8, 2.8, 5.1, 2.4]], [[7.6, 3.0, 6.6, 2.1], [6.4, 3.2, 5.3, 2.3], [6.1, 2.8, 4.0, 1.3]], [[6.0, 3.4, 4.5, 1.6], [6.9, 3.1, 4.9, 1.5], [7.2, 3.6, 6.1, 2.5]], [[4.8, 3.4, 1.6, 0.2], [6.3, 2.8, 5.1, 1.5], [4.8, 3.4, 1.9, 0.2]], [[6.3, 2.5, 5.0, 1.9], [4.9, 3.0, 1.4, 0.2], [6

In [213]:
MAX_ITER = 1000  # number of center updates applied to every particle

# Evaluate the initial swarm once; jmo keeps the starting objective values
# for the report printed below.
dist = cal_distance_mat(data,particles)
mem = cal_membership_mat(dist,particles,m)
jmo = jmeasure(dist,mem,m)
jm = jmo

for iteration in range(MAX_ITER):
    # Update first, then re-evaluate: this way dist/mem/jm always describe
    # the centers currently stored in `particles`. Evaluating before the
    # update left mem and jm one step behind the centers that get reported.
    particles = update_centers(mem,data,m)
    dist = cal_distance_mat(data,particles)
    mem = cal_membership_mat(dist,particles,m)
    jm = jmeasure(dist,mem,m)

# Best particle = first one reaching the smallest objective value.
min_jm = np.amin(jm)
min_jm_id = np.where(jm==min_jm)
bp_id = min_jm_id[0][0]

print("Particle(best) "+": "+ str(particles[bp_id].tolist())+"\n" +"JMeasure(old) : "+str(jmo[bp_id])+"\n" +"JMeasure(new) : "+str(jm[bp_id])+"\n")


Particle(best) : [[5.889199790337225, 2.7612349508229586, 4.364255127978977, 1.397446546763746], [5.003561368096462, 3.4030356673989006, 1.4850015644189853, 0.25154107484721255], [6.775118989795105, 3.0524309140434496, 5.646914423781907, 2.053608511504043]]
JMeasure(old) : 124.36661027966848
JMeasure(new) : 60.57595693585616



In [222]:
# Hard labels of the best particle found above.
bp_mem = mem[bp_id]
labels = get_labels(bp_mem)
print(labels)

# Reference run with scikit-fuzzy, using the configured cluster count rather
# than a hard-coded 3 so both runs always solve the same problem.
# NOTE(review): cluster indices are presumably arbitrary in both runs, so two
# identical partitions may still print different label numbers.
cntr,u,j=lib_fcm(data,cluster_count,m)
print(get_labels(u))

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0]
