In [36]:
# Winner-take-all competitive-learning clustering of the Iris dataset, scored by purity
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import metrics
import time

In [37]:
def purity_score(y_true, y_pred):
    """Return the overall purity of the labelling ``y_pred`` against ``y_true``.

    The contingency table is built with
    ``metrics.cluster.contingency_matrix``; the largest count of every
    column is summed and divided by the total count in the table.
    """
    table = metrics.cluster.contingency_matrix(y_true, y_pred)
    # axis=0 collapses the rows, leaving the largest count of each column
    dominant_counts = np.amax(table, axis=0)
    return np.sum(dominant_counts) / np.sum(table)

In [38]:
def purity_score_per_cluster(data, N):
    """Return the count of the most frequent ``'y'`` value in ``data`` divided by ``N``.

    ``data`` must be indexable by ``'y'`` and support ``value_counts()``
    (e.g. a pandas DataFrame).  ``N`` is the denominator supplied by the
    caller; it is not derived from ``data``.
    """
    label_counts = data['y'].value_counts()
    majority_count = label_counts.max()
    return majority_count / N

In [39]:
# Load the Iris data: the file has no header row, so the five columns are
# named here -- 'a'..'d' are the features and 'y' is the class label.
# NOTE: this is an absolute, machine-specific path; point IRIS_PATH at your
# local copy of iris.txt before running.
IRIS_PATH = 'E:/Academic/Neural Networks/Assignment_2/iris.txt'
dataset = pd.read_csv(IRIS_PATH, delimiter=',', names=['a', 'b', 'c', 'd', 'y'])

In [40]:
# Preview the first five rows to check the column layout.
dataset.head(n=5)

Unnamed: 0,a,b,c,d,y
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.6,3.1,1.5,0.2,1
3,5.0,3.6,1.4,0.2,1
4,4.6,3.4,1.4,0.3,1


In [41]:
# Split the frame by position: the first four columns are the features,
# the fifth column is the class label.
feature_frame = dataset.iloc[:, :4]
label_series = dataset.iloc[:, 4]
X = feature_frame.values
Y = label_series.values

In [56]:
# Initial weights: one row per output neuron (3 rows), one column per
# input feature (4 columns).  Each row has (approximately) unit L2 norm.
neuron_1_weights = [0.80272936, 0.53990134, 0.24938941, 0.04401186]
neuron_2_weights = [0.70353431, 0.32379285, 0.59497289, 0.2149533]
neuron_3_weights = [0.62375013, 0.34441463, 0.17431373, 0.67965364]
weight_matrix = np.array([neuron_1_weights, neuron_2_weights, neuron_3_weights])

In [57]:
# Scale every sample (row of X) to unit L2 norm and keep the result as a
# fresh ndarray.
X_normalized = np.array(preprocessing.normalize(X, norm='l2'))

In [58]:
# Winner-take-all competitive learning with lateral inhibition.
# For every sample the output neuron with the largest (inhibited) activation
# wins; only the winner's weight row is moved towards the sample and then
# re-normalised to unit length.  `weight_matrix` is updated in place.
alpha = 0.3       # learning rate: fraction of (sample - Wk) added to the winner
ephsilon = 0.32   # strength of the lateral inhibitory connections
n_epochs = 500    # number of full passes over X_normalized
cluster_indexes = []

start_time = time.time()
for epoch in range(n_epochs):
    # only the assignments made during the current epoch are kept, so after
    # the loop this list holds the labels of the last epoch
    cluster_indexes = []
    for sample in X_normalized:
        # weighted summation of the inputs for every output neuron
        out_neuron_vector = np.dot(weight_matrix, sample)
        # each neuron is inhibited by the summed activation of all *other*
        # neurons, i.e. the total activation minus its own
        lateral_inhibit_summation = out_neuron_vector.sum() - out_neuron_vector
        # NOTE: this equals (1 + ephsilon) * out - ephsilon * sum(out), which
        # preserves the ordering of the raw activations, so the winner is the
        # same neuron that np.argmax(out_neuron_vector) would pick.
        out_neuron_vector_updated = out_neuron_vector - ephsilon * lateral_inhibit_summation
        max_index = np.argmax(out_neuron_vector_updated)
        # cluster labels are 1-based
        cluster_indexes.append(max_index + 1)
        # move the winning weight vector towards the sample ...
        Wk = weight_matrix[max_index, :]
        Wk = Wk + alpha * (sample - Wk)
        # ... and write it back with unit Euclidean norm
        weight_matrix[max_index, :] = Wk / np.linalg.norm(Wk)
print("--- %s seconds ---" % (time.time() - start_time))

--- 1.981698751449585 seconds ---


In [59]:
# Show which distinct cluster labels are present in cluster_indexes.
np.unique(np.asarray(cluster_indexes))

array([1, 2], dtype=int64)

In [60]:
# Attach the cluster labels to a *copy* of the data, so that the loaded
# `dataset` frame does not silently gain a 'cluster' column.
temp_dataset = dataset.copy()
temp_dataset['cluster'] = cluster_indexes


def _cluster_purity(cluster_id):
    """Return ``(rows, score)`` for the cluster labelled ``cluster_id``.

    ``rows`` are the rows of ``temp_dataset`` assigned to that cluster.
    ``score`` is ``purity_score_per_cluster(rows, len(dataset))``: the count
    of the most frequent 'y' value in the cluster divided by the size of the
    whole dataset (not of the cluster).  An empty cluster scores 0.0.
    """
    rows = temp_dataset[temp_dataset['cluster'] == cluster_id]
    if len(rows) == 0:
        return rows, 0.0
    return rows, purity_score_per_cluster(rows, len(dataset))


cluster1_dataset, purity_score_cluster1 = _cluster_purity(1)
cluster2_dataset, purity_score_cluster2 = _cluster_purity(2)
cluster3_dataset, purity_score_cluster3 = _cluster_purity(3)

print('Purity score for cluster 1: ', purity_score_cluster1)
print('Purity score for cluster 2: ', purity_score_cluster2)
print('Purity score for cluster 3: ', purity_score_cluster3)
print('Total purity score: ', purity_score(Y, cluster_indexes))

Purity score for cluster 1:  0.3333333333333333
Purity score for cluster 2:  0.3333333333333333
Purity score for cluster 3:  0.0
Total purity score:  0.6666666666666666


In [9]:
# Small integer vector [1, 2, 3] used by the scratch cells below.
out = np.arange(1, 4)

In [10]:
# Start from zeros with the shape of `out` and copy the second element of
# `out` into the first slot; every other slot stays 0.
arr = np.zeros(out.shape)
if len(out) > 0:
    arr[0] = out[1]

(3,)

In [11]:
# Float array of zeros with the same shape as `out`.
inn = np.zeros(shape=out.shape, dtype=float)

In [12]:
# Display the zero-initialised array built from out.shape.
inn

array([0., 0., 0.])

In [34]:
# Stand-alone check of the lateral-inhibition sum on a tiny example:
# for every output index, add up the activations of all *other* indexes.
out_neuron_vector = np.array([4, 1, 2])
# total activation minus a neuron's own value is the sum over the others;
# the result is stored as floats
lateral_inhibit_summation = (out_neuron_vector.sum() - out_neuron_vector).astype(float)

In [35]:
# Display the per-neuron sums; the recorded output is array([3., 6., 5.]).
lateral_inhibit_summation

array([3., 6., 5.])