# Importing Necessary Libraries

In [8]:
# Datasets
import numpy as np
import pandas as pd
from copy import deepcopy

# Scikit Learn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from numpy.linalg import inv,pinv
from sklearn.cross_validation  import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Initializing Parameters

In [9]:
# Path of the folder
path = 'C:/Users/prash/Downloads/ML ALGORITHMS/'

# Number of clusters required
k = 3

# Number of Neurons in Hidden Layer
h = 5

# Importing and Cleaning Dataset

In [13]:
# Import Iris Dataset
iris_dataset = pd.read_csv(path + 'DATASETS/' + 'Iris.csv')
# Removing Index Column
iris_dataset = iris_dataset.iloc[:,1:]

# Input Dataframe
X = iris_dataset.iloc[:,:-1]
X = np.array(X)

# Encode the Output labels
Y = iris_dataset.iloc[:,-1]
for i in range(len(Y.unique())):
    Y = Y.replace(Y.unique()[i],i)
Y = np.array(Y)

# One Hot Encode the Output Labels
enc = OneHotEncoder()
Y = enc.fit_transform(Y.reshape(-1,1)).toarray()

# Normalize the Dataframe
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Divide into train and test datasets
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=0)

# Implementation From Scratch

In [14]:
# Euclidean Distance Function
def dist(a, b,ax=1):
    
    # Euclidian Distance
    distance = np.sqrt(np.sum((a - b)**2,axis=ax))
    
    return distance


def modified_kmeans(X,k):
    
    m = X.shape[0]
    n = X.shape[1]
    
    # Initialize Centroids randomly in C (will contain updated Centroids value)
    C = np.random.uniform(0, np.max(X), size=(k,X.shape[1]))

    # Store old Centroid values in C_old
    C_old = np.zeros(C.shape)

    # Initialize target clusters for each instance as 0
    clusters = np.zeros(X.shape[0])

    # Error - Distance between new centroids and old centroids
    error = dist(C, C_old,None)

    # Loop will run till the error becomes zero
    while error != 0:

        # Assigning each value to its closest cluster
        for i in range(X.shape[0]):
            distances = dist(C, X[i],1)
            cluster = np.argmin(distances)
            clusters[i] = cluster

        # Storing the old centroid values
        C_old = deepcopy(C)

        # Finding the new centroids by taking the mean of all the instances in that cluster
        for i in range(k):
            points = [X[j] for j in range(X.shape[0]) if clusters[j] == i]
            if(len(points) > 0):
                C[i] = np.mean(points, axis=0)

        # Calculating Error 
        error = dist(C, C_old,None)
    
    # Assigning a Cluster for each Instance/Row
    I = np.empty([1,m])
    for i in range(m):
        minim = dist(X[i,:],C[0,:],0)
        s=0
        for j in range(1,k):
            t = dist(X[i,:],C[j,:],0)
            if(t<minim):
                s = j
        I[0,i] = s
    
    # Calculating mu for each cluster
    mu = np.ones([k,n])-1
    count = np.ones([k,1])-1
    for i in range(m):
        for j in range(k):
            if(I[0,i]==j):
                mu[j,:] = mu[j,:]+X[i,:]
                count[j,0] = count[j,0]+1
    for i in range(k):
        if(count[i,0]!=0):
            mu[i,0] = mu[i,0]/count[i,0]

    # Calculating sigma for each cluster 
    sigma = np.ones([k,n])-1
    for i in range(m):
        for j in range(k):
            if(I[0,i]==j):
                sigma[j,:] = sigma[j,:] + np.square(X[i,:] - mu[j,:])
    for i in range(k):
        if(count[i,0]!=0):
            sigma[i,0] = sigma[i,0]/count[i,0]
    sigma = np.sqrt(sigma)
    
    # Calculating beta for each cluster
    beta = np.ones([k,n])-1
    for i in range(k):
        if(count[i,0]!=0):
            beta[i,:] = 1/(2 * np.square(sigma[i,:]))
     
    
    return beta,mu

In [19]:
# Calculate the Beta and Mu Parameters
beta,mu = modified_kmeans(X_train,h)
print('Obtained Beta Values are:\n\n',beta)
print('\nObtained Mu Values are:\n\n',mu)

# Remove unnecessary centroids 
mu_sum = np.sum(mu,axis=1)
for j in range(10):
    for i in range(mu_sum.size):
        if(mu_sum[i]==0):
            mu = np.delete(mu,i,0)
            beta = np.delete(beta,i,0)
            mu_sum=np.delete(mu_sum,i,0)
            h = h-1
            break


# Calculating the Values in Hidden Layer
m = X_train[:,0].size
H_train = np.empty([m,h])
for i in range(m):
    for j in range(h):
        H_train[i,j] = np.exp(-1*np.dot(beta[j],np.square(X_train[i] - mu[j])))

# Calculating the weights        
W = np.dot(pinv(H_train),Y_train)
print('\nThe Weights Obtained are:\n\n',W)


# Calculating the same for Test Values

m = X_test[:,0].size
H_test = np.empty([m,h])
for i in range(m):
    for j in range(h):
        H_test[i,j] = np.exp(-1*np.dot(beta[j],np.square(X_test[i] - mu[j])))
        
# Predicting the Clusters        
Y_pred = np.dot(H_test, W)
Y_pred = enc.fit_transform(Y_pred.argmax(axis=1).reshape(-1,1)).toarray()

# Finding the accuracy
print('\nThe Final Accuracy Obtained is:\n\n',accuracy_score(Y_test,Y_pred))

Obtained Beta Values are:

 [[2.06487064e+00 1.03519069e-05 8.93462496e-05 1.33238027e-04]
 [2.83196225e+00 1.72631401e-05 5.62765899e-06 5.97779790e-06]
 [1.26485918e+00 2.09626243e-04 7.15440541e-06 6.53039348e-06]]

Obtained Mu Values are:

 [[ -0.0991091  -36.11274674  12.28981595  10.06031167]
 [ -1.01866041  28.34223517 -49.66044019 -48.18396703]
 [  1.11690477   7.5853892   41.27161154  43.198512  ]]

The Weights Obtained are:

 [[ 0.02457132  0.65149213  0.064032  ]
 [ 1.00070152  0.01975333  0.00230499]
 [-0.0052479   0.19162741  0.92427988]]

The Final Accuracy Obtained is:

 0.6666666666666666


### References:
https://towardsdatascience.com/most-effective-way-to-implement-radial-basis-function-neural-network-for-classification-problem-33c467803319 <br>
https://mccormickml.com/2013/08/15/radial-basis-function-network-rbfn-tutorial/ <br>
