<a href="https://colab.research.google.com/github/anu-shree-anil/Machine-learning-Algorithms/blob/main/Multi_Layer_Perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import math
from statistics import mean
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, classification_report


In [None]:
#loading the data
wine=datasets.load_wine()
print(wine.DESCR)
df=pd.DataFrame(data=np.c_[wine['data'],wine['target']],columns=wine['feature_names']+['target'])
X=pd.DataFrame(wine.data)
y=pd.DataFrame(wine.target)
print("ORIGINAL DATA:")
print(df)

#normalize the dataset
# Min-max scale every feature column into [0, 1]. DataFrame - Series aligns on
# the column labels, so this is the column-by-column formula applied at once.
X = (X - X.min()) / (X.max() - X.min())


df=X.copy()
df['target']=y

#DATA PREPROCESSING- ONE HOT ENCODING
# LabelBinarizer returns the one-hot targets as a NumPy array, one column per
# class. (A pd.get_dummies call used to precede this line, but its result was
# overwritten immediately, so it has been dropped.)
y = LabelBinarizer().fit_transform(df.target)
X=X.to_numpy()
# Shuffle features and targets together so rows stay paired.
X, y = shuffle(X, y)

print("After one hot encoding: \n",X,y)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [None]:
def MLP(X,y,learning_rate,epochs,tol=0.001,seed=None):
  """Train a one-hidden-layer sigmoid network with per-sample backpropagation.

  A bias column of ones is prepended to ``X``. The hidden layer has as many
  neurons as there are inputs (bias included) and the output layer has one
  neuron per column of ``y``. Weights are updated after every sample; each
  epoch visits every sample exactly once in a freshly shuffled order.

  Parameters
  ----------
  X : array-like of shape (n_samples, n_features)
      Input features.
  y : array-like of shape (n_samples, n_outputs)
      Target activations, e.g. one-hot encoded class labels.
  learning_rate : float
      Step size applied to every weight update.
  epochs : int
      Maximum number of passes over the training set (must be >= 1).
  tol : float, default 0.001
      Training stops early once the summed error of two consecutive epochs
      differs by less than this amount.
  seed : int or None, default None
      Seed for the weight initialisation and the per-epoch sample order.
      ``None`` gives a different run every time.

  Returns
  -------
  W_hidden : ndarray of shape (n_features + 1, n_features + 1)
      Input-to-hidden weights (column 0 multiplies the bias input).
  W_output : ndarray of shape (n_outputs, n_features + 2)
      Hidden-to-output weights (column 0 multiplies the hidden bias unit).
  err : float
      Sum of ``0.5 * squared error`` accumulated over the last epoch.

  Raises
  ------
  ValueError
      If ``epochs`` is smaller than 1 or ``y`` is not a 2-D array with one
      row per sample of ``X``.
  """
  X = np.insert(np.asarray(X, dtype=float), 0 , 1 , axis=1)
  y = np.asarray(y, dtype=float)
  if y.ndim != 2 or len(y) != len(X):
    raise ValueError("y must be a 2-D array with one row per sample of X")
  if epochs < 1:
    raise ValueError("epochs must be at least 1")

  n_samples, n_inputs = X.shape
  n_hidden = n_inputs
  n_outputs = y.shape[1]

  # Small random weights: identical starting weights would give every hidden
  # neuron the same gradient, so they could never learn different features.
  rng = np.random.default_rng(seed)
  W_hidden = rng.uniform(-0.5, 0.5, size=(n_hidden, n_inputs))
  W_output = rng.uniform(-0.5, 0.5, size=(n_outputs, n_hidden + 1))

  previous_err = None
  err = 0.0
  epoch = 0
  converged = False
  while epoch < epochs and not converged:
    epoch += 1
    err = 0.0
    for index in rng.permutation(n_samples):
      x = X[index]

      #input to hidden
      h = 1/(1+np.exp(-np.dot(W_hidden, x)))
      h1 = np.insert(h, 0 , 1)
      #hidden to output
      d = 1/(1+np.exp(-np.dot(W_output, h1)))

      err += 0.5*float(np.sum((d-y[index])**2))

      #local gradient error in output neuron
      grad_err_output = d*(1-d)*(y[index]-d)

      #local gradient error in hidden neuron
      # Column 0 of W_output belongs to the bias unit, so hidden neuron i is
      # connected through column i+1; the bias column is skipped here.
      grad_err_hidden = np.dot(W_output[:, 1:].T, grad_err_output)*h*(1-h)

      #update weights between hidden and output
      W_output += learning_rate*np.outer(grad_err_output, h1)
      #update weights between input and hidden
      W_hidden += learning_rate*np.outer(grad_err_hidden, x)

    # Converged when the epoch error stopped changing between two epochs.
    converged = previous_err is not None and abs(previous_err-err) < tol
    previous_err = err

  if converged:
    print("Successfully converged")
  else:
    print("Reached the maximum number of epochs without converging")
  print("No of epochs: ",epoch)

  return W_hidden , W_output , err

In [None]:
def predict(X, y ,w1, w2):
  """Run the trained network on ``X`` and score it against one-hot ``y``.

  ``w1`` holds the input-to-hidden weights and ``w2`` the hidden-to-output
  weights; both expect a leading bias entry, which is inserted here. Each
  sample is assigned the class whose output neuron is largest, and the true
  class is the position of the largest entry in the matching row of ``y``.

  Returns a tuple ``(accuracy, true_labels, predicted_labels)`` where the
  two label arrays are float arrays of length ``len(y)``.
  """
  inputs = np.insert(X, 0 , 1 , axis=1)
  n_rows = len(y)
  Y_predict = np.zeros(n_rows)
  Y = np.zeros(n_rows)

  for row in range(n_rows):
    # forward pass: input -> hidden -> output, sigmoid at both layers
    hidden = 1/(1+np.exp(-(np.dot(w1, inputs[row]))))
    hidden_with_bias = np.insert(hidden, 0 , 1)
    output = 1/(1+np.exp(-(np.dot(w2, hidden_with_bias))))

    Y_predict[row] = np.argmax(output, axis=0)
    Y[row] = np.argmax(y[row], axis=0)

  test_acc = accuracy_score(Y, Y_predict)

  return test_acc, Y, Y_predict

In [None]:
#splitting the dataset

# First split: 60% of the samples go to training, the other 40% are held out.
X_train, X_rem, y_train, y_rem = train_test_split(X,y, train_size=0.6)

# Second split: halve the held-out part so the validation and test sets are
# equal in size (20% of the overall data each).
test_size = 0.5
X_valid, X_test, y_valid, y_test = train_test_split(X_rem,y_rem, test_size=test_size)

#Training the model
# MLP returns (hidden weights, output weights, last epoch error); predict
# returns (accuracy, true labels, predicted labels).
w=MLP(X_train,y_train,0.2,70)
print("Accuracy for the test set: ",predict(X_test,y_test,w[0],w[1])[0]*100)

Successfully converged
No of epochs:  70
Accuracy for the test set:  97.22222222222221


In [None]:
# 5-fold cross-validation of the hand-written MLP.
# NOTE(review): confusion() is defined in a later cell of this notebook; on a
# fresh kernel that cell has to be run before this one, otherwise the loop
# stops with a NameError.
k = 5
kf = KFold(n_splits=k, random_state=None)
acc_score = []

for train_index , test_index in kf.split(X):
    # rows of this fold: features and one-hot targets, train part then test part
    X_train , y_train = X[train_index] , y[train_index]
    X_test , y_test = X[test_index] , y[test_index]

    w = MLP(X_train, y_train, 0.2, 70)

    # acc is (accuracy, true labels, predicted labels)
    acc = predict(X_test, y_test, w[0], w[1])
    acc_score.append(acc[0]*100)
    confusion(acc[1], acc[2])

avg_acc_score = sum(acc_score)/k

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))


Successfully converged
No of epochs:  70
Class-wise Accuracy:  [1. 1. 1.]
Class-wise Precision:  [1. 1. 1.]
Class-wise Recall:  [1. 1. 1.]
Successfully converged
No of epochs:  70
Class-wise Accuracy:  [1.         0.93333333 0.93333333]
Class-wise Precision:  [0.85714286 0.93333333 1.        ]
Class-wise Recall:  [1.         0.93333333 0.93333333]
Successfully converged
No of epochs:  70
Class-wise Accuracy:  [1. 1. 1.]
Class-wise Precision:  [1. 1. 1.]
Class-wise Recall:  [1. 1. 1.]
Successfully converged
No of epochs:  70
Class-wise Accuracy:  [0.93333333 1.         1.        ]
Class-wise Precision:  [1.         0.92307692 1.        ]
Class-wise Recall:  [0.93333333 1.         1.        ]
Successfully converged
No of epochs:  70
Class-wise Accuracy:  [1.         0.90909091 1.        ]
Class-wise Precision:  [1.         1.         0.91666667]
Class-wise Recall:  [1.         0.90909091 1.        ]
accuracy of each fold - [100.0, 94.44444444444444, 100.0, 97.14285714285714, 97.142857142

In [None]:
def confusion(Y,Y_predict):
  """Print class-wise accuracy, precision and recall for one set of predictions.

  Parameters
  ----------
  Y : array-like
      True class labels.
  Y_predict : array-like
      Predicted class labels, same length as ``Y``.

  The "class-wise accuracy" printed here is the diagonal of the
  row-normalised confusion matrix: for every class, the correctly predicted
  samples divided by that row's total. Nothing is returned.
  """
  cm = confusion_matrix(Y, Y_predict)
  p=precision_score(Y, Y_predict, average=None, zero_division=1)
  r=recall_score(Y, Y_predict, average=None, zero_division=1)

  #Now the normalize the diagonal entries
  # keepdims=True keeps the row totals as a column vector, so every ROW is
  # divided by its own total. Without it the totals broadcast across columns
  # and only the diagonal comes out right.
  row_totals = cm.sum(axis=1, keepdims=True)
  # A row with zero total yields NaN; silence the divide warning for that case.
  with np.errstate(divide='ignore', invalid='ignore'):
    cm = cm.astype('float') / row_totals
  print("Class-wise Accuracy: ",cm.diagonal())
  print("Class-wise Precision: ",p)
  print("Class-wise Recall: ",r)
 

In [None]:
#using inbuilt function
from sklearn.neural_network import MLPClassifier
#loading the data
wine=datasets.load_wine()
df=pd.DataFrame(data=np.c_[wine['data'],wine['target']],columns=wine['feature_names']+['target'])
X_1=pd.DataFrame(wine.data)
y_1=pd.DataFrame(wine.target)

#normalize the dataset
# Min-max scale every feature column into [0, 1]. DataFrame - Series aligns on
# the column labels, so this is the column-by-column formula applied at once.
X_1 = (X_1 - X_1.min()) / (X_1.max() - X_1.min())

X_1=X_1.to_numpy()
y_1=y_1.to_numpy()
#splitting the dataset

# First split: 60% of the samples go to training, the other 40% are held out.
X_train_1, X_rem, y_train_1, y_rem = train_test_split(X_1,y_1, train_size=0.6)

# Second split: halve the held-out part so the validation and test sets are
# equal in size (20% of the overall data each).
test_size = 0.5
X_valid_1, X_test_1, y_valid_1, y_test_1 = train_test_split(X_rem,y_rem, test_size=test_size)
# y_train_1 is a column vector; ravel() flattens it to the 1-D label array
# passed to fit.
clf = MLPClassifier(random_state=1, max_iter=700).fit(X_train_1, y_train_1.ravel())

print("Accuracy: ",clf.score(X_test_1, y_test_1)*100)

Accuracy:  100.0


In [None]:
# 5-fold cross-validation of sklearn's MLPClassifier on the normalised data.
k = 5
kf = KFold(n_splits=k, random_state=None,shuffle=True)
acc_score = []
# enumerate(..., start=1) numbers the folds 1..k without a hand-kept counter.
for i, (train_index , test_index) in enumerate(kf.split(X_1), start=1):
    X_train_1 , X_test_1 = X_1[train_index],X_1[test_index]
    y_train_1 , y_test_1 = y_1[train_index] , y_1[test_index]

    # a fresh classifier is fitted for every fold
    clf = MLPClassifier(random_state=1, max_iter=700).fit(X_train_1, y_train_1.ravel())
    y_pred=clf.predict(X_test_1)
    acc_score.append(clf.score(X_test_1, y_test_1)*100)
    print("For ",i," fold: \n")
    confusion(y_test_1,y_pred)
avg_acc_score = sum(acc_score)/k

print('accuracy of each fold - {}'.format(acc_score))
print('Avg accuracy : {}'.format(avg_acc_score))

For  1  fold: 

Class-wise Accuracy:  [1. 1. 1.]
Class-wise Precision:  [1. 1. 1.]
Class-wise Recall:  [1. 1. 1.]
For  2  fold: 

Class-wise Accuracy:  [1.     0.9375 1.    ]
Class-wise Precision:  [0.93333333 1.         1.        ]
Class-wise Recall:  [1.     0.9375 1.    ]
For  3  fold: 

Class-wise Accuracy:  [0.90909091 0.90909091 0.92857143]
Class-wise Precision:  [1.         0.83333333 0.92857143]
Class-wise Recall:  [0.90909091 0.90909091 0.92857143]
For  4  fold: 

Class-wise Accuracy:  [1. 1. 1.]
Class-wise Precision:  [1. 1. 1.]
Class-wise Recall:  [1. 1. 1.]
For  5  fold: 

Class-wise Accuracy:  [1. 1. 1.]
Class-wise Precision:  [1. 1. 1.]
Class-wise Recall:  [1. 1. 1.]
accuracy of each fold - [100.0, 97.22222222222221, 91.66666666666666, 100.0, 100.0]
Avg accuracy : 97.77777777777779
