# Particle Swarm Optimization (PSO) for ANN image recognition

In [3]:
import math, random
import pandas as pd
#from sklearn.datasets import load_files
import numpy as np
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score

import sys
sys.path.append(".")
from ImageData import *
from Functions import *

## In Part 4, the linear classifiers' accuracy was very low; for example, SGD gave 44% accuracy.
## The interesting part for me is to check whether PSO can improve accuracy on the image dataset to a reasonable level.

In [4]:
### Load Data
# LoadData comes from the project-local ImageData/Functions modules (star-imported above).
# NOTE(review): use_PCA is passed as the *string* "False", not the boolean False. A non-empty
# string is truthy in Python, so confirm how LoadData interprets this argument.
X,y = LoadData(type="Array",use_PCA="False")
### Split Data
# 70/30 train/test split, shuffled with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100, shuffle=True)

### Testing using SGD Classifier
# Baseline linear classifier, fitted on the training split.
SGD_classifier = SGDClassifier(max_iter=1000, tol=1e-3 , shuffle=True , random_state=100)
SGD_classifier.fit(X_train,y_train)

# NOTE(review): cross_val_score clones the estimator and refits it on folds of the data it is
# given (here the *test* split), so the fit on X_train above does not influence the printed
# score. To score the fitted model on held-out data, SGD_classifier.score(X_test, y_test)
# would be the usual call — confirm which was intended.
scores = cross_val_score(SGD_classifier, X_test, y_test)
print("Accuracy Score on Testing Dataset = %0.2f  with standard deviation = %0.2f" % (scores.mean(), scores.std()))

Accuracy Score on Testing Dataset = 0.42  with standard deviation = 0.02


# Initializing functions for weights and Vectors

In [7]:
import pandas as pd
import numpy as np
from scipy import special
import time
from numpy import array
from random import random
from random import uniform
from math import sin, sqrt
from scipy.special import xlogy , xlog1py

def Create_Weights_Bias_from_ParticleVector( weights, bias, pv):
    """Unpack a flat particle vector back into the weight and bias containers.

    The vector is consumed in order: first every entry of ``weights`` (in the
    dict's iteration order), then every entry of ``bias``.  Each existing array
    acts as a shape template and is replaced by the corresponding reshaped
    slice of ``pv``.

    Args:
        weights: dict mapping layer key -> ndarray; used as shape templates and
            updated in place.
        bias: dict mapping layer key -> ndarray; same treatment as ``weights``.
        pv: flat sequence of parameter values (ndarray or plain list, such as
            the list returned by ``Create_Particles_Vector``).

    Returns:
        The same ``weights`` and ``bias`` objects that were passed in, now
        holding the values taken from ``pv``.

    Raises:
        ValueError: from ``reshape`` when ``pv`` has too few values for a
            template.  Surplus trailing values are ignored.
    """
    # Accept lists as well as arrays; for an ndarray this is a no-copy view.
    flat = np.asarray(pv)
    offset = 0
    for group in (weights, bias):
        # Only existing keys are reassigned, so the dict size does not change
        # while it is being iterated.
        for key, template in group.items():
            count = template.size  # works for any rank, not only 2-D
            group[key] = flat[offset:offset + count].reshape(template.shape)
            offset += count

    return weights, bias

def Create_Particles_Vector( weights, bias):
    """Flatten all weight matrices, then all bias matrices, into one list.

    Every value of ``weights`` and then of ``bias`` is walked row by row, and
    the individual elements are collected in that order.

    Args:
        weights: mapping of layer key -> 2-level iterable (rows of values).
        bias: mapping of layer key -> 2-level iterable (rows of values).

    Returns:
        A plain Python list holding every element, weights first.
    """
    flattened = []
    for group in (weights, bias):
        for key in group:
            for row in group[key]:
                flattened.extend(row)
    return flattened



def Activation_Function_funct(A , AType="Sigmoid, Relu, Tanh"):
    """Apply an activation function element-wise and return it with its derivative.

    Args:
        A: pre-activation values (array-like).
        AType: one of ``"Sigmoid"``, ``"Tanh"`` or ``"Relu"``.  The default
            string is only a reminder of the valid choices and is not itself
            a valid value.

    Returns:
        Tuple ``(activation, derivative)``, both with the shape of ``A``.

    Raises:
        ValueError: if ``AType`` is not one of the supported names (previously
            the function fell through and returned ``None``).
    """
    A = np.asarray(A)
    if AType == "Sigmoid":
        # expit is the logistic sigmoid and does not overflow for large |A|.
        a = special.expit(A)
        return a, a * (1 - a)
    if AType == "Tanh":
        # np.tanh stays finite where the exp-based formula yields inf/inf = nan.
        t = np.tanh(A)
        return t, 1 - t ** 2
    if AType == "Relu":
        R = np.maximum(0, A)
        dR = np.where(R <= 0, 0, 1)
        return R, dR
    raise ValueError(
        "Unknown activation type %r; expected 'Sigmoid', 'Tanh' or 'Relu'" % (AType,)
    )



def cross_entropy_loss(Predicted, Actual):
    """Return the mean binary cross-entropy between predictions and targets.

    Computes ``-(y*log(p) + (1-y)*log(1-p))`` element-wise and averages it.

    Args:
        Predicted: predicted probabilities (array-like); values are clipped to
            ``[1e-15, 1 - 1e-15]`` so the logarithm stays finite.
        Actual: target values (array-like), broadcastable against ``Predicted``.

    Returns:
        The mean loss as a NumPy float.
    """
    eps = 1e-15
    predicted = np.clip(np.asarray(Predicted, dtype=float), eps, 1 - eps)
    actual = np.asarray(Actual, dtype=float)
    # xlogy(x, y) = x*log(y), with 0*log(0) defined as 0.  The previous code used
    # xlog1py, i.e. x*log(1 + y), which is not the cross-entropy and is negative.
    _Loss = -(special.xlogy(actual, predicted) + special.xlogy(1 - actual, 1 - predicted))
    return np.mean(_Loss)


def hinge_loss(Predicted, Actual):
    """Return the mean hinge loss ``max(0, 1 - actual * predicted)``.

    Values equal to 0 in either input are first mapped to -1 so that 0/1
    labels are treated as -1/+1; all other values are used unchanged.

    Args:
        Predicted: 1-D array-like of predictions.
        Actual: 1-D array-like of targets, same length as ``Predicted``.

    Returns:
        The mean loss as a NumPy float.
    """
    predicted = np.asarray(Predicted)
    actual = np.asarray(Actual)
    signed_predicted = np.where(predicted == 0, -1, predicted)
    signed_actual = np.where(actual == 0, -1, actual)
    # No debug print here: this runs once per particle per iteration.
    return np.mean(np.maximum(0, 1 - signed_actual * signed_predicted))

def Loss_Fuction(Predicted, Actual, Loss="cross_entropy_loss , hinge_loss"):
    """Dispatch to the loss function selected by name.

    Args:
        Predicted: predictions, forwarded unchanged to the chosen loss.
        Actual: targets, forwarded unchanged to the chosen loss.
        Loss: ``"cross_entropy_loss"`` or ``"hinge_loss"``.  The default string
            is only a reminder of the valid choices and is not itself valid.

    Returns:
        Whatever the selected loss function returns.

    Raises:
        ValueError: if ``Loss`` names no known loss.  Previously the function
            returned ``None`` in that case, which only failed later when the
            result was compared as a fitness value.
    """
    if Loss == "cross_entropy_loss":
        return cross_entropy_loss(Predicted, Actual)
    if Loss == "hinge_loss":
        return hinge_loss(Predicted, Actual)
    raise ValueError(
        "Unknown loss %r; expected 'cross_entropy_loss' or 'hinge_loss'" % (Loss,)
    )


def forward_propagation(input,y, weights, bias, losstype,ActivationFunctions):
    """Run the network forward layer by layer and return the loss.

    For each layer index ``k`` from 0 to ``len(weights) - 1`` the
    pre-activation is ``weights[k].T @ signal + bias[k]``; the activation
    named by ``ActivationFunctions[k]`` is applied and a copy of its output
    feeds the next layer.  Row 0 of the final layer's activation is compared
    against ``y`` with the loss named by ``losstype``.

    Args:
        input: data fed to the first layer.
        y: target values passed to the loss.
        weights: indexable by layer number, giving each layer's weight matrix.
        bias: indexable by layer number, giving each layer's bias.
        losstype: loss name understood by ``Loss_Fuction``.
        ActivationFunctions: indexable by layer number, giving activation names.

    Returns:
        The value returned by ``Loss_Fuction``.
    """
    pre_activations = {}
    activations = {}
    layer_count = len(weights)
    signal = input
    for layer in range(layer_count):
        pre_activations[layer] = np.matmul(weights[layer].T, signal) + bias[layer]
        # The derivative returned alongside the activation is not needed here.
        activations[layer], _ = Activation_Function_funct(
            pre_activations[layer], ActivationFunctions[layer]
        )
        signal = activations[layer].copy()

    # Only the first row of the output layer is scored.
    network_output = activations[layer_count - 1][0]
    return Loss_Fuction(network_output, y, losstype)




# Creating Particle Class 

In [5]:
class Particle:
    """Plain attribute container for one swarm member.

    The class defines no fields itself; the PSO class in this file attaches
    ``params``, ``fitness``, ``v``, ``best``, ``informants`` and
    ``best_informant`` to each instance after creating it.
    """

# Creating PSO Class

In [8]:
import pandas as pd
import numpy as np
from numpy import array
from random import random
from random import uniform
from math import sin, sqrt

class PSO:
    """Particle Swarm Optimisation over flat parameter vectors of a small ANN.

    Each particle's ``params`` is a flat vector that is unpacked into the
    network's weights and biases; its fitness is the loss returned by
    ``forward_propagation`` (lower is better).

    Attributes:
        particles: list of ``Particle`` objects making up the swarm.
        gbest: a detached ``Particle`` recording the best position found so
            far (``params``) and its loss (``fitness``); ``None`` until
            ``Initialize_PSO`` has been called.
    """

    def __init__(self):
        # Per-instance state.  A class-level list would be shared by every PSO
        # object, so particles from one swarm would leak into the next.
        self.particles = []
        self.gbest = None

    @staticmethod
    def _snapshot(params, fitness):
        """Return a detached Particle holding a copy of a position and its fitness."""
        record = Particle()
        record.params = np.array(params, copy=True)
        record.fitness = fitness
        record.v = 0.0
        record.best = record.params
        record.informants = []
        record.best_informant = record.params
        return record

    def Initialize_PSO(self, pop_size = 100,dimensions = 2):
        """Create a fresh swarm of randomly placed particles.

        Args:
            pop_size: number of particles (at least 1).
            dimensions: length of each particle's parameter vector.

        Raises:
            ValueError: if ``pop_size`` is smaller than 1.
        """
        if pop_size < 1:
            raise ValueError("pop_size must be at least 1")

        # Start from an empty swarm so re-initialising does not accumulate particles.
        self.particles = []
        for _ in range(pop_size):
            p = Particle()
            p.params = array([uniform(-0.5, 0.5) for _ in range(dimensions)])
            # The loss is minimised, so "no result yet" must be +inf; starting at
            # 0.0 would stop any positive loss from ever counting as an improvement.
            p.fitness = float("inf")
            p.v = 0.0
            p.best = p.params
            p.informants = []
            p.best_informant = p.params
            self.particles.append(p)

        # Let the first particle be the provisional global best.
        self.gbest = self._snapshot(self.particles[0].params, float("inf"))

    def addInformants(self, index, p):
        """Append the current positions of up to three other random particles.

        Args:
            index: position of ``p`` in ``self.particles``; it is never picked.
            p: the particle whose ``informants`` list is extended.

        Returns:
            ``p.informants`` after the additions.  Fewer than three entries are
            added when the swarm has fewer than four particles.
        """
        # Local import: the module-level name ``random`` is bound to the
        # function random.random, not to the module.
        from random import sample

        others = [k for k in range(len(self.particles)) if k != index]
        for k in sample(others, min(3, len(others))):
            p.informants.append(self.particles[k].params)

        return p.informants

    def bestInformant(self,p, input, y, weights, bias, losstype,ActivationFunctions):
        """Evaluate every informant of ``p`` and remember the one with the lowest loss.

        Args:
            p: particle whose ``informants`` (parameter vectors) are evaluated.
            input, y, losstype, ActivationFunctions: forwarded to
                ``forward_propagation``.
            weights, bias: shape templates, overwritten while evaluating.

        Returns:
            The parameter vector of the best informant, also stored in
            ``p.best_informant``.  Falls back to ``p.params`` when there is no
            informant with a loss below +inf.
        """
        best_fitness = float('inf')
        best_position = p.params
        for informant_params in p.informants:
            # Score the informant's own position, not the particle's.
            weights, bias = Create_Weights_Bias_from_ParticleVector(weights, bias, informant_params)
            fitness = forward_propagation(input,y, weights, bias, losstype,ActivationFunctions)

            if fitness < best_fitness:
                best_fitness = fitness
                best_position = informant_params

        p.best_informant = best_position
        return p.best_informant

    def Run_PSO(self, input, y, weights, bias, losstype, ActivationFunctions, iter_max ,c1 = 2,c2 = 2 , c3 =0.1 ,err_crit = 0.00001, inertia = 0.0 ):
        """Run the swarm and return the best parameter vector found.

        Args:
            input, y: data and targets forwarded to ``forward_propagation``.
            weights, bias: shape templates, overwritten during evaluation.
            losstype: loss name forwarded to ``forward_propagation``.
            ActivationFunctions: per-layer activation names.
            iter_max: maximum number of iterations.
            c1: pull towards the particle's own best position.
            c2: pull towards the global best position.
            c3: pull towards the best informant's position.
            err_crit: stop early once the global best loss falls below this.
            inertia: weight on the previous velocity.  The default 0.0 keeps
                the earlier behaviour, in which velocity was never carried over.

        Returns:
            ``self.gbest.params``, the best position seen.

        Raises:
            RuntimeError: if ``Initialize_PSO`` has not been called.
        """
        if not self.particles or self.gbest is None:
            raise RuntimeError("Initialize_PSO must be called before Run_PSO")

        i = 0
        while i < iter_max:
            # Draw a fresh set of informants for every particle each iteration.
            for index, p in enumerate(self.particles):
                p.informants = []
                self.addInformants(index, p)
                self.bestInformant(p, input, y, weights, bias, losstype, ActivationFunctions)

            for p in self.particles:
                _weights, _bias = Create_Weights_Bias_from_ParticleVector(weights, bias, p.params)
                fitness = forward_propagation(input,y, _weights, _bias, losstype, ActivationFunctions)

                if fitness < p.fitness:
                    p.fitness = fitness
                    p.best = p.params.copy()

                if fitness < self.gbest.fitness:
                    # Store a copy: the particle keeps moving, the record must not.
                    self.gbest = self._snapshot(p.params, fitness)

                v = inertia * p.v + c1 * random() * (p.best - p.params) \
                        + c2 * random() * (self.gbest.params - p.params) \
                        + c3 * random() * (p.best_informant - p.params)
                p.v = v
                p.params = p.params + v

            i += 1
            if self.gbest.fitness < err_crit:
                break

        return self.gbest.params

# Using CNN with PSO

## Creating CNN Class Code    https://github.com/feferna/psoCNN



## Jad: The code below is not mine; writing my own would take too long for this research question, so I used code from the internet (referenced below) to see whether PSO will work with a CNN.

## https://towardsdatascience.com/training-a-convolutional-neural-network-from-scratch-2235c2a25754

In [18]:
import keras
from keras.layers import Conv3D
from keras.layers import MaxPool2D
from keras.layers import Softmax

In [28]:
import mnist
import numpy as np
#from conv import Conv3x3
#from maxpool import MaxPool2
#from softmax import Softmax
import keras
from keras.layers import MaxPool2D
from keras.layers import Dense
from keras.layers import Conv2D

# Independent copies of the train/test split created in the earlier data-loading cell.
# The training loop below rebinds train_images/train_labels when it shuffles them.
train_images = X_train.copy()
train_labels = y_train.copy()
test_images = X_test.copy()
test_labels = y_test.copy()

def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    Args:
        x: array-like (or scalar) of scores.

    Returns:
        Values with the shape of ``x`` that are non-negative and sum to 1.
    """
    scores = np.asarray(x, dtype=float)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps exp() from overflowing to inf (which produced nan after division).
    e_x = np.exp(scores - np.max(scores))
    return e_x / e_x.sum()

# NOTE(review): conv and pool are Keras layer objects, but forward() and train() below call
# .forward()/.backprop() on them. The recorded run of this cell fails with
# "AttributeError: 'Conv2D' object has no attribute 'forward'". The commented-out imports at
# the top of the cell (Conv3x3, MaxPool2, Softmax) look like the classes this code was
# written against — TODO confirm and restore them, or rewrite the cell with the Keras API.
conv = Conv2D(8,kernel_size=(3,3))                  # shape note from the tutorial: 28x28x1 -> 26x26x8 (unverified for this dataset)
pool = MaxPool2D()                  # shape note from the tutorial: 26x26x8 -> 13x13x8 (unverified for this dataset)
# NOTE(review): this calls the softmax() function defined above on the scalar 48*48*1 and
# rebinds the name `softmax` to the numeric result, so the function is shadowed from here on
# and the later softmax.forward()/softmax.backprop() calls cannot work on it.
softmax = softmax(48 * 48 * 1) # shape note from the tutorial: 13x13x8 -> 10 (unverified for this dataset)

def forward(image, label):
  '''
  Run one forward pass through conv -> pool -> softmax and score the result.

  - image: pixel array; it is rescaled as (image / 255) - 0.5 before the
    convolution step.
  - label: index of the true class, used to index the network output.

  Returns a tuple (out, loss, acc) where out is the final layer output, loss
  is -log(out[label]) (natural log) and acc is 1 when argmax(out) equals
  label, otherwise 0.

  NOTE(review): relies on the module-level conv, pool and softmax objects
  exposing a .forward() method.
  '''
  scaled = (image / 255) - 0.5
  features = conv.forward(scaled)
  pooled = pool.forward(features)
  out = softmax.forward(pooled)

  # Cross-entropy for a single example, plus a 0/1 hit indicator.
  loss = -np.log(out[label])
  predicted_class = np.argmax(out)
  acc = 1 if predicted_class == label else 0

  return out, loss, acc

def train(im, label, lr=.005):
  '''
  Run one training step (forward pass, then backpropagation) on one example.

  - im: input image, passed to forward().
  - label: index of the true class.
  - lr: learning rate handed to the softmax and conv backprop steps.

  Returns a tuple (loss, acc) as computed by forward().

  NOTE(review): relies on the module-level conv, pool and softmax objects
  exposing a .backprop() method.
  '''
  # Forward
  out, loss, acc = forward(im, label)

  # Gradient of -log(out[label]) with respect to out: zero everywhere except
  # at the true class. Sized from the network output instead of a hard-coded
  # 10, so it also works when the dataset does not have exactly 10 classes.
  gradient = np.zeros(len(out))
  gradient[label] = -1 / out[label]

  # Backprop, in reverse layer order
  gradient = softmax.backprop(gradient, lr)
  gradient = pool.backprop(gradient)
  gradient = conv.backprop(gradient, lr)

  return loss, acc



# Train the CNN for 3 epochs
for epoch in range(3):
  print('--- Epoch %d ---' % (epoch + 1))

  # Shuffle the training data: apply one random permutation to images and labels
  # so that each image stays paired with its label.
  permutation = np.random.permutation(len(train_images))
  train_images = train_images[permutation]
  train_labels = train_labels[permutation]

  # Train!
  # loss / num_correct accumulate over the current reporting window only.
  loss = 0
  num_correct = 0
  for i, (im, label) in enumerate(zip(train_images, train_labels)):
    # Report at steps 99, 199, ... The report is printed BEFORE step i is trained, so the
    # first window covers 99 steps (0..98) while the average still divides by 100.
    if i > 0 and i % 100 == 99:
      print(
        '[Step %d] Past 100 steps: Average Loss %.3f | Accuracy: %d%%' %
        (i + 1, loss / 100, num_correct)
      )
      # Start a new reporting window.
      loss = 0
      num_correct = 0

    l, acc = train(im, label)
    loss += l
    num_correct += acc

# Test the CNN
# Forward passes only (no weight updates); loss and hit count are summed over the test set.
print('\n--- Testing the CNN ---')
loss = 0
num_correct = 0
for im, label in zip(test_images, test_labels):
  _, l, acc = forward(im, label)
  loss += l
  num_correct += acc

# Report the per-example averages.
num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)

MNIST CNN initialized!
--- Epoch 1 ---




AttributeError: 'Conv2D' object has no attribute 'forward'