## Task 1: Multi-layer ANN

Hyperparameters:
 1. Number of nodes
 2. Number of layers
 3. Activation function

In [1]:
# Load library
import pandas as pd
import numpy as np
import math
from enum import Enum
from sklearn.model_selection import train_test_split

In [2]:
# Load data
concrete = pd.read_csv('data/concrete_data.csv')
concrete.head()

Unnamed: 0,cement,blast_furnace_slag,fly_ash,water,superplasticizer,coarse_aggregate,fine_aggregate,age,concrete_compressive_strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Separate X and Y
# Then separate test and train set
# Also do the Cross-Validation (optional)
X = concrete.drop('concrete_compressive_strength', axis = 1)
y = concrete['concrete_compressive_strength']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.7, random_state = 42)

### Setting up activation functions

In [4]:
# Activation function
def logistic(x):
    return 1/(1 + math.exp(-x))

def ReLU(x):
    return max(0, x)

def hyperbolic(x):
    return math.tanh(x)

class ActFunc(Enum):
    log = logistic
    relu = ReLU
    hb = hyperbolic

### Setting up ANN

In [5]:
# Neural Network test!
from neuralNet import neuralNet
from layer import layer

network = neuralNet()

network.add(layer(ActFunc.relu,6))
network.add(layer(ActFunc.relu,4))
network.add(layer(ActFunc.relu,2))
network.add(layer(ActFunc.relu,1))

## Task 2: Implement the PSO

Hyperparameters:
1. Swarmsize
2. Alpha
3. Beta
4. Gamma
5. Delta
6. Epsilon
7. Number of iterations (epoch)


In [6]:
# PSO test!
import pso

swarmsize = 10
alpha = 0.7
beta = 2
gamma = 1.5
delta = 1.5
epsilon = 0.4
n_iter = 10

all_best = []
for i in range(1):
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()
    all_best.append(best_mae_arr[-1])
avg_best = sum(all_best)/len(all_best)
avg_best

13.451531634425892

## Task 3, 4 & 5: Exploring ANN and PSO hyperparameters on the concrete data

### ANN architecture

#### Trying different number of layer

In [7]:
# Try neural network architectures
# 2-10 layers
# 3-10 neurons per layer

layers = range(2,10)

# Try different layers
# Constant 4 neurons per layer, activation func relu

avg_accuraciesForLayers = {}
for numLayers in layers:
    accuraciesForLayers = []
    # Set up network
    network = neuralNet()
    for i in range(numLayers - 1):
        network.add(layer(ActFunc.relu,4))
    network.add(layer(ActFunc.relu,1))
    for i in range(1):
        layerTestPSO = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter,prints=False)
        opti_particle, best_mae_arr = layerTestPSO.optimise()
        accuraciesForLayers.append(best_mae_arr[-1])
    avg_accuraciesForLayers[numLayers] = sum(accuraciesForLayers)/len(accuraciesForLayers)
avg_accuraciesForLayers

# Best: 4 layers

{2: 10.14732421201561,
 3: 15.004473812005005,
 4: 16.387779330618713,
 5: 11.385846704402498,
 6: 11.484367661039272,
 7: 12.772960385320248,
 8: 13.45855669636377,
 9: 18.403413570468995}

In [8]:
avg_accuraciesForLayers

{2: 10.14732421201561,
 3: 15.004473812005005,
 4: 16.387779330618713,
 5: 11.385846704402498,
 6: 11.484367661039272,
 7: 12.772960385320248,
 8: 13.45855669636377,
 9: 18.403413570468995}

#### Trying different number of neurons

In [9]:
neurons = range(2,11)

avg_accuraciesForNeuronCounts = {}

for neuronCount in neurons:
    accuraciesForNeuronCounts = []
    # Set up network
    network = neuralNet()
    for i in range(3):
        network.add(layer(ActFunc.relu,neuronCount))
    network.add(layer(ActFunc.relu,1))
    # Run optimisation 10 times
    for i in range(1):
        neuronsTestPSO = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter,prints=False)
        opti_particle, best_mae_arr = neuronsTestPSO.optimise()
        accuraciesForNeuronCounts.append(best_mae_arr[-1])
    avg_accuraciesForNeuronCounts[neuronCount] = sum(accuraciesForNeuronCounts)/len(accuraciesForNeuronCounts)
avg_accuraciesForNeuronCounts

# Best is 3 neurons per layer

{2: 12.21494578778015,
 3: 12.257277960554257,
 4: 15.226750719901235,
 5: 12.645783111308017,
 6: 15.028134405611029,
 7: 26.95547406706671,
 8: 11.907367701166727,
 9: 14.072182658966591,
 10: 14.602080711399982}

In [10]:
avg_accuraciesForNeuronCounts

{2: 12.21494578778015,
 3: 12.257277960554257,
 4: 15.226750719901235,
 5: 12.645783111308017,
 6: 15.028134405611029,
 7: 26.95547406706671,
 8: 11.907367701166727,
 9: 14.072182658966591,
 10: 14.602080711399982}

### PSO hyperparameters

In [11]:
# Potential ranges of PSO parameters to try
# Could try them like a gridsearch but its probably a bit too much for that, tuning one at a time is probably the way to go

alphaRange = np.arange(0.4,0.9,0.1)
betaRange = np.arange(1.5,2.5,0.1)
gammaRange = np.arange(1.5,2.5,0.1)
deltaRange = np.arange(1.5,2.5,0.1)
epsilonRange = np.arange(0.1,0.5,0.1) # not sure about the range of the learning rate, might need to experiment or look for more sources

#### Tuning alpha

In [12]:
avg_accuraciesForAlphas = {}

for a in alphaRange:
    accuraciesForAlphas = []
    # Set up network
    network = neuralNet()
    for i in range(3):
        network.add(layer(ActFunc.relu,3))
    network.add(layer(ActFunc.relu,1))
    for i in range(1):
        alphaTestPSO = pso.PSO(X_train, y_train, network, swarmsize, a, beta, gamma, delta, epsilon, n_iter,prints=False)
        opti_particle, best_mae_arr = alphaTestPSO.optimise()
        accuraciesForAlphas.append(best_mae_arr[-1])
    avg_accuraciesForAlphas[a] = sum(accuraciesForAlphas)/len(accuraciesForAlphas)

#### Tuning beta

In [13]:
avg_accuraciesForBetas = {}

for b in betaRange:
    accuraciesForBetas = []
    # Set up network
    network = neuralNet()
    for i in range(3):
        network.add(layer(ActFunc.relu,3))
    network.add(layer(ActFunc.relu,1))
    for i in range(1):
        betaTestPSO = pso.PSO(X_train, y_train, network, swarmsize, alpha, b, gamma, delta, epsilon, n_iter,prints=False)
        opti_particle, best_mae_arr = betaTestPSO.optimise()
        accuraciesForBetas.append(best_mae_arr[-1])
    avg_accuraciesForBetas[b] = sum(accuraciesForBetas)/len(accuraciesForBetas)

#### Tuning gamma

In [14]:
avg_accuraciesForGammas = {}

for g in gammaRange:
    accuraciesForGammas = []
    # Set up network
    network = neuralNet()
    for i in range(3):
        network.add(layer(ActFunc.relu,3))
    network.add(layer(ActFunc.relu,1))
    for i in range(1):
        gammaTestPSO = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, g, delta, epsilon, n_iter,prints=False)
        opti_particle, best_mae_arr = gammaTestPSO.optimise()
        accuraciesForGammas.append(best_mae_arr[-1])
    avg_accuraciesForGammas[g] = sum(accuraciesForGammas)/len(accuraciesForGammas)

#### Tuning delta

In [15]:
avg_accuraciesForDeltas = {}

for d in deltaRange:
    accuraciesForDeltas = []
    # Set up network
    network = neuralNet()
    for i in range(3):
        network.add(layer(ActFunc.relu,3))
    network.add(layer(ActFunc.relu,1))
    for i in range(1):
        deltaTestPSO = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, d, epsilon, n_iter,prints=False)
        opti_particle, best_mae_arr = deltaTestPSO.optimise()
        accuraciesForDeltas.append(best_mae_arr[-1])
    avg_accuraciesForDeltas[d] = sum(accuraciesForDeltas)/len(accuraciesForDeltas)

#### Tuning epsilon

In [16]:
avg_accuraciesForEpsilons = {}

for e in epsilonRange:
    accuraciesForEpsilons = []

    # Set up network
    network = neuralNet()
    for i in range(3):
        network.add(layer(ActFunc.relu,3))
    network.add(layer(ActFunc.relu,1))
    for i in range(1):
        epsilonTestPSO = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, e, n_iter,prints=False)
        opti_particle, best_mae_arr = epsilonTestPSO.optimise()
        accuraciesForEpsilons.append(best_mae_arr[-1])
    avg_accuraciesForEpsilons[e] = sum(accuraciesForEpsilons)/len(accuraciesForEpsilons)

In [17]:
avg_accuraciesForAlphas 
# Best: 0.6

{np.float64(0.4): 15.065149086931363,
 np.float64(0.5): 12.711912078193958,
 np.float64(0.6): 11.89618211308327,
 np.float64(0.7): 12.692801781346283,
 np.float64(0.7999999999999999): 13.67870935308151}

In [18]:
avg_accuraciesForBetas
# Best: 2.4

{np.float64(1.5): 30.325597484110258,
 np.float64(1.6): 11.809658675068352,
 np.float64(1.7000000000000002): 15.78216376104455,
 np.float64(1.8000000000000003): 13.286233181558115,
 np.float64(1.9000000000000004): 12.72433392654653,
 np.float64(2.0000000000000004): 14.577975466970747,
 np.float64(2.1000000000000005): 12.347327592841335,
 np.float64(2.2000000000000006): 13.273909090656577,
 np.float64(2.3000000000000007): 13.114383455940429,
 np.float64(2.400000000000001): 15.8696925774791}

In [19]:
avg_accuraciesForGammas
# Best: 1.5

{np.float64(1.5): 16.571768133035697,
 np.float64(1.6): 12.64076288542907,
 np.float64(1.7000000000000002): 14.717056640649119,
 np.float64(1.8000000000000003): 13.792139508456104,
 np.float64(1.9000000000000004): 12.004093797385174,
 np.float64(2.0000000000000004): 13.721067600981334,
 np.float64(2.1000000000000005): 13.60463294924608,
 np.float64(2.2000000000000006): 16.888981482950182,
 np.float64(2.3000000000000007): 11.53568424795468,
 np.float64(2.400000000000001): 12.97534767568286}

In [20]:
avg_accuraciesForDeltas
# Best: 1.6

{np.float64(1.5): 11.88655941687413,
 np.float64(1.6): 13.394002432529403,
 np.float64(1.7000000000000002): 14.502812570138076,
 np.float64(1.8000000000000003): 12.274850526547047,
 np.float64(1.9000000000000004): 12.38548413089067,
 np.float64(2.0000000000000004): 15.249815404253688,
 np.float64(2.1000000000000005): 12.840889574482826,
 np.float64(2.2000000000000006): 13.738451861644485,
 np.float64(2.3000000000000007): 15.305001339880624,
 np.float64(2.400000000000001): 15.683789970039138}

In [21]:
avg_accuraciesForEpsilons
# Best: 0.4

{np.float64(0.1): 13.060614984879418,
 np.float64(0.2): 16.751982422083433,
 np.float64(0.30000000000000004): 12.405030990844962,
 np.float64(0.4): 14.761597759012353}

### A few best combination of hyperparameter

In [22]:
# Combination 1
# Network layer (3, 3, 3)
# All best PSO hyperparam

# Set up network
network = neuralNet()

network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,1))

# PSO Hyperparams
swarmsize = 10
alpha = 0.6
beta = 2.4
gamma = 1.5
delta = 1.6
epsilon = 0.4
n_iter = 10

mse_arr = []
for i in range(10):
    # Getting optimal position
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()

    # Apply to test set
    weights, bias = particle_swarm_opti.assessFitness_helper( opti_particle)
    y_pred = X_test.apply(network.forwardCalculation, args = (weights, bias), axis = 1)
    mse_arr.append(network.errorCalculation(y_pred, y_test))
mse_comb1 = sum(mse_arr)/len(mse_arr)
mse_comb1

13.677595506562039

In [23]:
# Combination 2
# Network layer (6, 4, 2)
# All best PSO hyperparam

# Set up network
network = neuralNet()

network.add(layer(ActFunc.relu,6))
network.add(layer(ActFunc.relu,4))
network.add(layer(ActFunc.relu,2))
network.add(layer(ActFunc.relu,1))

# PSO Hyperparams
swarmsize = 10
alpha = 0.6
beta = 2.4
gamma = 1.5
delta = 1.6
epsilon = 0.4
n_iter = 10

mse_arr = []
for i in range(10):
    # Getting optimal position
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()

    # Apply to test set
    weights, bias = particle_swarm_opti.assessFitness_helper( opti_particle)
    y_pred = X_test.apply(network.forwardCalculation, args = (weights, bias), axis = 1)
    mse_arr.append(network.errorCalculation(y_pred, y_test))
mse_comb2 = sum(mse_arr)/len(mse_arr)
mse_comb2

13.666496699815912

In [24]:
# Combination 3
# Network layer (3, 3, 3)
# All best PSO hyperparam except beta = 1.6

# Set up network
network = neuralNet()

network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,1))

# PSO Hyperparams
swarmsize = 10
alpha = 0.6
beta = 1.6
gamma = 1.5
delta = 1.6
epsilon = 0.4
n_iter = 10

mse_arr = []
for i in range(10):
    # Getting optimal position
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()

    # Apply to test set
    weights, bias = particle_swarm_opti.assessFitness_helper( opti_particle)
    y_pred = X_test.apply(network.forwardCalculation, args = (weights, bias), axis = 1)
    mse_arr.append(network.errorCalculation(y_pred, y_test))
mse_comb3 = sum(mse_arr)/len(mse_arr)
mse_comb3

14.28136310764321

In [25]:
# Combination 4
# Network layer (6, 4, 2)
# All best PSO hyperparam except beta = 1.6

# Set up network
network = neuralNet()

network.add(layer(ActFunc.relu,6))
network.add(layer(ActFunc.relu,4))
network.add(layer(ActFunc.relu,2))
network.add(layer(ActFunc.relu,1))

# PSO Hyperparams
swarmsize = 10
alpha = 0.6
beta = 1.6
gamma = 1.5
delta = 1.6
epsilon = 0.4
n_iter = 10

mse_arr = []
for i in range(10):
    # Getting optimal position
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()

    # Apply to test set
    weights, bias = particle_swarm_opti.assessFitness_helper( opti_particle)
    y_pred = X_test.apply(network.forwardCalculation, args = (weights, bias), axis = 1)
    mse_arr.append(network.errorCalculation(y_pred, y_test))
mse_comb4 = sum(mse_arr)/len(mse_arr)
mse_comb4

13.909314781449874

In [26]:
# Combination 5
# Network layer (6, 4, 2)
# All best PSO hyperparam except alpha = 0.7

# Set up network
network = neuralNet()

network.add(layer(ActFunc.relu,6))
network.add(layer(ActFunc.relu,4))
network.add(layer(ActFunc.relu,2))
network.add(layer(ActFunc.relu,1))

# PSO Hyperparams
swarmsize = 10
alpha = 0.7
beta = 2.4
gamma = 1.5
delta = 1.6
epsilon = 0.4
n_iter = 10

mse_arr = []
for i in range(10):
    # Getting optimal position
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()

    # Apply to test set
    weights, bias = particle_swarm_opti.assessFitness_helper( opti_particle)
    y_pred = X_test.apply(network.forwardCalculation, args = (weights, bias), axis = 1)
    mse_arr.append(network.errorCalculation(y_pred, y_test))
mse_comb5 = sum(mse_arr)/len(mse_arr)
mse_comb5

14.460693562014788

In [27]:
# Combination 6
# Network layer (3, 3, 3)
# All best PSO hyperparam except alpha = 0.7

# Set up network
network = neuralNet()

network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,3))
network.add(layer(ActFunc.relu,1))

# PSO Hyperparams
swarmsize = 10
alpha = 0.7
beta = 2.4
gamma = 1.5
delta = 1.6
epsilon = 0.4
n_iter = 10

mse_arr = []
for i in range(10):
    # Getting optimal position
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()

    # Apply to test set
    weights, bias = particle_swarm_opti.assessFitness_helper( opti_particle)
    y_pred = X_test.apply(network.forwardCalculation, args = (weights, bias), axis = 1)
    mse_arr.append(network.errorCalculation(y_pred, y_test))
mse_comb6 = sum(mse_arr)/len(mse_arr)
mse_comb6

13.691696188951273

### Best way of allocating solution evaluation

In [28]:
# Swarm size of 100 but number of iteration 10

swarmsize = 100
alpha = 0.7
beta = 2
gamma = 1.5
delta = 1.5
epsilon = 0.4
n_iter = 10

all_best = []
for i in range(1):
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()
    all_best.append(best_mae_arr[-1])
avg_best_a = sum(all_best)/len(all_best)
avg_best_a

10.049228908961974

In [29]:
# Swarm size of 10 but number of iteration 100

swarmsize = 10
alpha = 0.7
beta = 2
gamma = 1.5
delta = 1.5
epsilon = 0.4
n_iter = 100

all_best = []
for i in range(1):
    particle_swarm_opti = pso.PSO(X_train, y_train, network, swarmsize, alpha, beta, gamma, delta, epsilon, n_iter, prints=False)
    opti_particle, best_mae_arr = particle_swarm_opti.optimise()
    all_best.append(best_mae_arr[-1])
avg_best_b = sum(all_best)/len(all_best)
avg_best_b

11.48772890605923