In [1]:
import datetime
import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd

import math
import random
import timeit

from torch.autograd import Variable

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Layer widths of the network encoded by each flat weight vector:
# input -> first hidden -> second hidden -> output.
in_dim = 4
hid1_dim = 16
hid2_dim = 8
out_dim = 3

# Number of scalar bias terms stored at the tail of the weight vector
# (the forward pass adds one bias per layer).
biases = 3

In [3]:
iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']

# Split the raw data first so the held-out rows never influence the scaling
# statistics (fitting the scaler on the full data set leaks test information).
# The split only depends on the number of rows and random_state, so the same
# rows end up in each partition as when splitting already-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2)

# Scale features to mean 0 and variance 1, which is important for the
# convergence of the neural network. Mean/variance come from the training
# split only and are then re-used for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell reading the fully scaled matrix still works.
X_scaled = scaler.transform(X)

# Convert to tensors: float32 features, int64 class labels.
# torch.autograd.Variable is a deprecated pass-through wrapper, so plain
# tensors are used (they do not require gradients by default).
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long()
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()

In [4]:
# Shared layer objects used by the forward pass and by the loss computation.
relu, soft, loss_func = (
    torch.nn.ReLU(),              # hidden-layer activation
    torch.nn.Softmax(dim=1),      # normalises each row (dim 1) to sum to 1
    torch.nn.CrossEntropyLoss(),  # loss used for fitness and validation
)

In [5]:
def get_fitness(X_train, y_train, weights):
    """Score a flat parameter vector by the loss of the MLP it encodes.

    The vector is laid out as ``[w1 | w2 | w3 | b]`` where ``w1`` is
    ``(in_dim, hid1_dim)``, ``w2`` is ``(hid1_dim, hid2_dim)`` and ``w3`` is
    ``(hid2_dim, out_dim)``, each stored row by row, followed by one scalar
    bias per layer. Entries after the third bias are ignored.

    Parameters
    ----------
    X_train : torch.Tensor
        Input batch; it is matrix-multiplied with ``w1`` and therefore needs
        ``in_dim`` columns.
    y_train : torch.Tensor
        Class targets handed to ``loss_func``.
    weights : array-like
        1-D parameter vector holding at least
        ``in_dim*hid1_dim + hid1_dim*hid2_dim + hid2_dim*out_dim + 3`` values.

    Returns
    -------
    float
        ``loss_func`` evaluated on the softmax output of the network.

    Raises
    ------
    ValueError
        If ``weights`` is not 1-D or is too short to hold every parameter.

    Notes
    -----
    NOTE(review): the softmax output is passed to ``CrossEntropyLoss``, which
    applies log-softmax on its own, so the loss is computed on doubly
    normalised scores. This is kept unchanged because the validation loss in
    the training cell is computed the same way from ``tester`` output; fix
    both together if true cross-entropy is wanted.
    """
    # Private float64 copy: accepts lists as well as (possibly strided) rows.
    flat = np.array(weights, dtype=np.float64)

    n_w1 = in_dim * hid1_dim
    n_w2 = hid1_dim * hid2_dim
    n_w3 = hid2_dim * out_dim
    needed = n_w1 + n_w2 + n_w3 + 3  # three layers -> three scalar biases
    if flat.ndim != 1 or flat.size < needed:
        raise ValueError(
            "weights must be a 1-D vector with at least %d entries, got shape %s"
            % (needed, flat.shape)
        )

    params = torch.from_numpy(flat).float()

    # Row-major reshape reproduces the row-by-row layout of the flat vector.
    end_w1 = n_w1
    end_w2 = end_w1 + n_w2
    end_w3 = end_w2 + n_w3
    w1 = params[:end_w1].reshape(in_dim, hid1_dim)
    w2 = params[end_w1:end_w2].reshape(hid1_dim, hid2_dim)
    w3 = params[end_w2:end_w3].reshape(hid2_dim, out_dim)
    b = params[end_w3:]

    # Forward pass; each scalar bias is broadcast over the whole layer.
    hidden1 = relu(X_train.mm(w1) + b[0])
    hidden2 = relu(hidden1.mm(w2) + b[1])
    y_pred = soft(hidden2.mm(w3) + b[2])

    return loss_func(y_pred, y_train).item()

def tester(X_test, y_test, weights):
    w1, w2, w3 = [], [], []
    lengths = 0
    for i in range(in_dim) :
        w1.append(weights[lengths : hid1_dim + lengths])
        lengths += hid1_dim
    w1 = np.array(w1)
    
    for i in range(hid1_dim) :
        w2.append(weights[lengths : hid2_dim + lengths])
        lengths += hid2_dim
    w2 = np.array(w2)
    
    for i in range(hid2_dim) :
        w3.append(weights[lengths : out_dim + lengths])
        lengths += out_dim
    w3 = np.array(w3)

    b = weights[lengths :]
    
    w1, w2, w3, b = Variable(torch.from_numpy(w1)).float(), Variable(torch.from_numpy(w2)).float(),\
    Variable(torch.from_numpy(w3)).float(), Variable(torch.from_numpy(b)).float()
    
    y_pred = X_test.mm(w1)
    y_pred = y_pred + b[0]
    y_pred = relu(y_pred.clone().detach())
    y_pred = y_pred.mm(w2)
    y_pred = y_pred + b[1]
    y_pred = relu(y_pred.clone().detach())
    y_pred = y_pred.mm(w3)
    y_pred = y_pred + b[2]
    y_pred = soft(y_pred.clone().detach())
    
    return y_pred

In [6]:
# ---- Experiment bookkeeping ----
experiment = 20  # number of independent runs
EPOCHS = 500     # total iterations per run, shared by the PSO and GWO phases

# Metrics of the final leader of each run.
train_loss_list = np.zeros(experiment)
val_loss_list = np.zeros(experiment)
val_acc_list = np.zeros(experiment)

# Metrics recorded at the iteration with the lowest validation loss of each run.
memory_val_acc_list = np.zeros(experiment)
memory_train_loss_list = np.zeros(experiment)
memory_val_loss_list = np.full(experiment, float("inf"))

# ---- Search space ----
lb = -1  # lower bound used when sampling initial positions / velocities
ub = 1   # upper bound used when sampling initial positions / velocities
# One coordinate per network weight plus the trailing bias terms.
dim = in_dim * hid1_dim + hid1_dim * hid2_dim + hid2_dim * out_dim + biases

# ---- Phase schedule and population sizes ----
PHASE = 0.1                        # fraction of EPOCHS spent in the PSO phase
PSO_EPOCHS = int(EPOCHS * PHASE)
GWO_EPOCHS = EPOCHS - PSO_EPOCHS   # remaining iterations go to the GWO phase
swarm_no = 60                      # particles in the PSO swarm
wolves_no = 20                     # wolves sampled from the swarm for GWO

# ---- PSO coefficients ----
inertia_w = 0.3  # inertia constant
c1 = 1           # cognitive constant
c2 = 1           # social constant

In [7]:
# Hybrid optimiser: every run starts with a PSO phase over the whole swarm and
# then hands a random subset of particles to a GWO phase. The loss returned by
# get_fitness is minimised; the test split is used as validation data.
experiment_date = datetime.datetime.now()
starter = timeit.default_timer()
for exper in range(experiment):
    # Reset the best-so-far trackers so that re-running this cell on its own
    # does not compare against values left over from an earlier execution.
    memory_val_loss_list[exper] = float("inf")
    memory_train_loss_list[exper] = 0.0
    memory_val_acc_list[exper] = 0.0

    population = np.zeros((swarm_no, dim))
    velocity = np.ones((swarm_no, dim))
    particle_best_pos = np.zeros((swarm_no, dim))
    particle_best_sco = np.zeros((swarm_no))
    swarm_best_pos = np.zeros(dim)
    swarm_best_sco = float("inf")

    # ---- PSO phase: random initialisation ----
    for i in range(swarm_no):
        population[i, :] = np.random.uniform(lb, ub, dim)
        velocity[i, :] = np.random.uniform(lb, ub, dim)
        init_fit = get_fitness(X_train, y_train, population[i, :])

        particle_best_pos[i, :] = population[i, :]
        particle_best_sco[i] = init_fit

        if swarm_best_sco > init_fit:
            # Copy is required: a bare slice is a view into `population` and
            # would silently follow the particle once its row is overwritten,
            # leaving swarm_best_sco attached to the wrong position.
            swarm_best_pos = population[i, :].copy()
            swarm_best_sco = init_fit

    # ---- PSO phase: iterations ----
    for epoch in range(PSO_EPOCHS):
        for i in range(swarm_no):
            r1, r2 = np.random.rand(dim), np.random.rand(dim)
            velo_cog = c1 * r1 * (particle_best_pos[i, :] - population[i, :])
            velo_soc = c2 * r2 * (swarm_best_pos - population[i, :])
            velocity[i, :] = inertia_w * velocity[i, :] + velo_cog + velo_soc
            population[i, :] = population[i, :] + velocity[i, :]
            # Detached snapshot of the new position, safe to keep as a "best".
            result_pos = population[i, :].copy()
            result_fit = get_fitness(X_train, y_train, result_pos)

            # update particle_best
            if particle_best_sco[i] > result_fit:
                particle_best_pos[i] = result_pos
                particle_best_sco[i] = result_fit

            # update swarm_best
            if swarm_best_sco > result_fit:
                swarm_best_pos = result_pos
                swarm_best_sco = result_fit

        # Validate the current swarm best once per epoch.
        w_set = swarm_best_pos
        valid = tester(X_test, y_test, w_set)
        val_loss = loss_func(valid, y_test).item()

        if val_loss < memory_val_loss_list[exper]:
            memory_val_loss_list[exper] = val_loss
            memory_train_loss_list[exper] = swarm_best_sco

            val_correct = (torch.argmax(valid, dim=1) == y_test).type(torch.FloatTensor)
            memory_val_acc_list[exper] = val_correct.mean().item()

    # ---- GWO phase: the swarm best becomes the initial alpha wolf ----
    alpha_pos = swarm_best_pos.copy()
    alpha_score = swarm_best_sco

    beta_pos = np.zeros(dim)
    beta_score = float("inf")

    delta_pos = np.zeros(dim)
    delta_score = float("inf")

    # Pick wolves_no distinct particles to continue as the wolf pack.
    sampler = random.sample(range(swarm_no), wolves_no)
    new_population = np.zeros((wolves_no, dim))

    for i in range(wolves_no):
        new_population[i, :] = population[sampler[i], :]
        # Score the sampled wolf itself (not particle i of the old swarm), so
        # the fitness matches the position stored as alpha/beta/delta below.
        fitness = get_fitness(X_train, y_train, new_population[i, :])

        if fitness < alpha_score:
            delta_score = beta_score  # Update delta
            delta_pos = beta_pos.copy()
            beta_score = alpha_score  # Update beta
            beta_pos = alpha_pos.copy()
            alpha_score = fitness  # Update alpha
            alpha_pos = new_population[i, :].copy()

        if fitness > alpha_score and fitness < beta_score:
            delta_score = beta_score  # Update delta
            delta_pos = beta_pos.copy()
            beta_score = fitness  # Update beta
            beta_pos = new_population[i, :].copy()

        if fitness > alpha_score and fitness > beta_score and fitness < delta_score:
            delta_score = fitness  # Update delta
            delta_pos = new_population[i, :].copy()

    population = new_population

    for epoch in range(GWO_EPOCHS):
        # `a` decreases linearly towards 0; the PSO_EPOCHS offset continues the
        # 2 -> 0 schedule over all EPOCHS, so GWO starts below 2.
        a = 2 - (PSO_EPOCHS + epoch) * ((2) / EPOCHS)

        for i in range(wolves_no):
            r1 = np.random.rand(dim)  # r1 is a random number in [0,1]
            r2 = np.random.rand(dim)  # r2 is a random number in [0,1]
            A1 = 2 * a * r1 - a  # Equation (3.3)
            C1 = 2 * r2  # Equation (3.4)
            D_alpha = abs(C1 * alpha_pos - population[i, :])  # Equation (3.5)-part 1
            X1 = alpha_pos - A1 * D_alpha  # Equation (3.6)-part 1

            r1 = np.random.rand(dim)
            r2 = np.random.rand(dim)
            A2 = 2 * a * r1 - a  # Equation (3.3)
            C2 = 2 * r2  # Equation (3.4)
            D_beta = abs(C2 * beta_pos - population[i, :])  # Equation (3.5)-part 2
            X2 = beta_pos - A2 * D_beta  # Equation (3.6)-part 2

            r1 = np.random.rand(dim)
            r2 = np.random.rand(dim)
            A3 = 2 * a * r1 - a  # Equation (3.3)
            C3 = 2 * r2  # Equation (3.4)
            D_delta = abs(C3 * delta_pos - population[i, :])  # Equation (3.5)-part 3
            X3 = delta_pos - A3 * D_delta  # Equation (3.6)-part 3

            population[i, :] = ((X1 + X2 + X3) / 3)  # Equation (3.7)

            fitness = get_fitness(X_train, y_train, population[i, :])

            if fitness < alpha_score:
                delta_score = beta_score  # Update delta
                delta_pos = beta_pos.copy()
                beta_score = alpha_score  # Update beta
                beta_pos = alpha_pos.copy()
                alpha_score = fitness  # Update alpha
                alpha_pos = population[i, :].copy()

            if fitness > alpha_score and fitness < beta_score:
                delta_score = beta_score  # Update delta
                delta_pos = beta_pos.copy()
                beta_score = fitness  # Update beta
                beta_pos = population[i, :].copy()

            if fitness > alpha_score and fitness > beta_score and fitness < delta_score:
                delta_score = fitness  # Update delta
                delta_pos = population[i, :].copy()

        # Validate the current alpha wolf once per epoch.
        w_set = alpha_pos.copy()
        valid = tester(X_test, y_test, w_set)
        val_loss = loss_func(valid, y_test).item()

        if val_loss < memory_val_loss_list[exper]:
            memory_val_loss_list[exper] = val_loss
            memory_train_loss_list[exper] = alpha_score

            val_correct = (torch.argmax(valid, dim=1) == y_test).type(torch.FloatTensor)
            memory_val_acc_list[exper] = val_correct.mean().item()

    # ---- Final metrics of this run, taken from the last alpha wolf ----
    train_loss_list[exper] = alpha_score

    w_set = alpha_pos
    valid = tester(X_test, y_test, w_set)
    val_loss_list[exper] = loss_func(valid, y_test).item()

    val_correct = (torch.argmax(valid, dim=1) == y_test).type(torch.FloatTensor)
    val_acc_list[exper] = val_correct.mean().item()

    print(exper, " Train_loss :", train_loss_list[exper], "Val_loss :", val_loss_list[exper], "Val_acc :", val_acc_list[exper])
ender = timeit.default_timer()

0  Train_loss : 0.5613499283790588 Val_loss : 0.5683485865592957 Val_acc : 0.9666666388511658
1  Train_loss : 0.616433322429657 Val_loss : 0.6377427577972412 Val_acc : 0.8666666746139526
2  Train_loss : 0.5598674416542053 Val_loss : 0.6262403726577759 Val_acc : 0.9333333373069763
3  Train_loss : 0.5599876046180725 Val_loss : 0.6073557734489441 Val_acc : 0.9333333373069763
4  Train_loss : 0.5597978830337524 Val_loss : 0.5518413782119751 Val_acc : 1.0
5  Train_loss : 0.5681113004684448 Val_loss : 0.5847779512405396 Val_acc : 0.9666666388511658
6  Train_loss : 0.5712512135505676 Val_loss : 0.6210106015205383 Val_acc : 0.9333333373069763
7  Train_loss : 0.5616281628608704 Val_loss : 0.6089903712272644 Val_acc : 0.9333333373069763
8  Train_loss : 0.5597780346870422 Val_loss : 0.5753638744354248 Val_acc : 0.9666666388511658
9  Train_loss : 0.5597782731056213 Val_loss : 0.6151220798492432 Val_acc : 0.9333333373069763
10  Train_loss : 0.5597789287567139 Val_loss : 0.6072059869766235 Val_acc : 

In [8]:
# Header: experiment description, number of runs, start date and wall time.
print("실험 정보 : KOPT, on IRIS")
print("실험 횟수 :", experiment, "\n"+"실험 일자 :", experiment_date)
print("Computational Time :", ender - starter)

# One inner list per paragraph of the report; each entry is
# (label, reducing function, per-run values).
report_sections = [
    [
        ("train_loss min :", np.min, train_loss_list),
        ("train_loss mean :", np.mean, train_loss_list),
        ("train_loss std :", np.std, train_loss_list),
    ],
    [
        ("val_loss min :", np.min, val_loss_list),
        ("val_loss mean :", np.mean, val_loss_list),
        ("val_loss std :", np.std, val_loss_list),
    ],
    [
        ("val_acc max :", np.max, val_acc_list),
        ("val_acc mean :", np.mean, val_acc_list),
        ("val_acc std :", np.std, val_acc_list),
    ],
    [
        ("Memory_val min :", np.min, memory_val_loss_list),
        ("Memory_val mean :", np.mean, memory_val_loss_list),
        ("Memory_val std :", np.std, memory_val_loss_list),
    ],
    [
        ("Memory_train min :", np.min, memory_train_loss_list),
        ("Memory_train mean :", np.mean, memory_train_loss_list),
        ("Memory_train std :", np.std, memory_train_loss_list),
    ],
    [
        ("Memory_acc min :", np.min, memory_val_acc_list),
        ("Memory_acc mean :", np.mean, memory_val_acc_list),
        ("Memory_acc std :", np.std, memory_val_acc_list),
    ],
]

for section_idx, section in enumerate(report_sections):
    # Blank line between paragraphs, none before the first or after the last.
    if section_idx > 0:
        print()
    for stat_label, stat_func, stat_values in section:
        print(stat_label, stat_func(stat_values))

실험 정보 : KOPT, on IRIS
실험 횟수 : 20 
실험 일자 : 2022-08-01 11:43:22.836978
Computational Time : 90.92704073898494
train_loss min : 0.5515538454055786
train_loss mean : 0.5642277896404266
train_loss std : 0.012704361898614897

val_loss min : 0.5514823794364929
val_loss mean : 0.5946143120527267
val_loss std : 0.025707215389054856

val_acc max : 1.0
val_acc mean : 0.951666659116745
val_acc std : 0.030686580804760433

Memory_val min : 0.5514507293701172
Memory_val mean : 0.5705717146396637
Memory_val std : 0.017191575078465627

Memory_train min : 0.55977863073349
Memory_train mean : 0.5708203166723251
Memory_train std : 0.013288832621265283

Memory_acc min : 0.8999999761581421
Memory_acc mean : 0.9816666543483734
Memory_acc std : 0.024664425025287668
