In [1]:
from classes.network import Network
import pandas as pd
import numpy as np
from tqdm import tqdm
import time
import math

def shuffle_and_split(X_train, y_train, batch_size=10, seed=None):
    """Shuffle a dataset, cut it into mini-batches and hold the last one out.

    Samples and targets are shuffled with the same permutation, then split
    with ``np.array_split`` into ``ceil(n_samples / batch_size)`` chunks of
    near-equal size (so a chunk may hold fewer than ``batch_size`` samples).
    The last chunk is returned separately as the test set.

    Parameters
    ----------
    X_train : array-like
        Input samples, indexed along the first axis.
    y_train : array-like
        Targets; must have the same length as ``X_train``.
    batch_size : int, default 10
        Upper bound on the size of each chunk; must be positive.
    seed : int or None, default None
        When given, the shuffle uses ``np.random.default_rng(seed)`` so the
        split is reproducible. When None, the global NumPy random state is
        used, as before.

    Returns
    -------
    tuple
        ``(X_batches, y_batches, X_test, y_test)`` where the first two are
        lists of arrays and the last two are arrays. If the data fits in a
        single chunk, both lists are empty and every sample ends up in the
        test set.

    Raises
    ------
    ValueError
        If the lengths differ, the dataset is empty, or ``batch_size`` is
        not positive.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    n_samples = len(X_train)
    if n_samples != len(y_train):
        raise ValueError(
            f"X_train and y_train must have the same length "
            f"(got {n_samples} and {len(y_train)})"
        )
    if n_samples == 0:
        raise ValueError("cannot split an empty dataset")
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive (got {batch_size})")

    number_of_chunks_to_split = math.ceil(n_samples / batch_size)

    # One permutation for both arrays keeps every sample paired with its target.
    if seed is None:
        p = np.random.permutation(n_samples)
    else:
        p = np.random.default_rng(seed).permutation(n_samples)

    splitted_X_train = np.array_split(X_train[p], number_of_chunks_to_split)
    splitted_y_train = np.array_split(y_train[p], number_of_chunks_to_split)

    # The last chunk is held out for testing; the others are training batches.
    return splitted_X_train[:-1], splitted_y_train[:-1], splitted_X_train[-1], splitted_y_train[-1]


# Shallow vs Deep neural networks

When training a neural network, we have to choose a design that fits its purpose. Here, we try to find which architecture is best suited to each dataset.

In [2]:
# Dataset files, grouped by the number of input columns named in the file.
_ONE_INPUT_FILES = [
    "data/1in_tanh.txt",
    "data/1in_linear.txt",
    "data/1in_cubic.txt",
    "data/1in_sine.txt",
]
_TWO_INPUT_FILES = [
    "data/2in_complex.txt",
    "data/2in_xor.txt",
]

# The experiment loops pair these paths with their result keys by position,
# so the order of this list matters.
DATA_FILES = _ONE_INPUT_FILES + _TWO_INPUT_FILES
def load_dataset(path):
    """Read a header-less, whitespace-separated table and split it into inputs and targets.

    Returns a pair ``(X, y)`` of NumPy arrays: ``X`` holds every column
    except the last one (2-D), ``y`` holds the last column (1-D).
    """
    table = pd.read_csv(path, sep=r"\s+", header=None)
    inputs = table.iloc[:, :-1].to_numpy()
    targets = table.iloc[:, -1].to_numpy()
    return inputs, targets

In [3]:
result_networks_n = {
    "1in_tan":{},
    "1in_linear":{},
    "1in_cubic":{},
    "1in_sine":{},
    "2in_complex":{},
    "2in_xor":{}
}

# Experiment 1 -- settings as noted by the authors: batch:20, swarm:100, iter:10.
# NOTE(review): swarm size and iteration count are not passed below, so they are
# presumably train_with_pso defaults -- confirm in classes.network.
#
# Result keys and DATA_FILES are paired by position, so both must list the
# datasets in the same order.
for dataset, data_file in zip(result_networks_n, DATA_FILES):
    # Load the dataset and hold the last chunk out as the test set.
    X_train, y_train = load_dataset(data_file)
    splitted_X_train, splitted_y_train, X_test, y_test = shuffle_and_split(X_train, y_train, batch_size=20)

    # Each key starts with its number of inputs ("1in_...", "2in_...");
    # int() parses that digit without resorting to eval().
    input_size = int(dataset[0])

    # First, a shallow network: a single tanh unit.
    shallow = Network(error="l2")
    shallow.addLayer(1, inputSize=input_size, activation="tanh")
    print('-----------------------------')
    print(f"\tTraining shallow on {dataset}")

    # The timed window covers training on every batch plus the test evaluation.
    start_shallow = time.time()
    shallow_train_perfs = []
    for (x_batch,y_batch) in zip(splitted_X_train,splitted_y_train):
        perf = shallow.train_with_pso(x_batch, y_batch)
        shallow_train_perfs.append(perf)

    shallow_test = shallow.test(X_test, y_test)
    print(f"Error on test: {shallow_test}")
    end_shallow = time.time()

    print(f"Time: {end_shallow - start_shallow}")

    # Then, a deep network: 3 -> 3 -> 1 tanh units.
    deep = Network(error="l2")
    deep.addLayer(3, inputSize=input_size, activation="tanh")
    deep.addLayer(3, activation="tanh")
    deep.addLayer(1, activation="tanh")
    print('-----------------------------')
    print(f"\tTraining deep on {dataset}")

    # Start the clock right before training so the deep timing covers the
    # same window (train + test) as the shallow one.
    start_deep = time.time()
    deep_train_perfs = []
    for (x_batch,y_batch) in zip(splitted_X_train,splitted_y_train):
        perf = deep.train_with_pso(x_batch, y_batch)
        deep_train_perfs.append(perf)

    deep_test = deep.test(X_test, y_test)
    print(f"Error on test: {deep_test}")
    end_deep = time.time()

    print(f"Time: {end_deep - start_deep}")

    result_networks_n[dataset]["shallow"] = {
        "model": shallow,
        "train_perfs": shallow_train_perfs,
        "test_perfs": shallow_test,
        "exec_time": end_shallow - start_shallow
    }

    result_networks_n[dataset]["deep"] = {
        "model": deep,
        "train_perfs": deep_train_perfs,
        "test_perfs": deep_test,
        "exec_time": end_deep - start_deep
    }

-----------------------------
	Training shallow on 1in_tan
Best_fitness: 9.188523134800781e-05
Best_fitness: 3.374437183902416e-09
Best_fitness: 9.833583102853636e-07
Best_fitness: 2.22738615205029e-05
Best_fitness: 4.0975085147504436e-06
Error on test: 5.843478612375912e-06
Time: 234.34072470664978
-----------------------------
	Training deep on 1in_tan
Best_fitness: 0.06742680086555371
Best_fitness: 0.008458325341320901
Best_fitness: 0.013231064894961201
Best_fitness: 0.06647012667079867
Best_fitness: 0.03515286289902064
Error on test: 0.17911776604604113
Time: 531.8018336296082
-----------------------------
	Training shallow on 1in_linear
Best_fitness: 0.004994995228089439
Best_fitness: 0.0013646573717595724
Best_fitness: 0.0021481531866028945
Best_fitness: 0.00286434417232185
Error on test: 0.00482097115333959
Time: 181.5251317024231
-----------------------------
	Training deep on 1in_linear
Best_fitness: 0.03314011438444224
Best_fitness: 0.014510346876163432
Best_fitness: 0.027319

In [4]:
# Print test error and wall-clock time of both architectures for every dataset
# of the first experiment.
for dataset, runs in result_networks_n.items():
    print(f"For dataset '{dataset}':")
    for heading, architecture in (("\tSHALLOW", "shallow"), ("\tDEEP", "deep")):
        print(heading)
        run = runs[architecture]
        print(f"Test perfs: {run['test_perfs']}")
        print(f"Exec time: {run['exec_time']}")
    print("","-----------------\n")

For dataset '1in_tan':
	SHALLOW
Test perfs: 5.843478612375912e-06
Exec time: 234.34072470664978
	DEEP
Test perfs: 0.17911776604604113
Exec time: 531.8018336296082
 -----------------

For dataset '1in_linear':
	SHALLOW
Test perfs: 0.00482097115333959
Exec time: 181.5251317024231
	DEEP
Test perfs: 0.06586227335212336
Exec time: 419.62034845352173
 -----------------

For dataset '1in_cubic':
	SHALLOW
Test perfs: 0.02628952348551735
Exec time: 193.6641411781311
	DEEP
Test perfs: 0.027876169640711494
Exec time: 452.29343724250793
 -----------------

For dataset '1in_sine':
	SHALLOW
Test perfs: 8.649286769624107e-05
Exec time: 188.0481505393982
	DEEP
Test perfs: 0.13241142698369202
Exec time: 436.8915390968323
 -----------------

For dataset '2in_complex':
	SHALLOW
Test perfs: 0.18620737972724138
Exec time: 188.31769347190857
	DEEP
Test perfs: 0.20563523481126667
Exec time: 425.55572152137756
 -----------------

For dataset '2in_xor':
	SHALLOW
Test perfs: 0.15001688883871395
Exec time: 213.3

## Trying to improve Deep architecture training by tuning PSO parameters such as swarm size and iterations

The motivation for these experiments is to give PSO the resources it needs to converge better.

In [5]:
result_networks_custom = {
    "1in_tan":{},
    "1in_linear":{},
    "1in_cubic":{},
    "1in_sine":{},
    "2in_complex":{},
    "2in_xor":{}
}

# Experiment 2 -- settings as noted by the authors: batch:50, swarm:100, iter:100.
# NOTE(review): the swarm size is not passed below, so it is presumably a
# train_with_pso default -- confirm in classes.network.
#
# Result keys and DATA_FILES are paired by position, so both must list the
# datasets in the same order.
for dataset, data_file in zip(result_networks_custom, DATA_FILES):
    # Load the dataset and hold the last chunk out as the test set.
    X_train, y_train = load_dataset(data_file)
    splitted_X_train, splitted_y_train, X_test, y_test = shuffle_and_split(X_train, y_train, batch_size=50)

    # Each key starts with its number of inputs ("1in_...", "2in_...");
    # int() parses that digit without resorting to eval().
    input_size = int(dataset[0])

    # First, a shallow network: a single tanh unit.
    shallow = Network(error="l2")
    shallow.addLayer(1, inputSize=input_size, activation="tanh")
    print('-----------------------------')
    print(f"\tTraining shallow on {dataset}")

    # The timed window covers training on every batch plus the test evaluation.
    start_shallow = time.time()
    shallow_train_perfs = []
    for (x_batch,y_batch) in zip(splitted_X_train,splitted_y_train):
        perf = shallow.train_with_pso(x_batch, y_batch, iter_count=100)
        shallow_train_perfs.append(perf)

    shallow_test = shallow.test(X_test, y_test)
    print(f"Error on test: {shallow_test}")
    end_shallow = time.time()

    # Report the shallow timing with the shallow results, before the deep
    # header is printed, so the log is not misleading.
    print(f"Time: {end_shallow - start_shallow}")

    # Then, a deep network: 3 -> 3 -> 1 tanh units.
    deep = Network(error="l2")
    deep.addLayer(3, inputSize=input_size, activation="tanh")
    deep.addLayer(3, activation="tanh")
    deep.addLayer(1, activation="tanh")
    print('-----------------------------')
    print(f"\tTraining deep on {dataset}")

    # Start the clock right before training so the deep timing covers the
    # same window (train + test) as the shallow one.
    start_deep = time.time()
    deep_train_perfs = []
    for (x_batch,y_batch) in zip(splitted_X_train,splitted_y_train):
        perf = deep.train_with_pso(x_batch, y_batch, iter_count=100)
        deep_train_perfs.append(perf)

    deep_test = deep.test(X_test, y_test)
    print(f"Error on test: {deep_test}")
    end_deep = time.time()

    print(f"Time: {end_deep - start_deep}")

    result_networks_custom[dataset]["shallow"] = {
        "model": shallow,
        "train_perfs": shallow_train_perfs,
        "test_perfs": shallow_test,
        "exec_time": end_shallow - start_shallow
    }

    result_networks_custom[dataset]["deep"] = {
        "model": deep,
        "train_perfs": deep_train_perfs,
        "test_perfs": deep_test,
        "exec_time": end_deep - start_deep
    }
    
    

-----------------------------
	Training shallow on 1in_tan
Best_fitness: 5.785347652660321e-10
Best_fitness: 4.881118709392624e-10
Error on test: 1.1833323767106554e-09
-----------------------------
	Training deep on 1in_tan
Time: 1595.451779127121
Best_fitness: 0.053264869330752765
Best_fitness: 0.0037893873318501784
Error on test: 0.07587599468992168
Time: 3913.156039237976
-----------------------------
	Training shallow on 1in_linear
Best_fitness: 0.004102476887832322
Error on test: 0.0026793372467690197
-----------------------------
	Training deep on 1in_linear
Time: 1146.9014484882355
Best_fitness: 0.019793313275948767
Error on test: 0.01393165556123718
Time: 2661.0522117614746
-----------------------------
	Training shallow on 1in_cubic
Best_fitness: 0.026975802651453676
Best_fitness: 0.02206765289505194
Error on test: 0.03285611323714251
-----------------------------
	Training deep on 1in_cubic
Time: 1517.082524061203
Best_fitness: 0.031112137103559076
Best_fitness: 0.0329093692

In [6]:
# Print test error and wall-clock time of both architectures for every dataset
# of the tuned-PSO experiment.
REPORT_HEADINGS = {"shallow": "\tSHALLOW", "deep": "\tDEEP"}

for dataset in result_networks_custom:
    print(f"For dataset '{dataset}':")
    for architecture, heading in REPORT_HEADINGS.items():
        print(heading)
        summary = result_networks_custom[dataset][architecture]
        print(f"Test perfs: {summary['test_perfs']}")
        print(f"Exec time: {summary['exec_time']}")
    print("","-----------------\n")

For dataset '1in_tan':
	SHALLOW
Test perfs: 1.1833323767106554e-09
Exec time: 1595.451779127121
	DEEP
Test perfs: 0.07587599468992168
Exec time: 3913.156039237976
 -----------------

For dataset '1in_linear':
	SHALLOW
Test perfs: 0.0026793372467690197
Exec time: 1146.9014484882355
	DEEP
Test perfs: 0.01393165556123718
Exec time: 2661.0522117614746
 -----------------

For dataset '1in_cubic':
	SHALLOW
Test perfs: 0.03285611323714251
Exec time: 1517.082524061203
	DEEP
Test perfs: 0.04639195586733256
Exec time: 3597.5754783153534
 -----------------

For dataset '1in_sine':
	SHALLOW
Test perfs: 2.36671663269262e-08
Exec time: 1110.8622043132782
	DEEP
Test perfs: 0.08036977440141538
Exec time: 2582.4182965755463
 -----------------

For dataset '2in_complex':
	SHALLOW
Test perfs: 0.13966536411015082
Exec time: 1122.8274807929993
	DEEP
Test perfs: 0.15853007848335715
Exec time: 2668.7885870933533
 -----------------

For dataset '2in_xor':
	SHALLOW
Test perfs: 4.417952225623807e-08
Exec time: 

## Trying to improve PSO convergence with updated velocity function

We updated our velocity function; below, we check how the PSO algorithm performs with it.

In [7]:
result_networks_new_v = {
    "1in_tan":{},
    "1in_linear":{},
    "1in_cubic":{},
    "1in_sine":{},
    "2in_complex":{},
    "2in_xor":{}
}

# Experiment 3 -- settings as noted by the authors: batch:20, swarm:100, iter:100,
# with increase_convergence_factor=True passed to train_with_pso.
# NOTE(review): the swarm size is not passed below, so it is presumably a
# train_with_pso default -- confirm in classes.network.
#
# Result keys and DATA_FILES are paired by position, so both must list the
# datasets in the same order.
for dataset, data_file in zip(result_networks_new_v, DATA_FILES):
    # Load the dataset and hold the last chunk out as the test set.
    X_train, y_train = load_dataset(data_file)
    splitted_X_train, splitted_y_train, X_test, y_test = shuffle_and_split(X_train, y_train, batch_size=20)

    # Each key starts with its number of inputs ("1in_...", "2in_...");
    # int() parses that digit without resorting to eval().
    input_size = int(dataset[0])

    # First, a shallow network: a single tanh unit.
    shallow = Network(error="l2")
    shallow.addLayer(1, inputSize=input_size, activation="tanh")
    print('-----------------------------')
    print(f"\tTraining shallow on {dataset}")

    # The timed window covers training on every batch plus the test evaluation.
    start_shallow = time.time()
    shallow_train_perfs = []
    for (x_batch,y_batch) in zip(splitted_X_train,splitted_y_train):
        perf = shallow.train_with_pso(
            x_batch,
            y_batch,
            iter_count=100,
            increase_convergence_factor=True)
        shallow_train_perfs.append(perf)

    shallow_test = shallow.test(X_test, y_test)
    print(f"Error on test: {shallow_test}")
    end_shallow = time.time()

    # Report the shallow timing with the shallow results, before the deep
    # header is printed, so the log is not misleading.
    print(f"Time: {end_shallow - start_shallow}")

    # Then, a deep network: 3 -> 3 -> 1 tanh units.
    deep = Network(error="l2")
    deep.addLayer(3, inputSize=input_size, activation="tanh")
    deep.addLayer(3, activation="tanh")
    deep.addLayer(1, activation="tanh")
    print('-----------------------------')
    print(f"\tTraining deep on {dataset}")

    # Start the clock right before training so the deep timing covers the
    # same window (train + test) as the shallow one.
    start_deep = time.time()
    deep_train_perfs = []
    for (x_batch,y_batch) in zip(splitted_X_train,splitted_y_train):
        perf = deep.train_with_pso(
            x_batch,
            y_batch,
            iter_count=100,
            increase_convergence_factor=True)
        deep_train_perfs.append(perf)

    deep_test = deep.test(X_test, y_test)
    print(f"Error on test: {deep_test}")
    end_deep = time.time()

    print(f"Time: {end_deep - start_deep}")

    result_networks_new_v[dataset]["shallow"] = {
        "model": shallow,
        "train_perfs": shallow_train_perfs,
        "test_perfs": shallow_test,
        "exec_time": end_shallow - start_shallow
    }

    result_networks_new_v[dataset]["deep"] = {
        "model": deep,
        "train_perfs": deep_train_perfs,
        "test_perfs": deep_test,
        "exec_time": end_deep - start_deep
    }
    
    

-----------------------------
	Training shallow on 1in_tan
Best_fitness: 5.76877983434113e-06
Best_fitness: 2.2165358258332538e-05
Best_fitness: 0.00012983721309977591
Best_fitness: 0.00011962907460070403
Best_fitness: 0.0006630407597474645
Error on test: 0.00038940707669185516
-----------------------------
	Training deep on 1in_tan
Time: 2011.7609379291534
Best_fitness: 0.021039456941624072


KeyboardInterrupt: 

In [None]:
# Print test error and wall-clock time of both architectures for every dataset
# of the updated-velocity experiment.
# The training cell stores a dataset's results only after both networks have
# finished, so an interrupted run leaves empty entries; report those instead
# of failing with a KeyError.
for dataset, runs in result_networks_new_v.items():
    print(f"For dataset '{dataset}':")
    for heading, architecture in (("\tSHALLOW", "shallow"), ("\tDEEP", "deep")):
        print(heading)
        run = runs.get(architecture)
        if run is None:
            print("No results recorded (training did not complete).")
            continue
        print(f"Test perfs: {run['test_perfs']}")
        print(f"Exec time: {run['exec_time']}")
    print("","-----------------\n")

# Plots
We now plot the networks' predictions and compare them to the original functions.

In [None]:
import matplotlib.pyplot as plt

x = np.linspace(-5,5,1000)

# Reference functions for the four single-input datasets, listed in the same
# order as the keys of result_networks_new_v.
actual_functions = [
    np.tanh,
    lambda x: x,
    lambda x: x*x*x,
    np.sin,
]

# zip() stops after the four single-input datasets; the two-input ones are
# not plotted here.
for (dataset, runs), actual_function in zip(result_networks_new_v.items(), actual_functions):
    # An interrupted training run leaves empty entries; skip them rather
    # than fail with a KeyError.
    if "shallow" not in runs or "deep" not in runs:
        print(f"Skipping '{dataset}': no trained models recorded.")
        continue

    # NOTE(review): x is 1-D here while load_dataset returns 2-D inputs;
    # confirm that Network.predict accepts this shape.
    shallow_res = runs['shallow']['model'].predict(x)
    deep_res = runs['deep']['model'].predict(x)
    # The reference functions are NumPy-vectorised, so evaluate them on the whole grid.
    actual_fun = actual_function(x)

    fig, ax = plt.subplots()
    ax.plot(x, shallow_res, "b", label="Shallow")
    ax.plot(x, deep_res, "r", label="Deep")
    ax.plot(x, actual_fun, "g", label="Actual function")
    ax.set(title=f"Shallow vs deep on '{dataset}'", xlabel="x", ylabel="y")
    ax.legend(loc="best")
    plt.show()

In [None]:
import pickle

In [None]:
# Step 2
with open('results_experiments_one.dictionary', 'wb') as config_dictionary_file:
 
    # Step 3
    pickle.dump(result_networks_n, config_dictionary_file)

# Step 2
with open('results_experiments_two.dictionary', 'wb') as config_dictionary_file:
 
    # Step 3
    pickle.dump(result_networks_custom, config_dictionary_file)

# Step 2
with open('results_experiments_three.dictionary', 'wb') as config_dictionary_file:
 
    # Step 3
    pickle.dump(result_networks_new_v, config_dictionary_file)