# Experiments

First, we import the network and the tools we need.

In [None]:
from FC import fullyConnectedNN as fc
import numpy as np
from sklearn.model_selection import train_test_split
from experiments import experiment
import itertools


## Data 

##### Import the data

In [None]:
# Load the raw samples (X) and their class labels (y) from the local .npy files.
X, y = np.load('MNIST-data.npy'), np.load("MNIST-lables.npy")

##### Prepare the data for the NN

In [None]:
# One-hot encode the labels: row i gets a 1 in column y[i] (10 classes).
labels = np.zeros((len(y), 10))
labels[np.arange(len(y)), y] = 1

# Flatten every sample into a single feature vector for the fully connected net.
features = X.reshape((X.shape[0], -1))
input_dim, output_dim = features.shape[1], labels.shape[1]

# Hold out 20% of the data; it is divided into validation and test sets below.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Centre the features on a single scalar computed from the training set only
# (the mean of the per-feature means); the same offset is applied to the
# held-out data so no held-out statistics are used.
mean = X_train.mean(axis=0).mean()
X_train = X_train - mean
X_holdout = X_holdout - mean

# Split the held-out data evenly into a validation set and a final test set.
X_vladition, X_test, y_vladition, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)

## learning rate and structure

In this experiment we search for the best learning rate and network structure for the model.

In [None]:
# Best score seen so far across all experiments below; each experiment cell
# compares its result against this value and overwrites it when it does better.
best_score = 0

In [None]:
# Grid search over learning rate x layer structure using the 'sig' activation.
#
# NOTE: the smallest candidate used to be 0.0. A learning rate of zero never
# updates the weights, so every such run spent 50 epochs on an untrained net.
# It is replaced by 0.01, which continues the decreasing sequence.
lrs = [0.5, 0.3, 0.1, 0.05, 0.01]
structures = [
    [input_dim, 1024, 1024, output_dim],
    [input_dim, 1024, 256, output_dim],
    [input_dim, 1024, 512, 256, output_dim],
]

# itertools.product iterates in the same order as "for lr: for struct:".
for lr, struct in itertools.product(lrs, structures):
    exper = experiment(struct, activation='sig', lr=lr, max_epochs=50)
    # run_experiment returns the trained net and its score
    # (presumably measured on the validation data passed in -- confirm in experiments.py).
    net, score = exper.run_experiment(X_train, y_train, X_vladition, y_vladition)
    if score > best_score:
        print("----------------------new-best-model--------------------------")
        # Remember the winning configuration for the later experiments.
        best_struct = struct
        best_lr = lr
        best_score = score
        best_net = net

In [None]:
# Grid search over learning rate x layer structure using the 'tan_h' activation.
# Results compete with the best model found so far (best_score carries over).
#
# NOTE: the smallest candidate used to be 0.0. A learning rate of zero never
# updates the weights, so every such run spent 50 epochs on an untrained net.
# It is replaced by 0.01, which continues the decreasing sequence.
lrs = [0.5, 0.3, 0.1, 0.05, 0.01]
structures = [
    [input_dim, 1024, 1024, output_dim],
    [input_dim, 1024, 256, output_dim],
    [input_dim, 1024, 512, output_dim],
    [input_dim, 512, 512, output_dim],
    [input_dim, 512, 256, output_dim],
    [input_dim, 2048, output_dim],
]

# itertools.product iterates in the same order as "for lr: for struct:".
for lr, struct in itertools.product(lrs, structures):
    exper = experiment(struct, activation='tan_h', lr=lr, max_epochs=50)
    # run_experiment returns the trained net and its score
    # (presumably measured on the validation data passed in -- confirm in experiments.py).
    net, score = exper.run_experiment(X_train, y_train, X_vladition, y_vladition)
    if score > best_score:
        print("----------------------new-best-model--------------------------")
        # Remember the winning configuration for the later experiments.
        best_struct = struct
        best_lr = lr
        best_score = score
        best_net = net

## Regularization

In this experiment we try to find the best L1 and L2 regularization coefficients.

In [None]:
# Grid search over the L1/L2 regularization coefficients, using the best
# structure and learning rate found above and the experiment's default
# activation (no activation argument is passed here).
reg = [10**-4, 10**-3]

# 0 means "no regularization"; these stay 0 unless a regularized run wins.
best_reg1 = 0
best_reg2 = 0
# Every (l1, l2) combination, in the same order as two nested loops over reg.
for reg1, reg2 in itertools.product(reg, repeat=2):
    exper = experiment(best_struct, lr=best_lr, l1_reg=reg1, l2_reg=reg2, max_epochs=50)
    net, score = exper.run_experiment(X_train, y_train, X_vladition, y_vladition)
    if score > best_score:
        print("----------------------new-best-model--------------------------")
        # Fixed: this was the bare attribute "net.print_net", which never
        # called the method and therefore printed nothing.
        net.print_net()
        best_reg1 = reg1
        best_reg2 = reg2
        best_score = score
        best_net = net

In [None]:
# Same regularization grid search as the previous cell, but with the 'tan_h'
# activation. Results compete with the best model found so far.
reg = [10**-4, 10**-3]

# Fixed: best_reg1 / best_reg2 are deliberately NOT reset to 0 here. The
# previous cell may already have found winning coefficients, and best_net /
# best_score are carried over from it; resetting only the coefficients would
# leave them out of sync with the retained best_net.
# Every (l1, l2) combination, in the same order as two nested loops over reg.
for reg1, reg2 in itertools.product(reg, repeat=2):
    exper = experiment(best_struct, activation='tan_h', lr=best_lr, l1_reg=reg1, l2_reg=reg2, max_epochs=50)
    net, score = exper.run_experiment(X_train, y_train, X_vladition, y_vladition)
    if score > best_score:
        print("----------------------new-best-model--------------------------")
        # Fixed: this was the bare attribute "net.print_net", which never
        # called the method and therefore printed nothing.
        net.print_net()
        best_reg1 = reg1
        best_reg2 = reg2
        best_score = score
        best_net = net

## Momentum

In this experiment we try adding momentum to the gradient updates.

In [None]:
# Re-run the best configuration found so far with momentum=0.9 and keep the
# resulting net if it beats the current best score.
exper = experiment(best_struct, lr=best_lr, l1_reg=best_reg1, l2_reg=best_reg2, momentum=0.9, max_epochs=50)
net, score = exper.run_experiment(X_train, y_train, X_vladition, y_vladition)
if score > best_score:
    print("----------------------new-best-model--------------------------")
    # Fixed: this was the bare attribute "net.print_net", which never called
    # the method and therefore printed nothing.
    net.print_net()
    # Fixed: the original reassigned best_reg1 / best_reg2 from reg1 / reg2,
    # which are stale loop variables left over from the regularization cell
    # (its last grid point), not the coefficients used for this run. This run
    # used best_reg1 / best_reg2, so they are already correct and stay as-is.
    best_score = score
    best_net = net

## batch size

In [None]:
# Try several mini-batch sizes with the best configuration so far (plus
# momentum=0.9) and keep the net that scores best on the validation set.
batch_sizes = [32, 64, 256]
# Recorded as the starting point; only replaced if one of the candidates wins.
best_batch = 128
for batch_size in batch_sizes:
    exper = experiment(best_struct, lr=best_lr, l1_reg=best_reg1, l2_reg=best_reg2, momentum=0.9, max_epochs=50, batch_size=batch_size)
    net, score = exper.run_experiment(X_train, y_train, X_vladition, y_vladition)
    # Re-score explicitly; net.score is given class indices, so the one-hot
    # labels are converted back with argmax.
    score = net.score(X_vladition, np.argmax(y_vladition, axis=1))
    # Not used further in this cell; kept so it can be inspected afterwards.
    train_score = net.score(X_train, np.argmax(y_train, axis=1))
    if score > best_score:
        print("----------------------new-best-model--------------------------")
        # Fixed: this was the bare attribute "net.print_net", which never
        # called the method and therefore printed nothing.
        net.print_net()
        best_batch = batch_size
        best_score = score
        best_net = net


In [None]:
# Same batch-size sweep as the previous cell, but building the 'tan_h' net
# directly and training it without the experiment helper (no momentum argument
# is passed here).
batch_sizes = [32, 64, 256]
# Fixed: best_batch is deliberately NOT reset to 128 here. The previous cell
# may already have recorded a winning batch size for the retained best_net;
# resetting it would discard that result while best_net / best_score persist.
for batch_size in batch_sizes:
    net = fc(best_struct, activation='tan_h', l1_reg=best_reg1, l2_reg=best_reg2)
    net.train(X_train, y_train, X_vladition, y_vladition, lr=best_lr, epochs=50, batch_size=batch_size)
    # net.score is given class indices, so the one-hot labels are converted
    # back with argmax.
    score = net.score(X_vladition, np.argmax(y_vladition, axis=1))
    train_score = net.score(X_train, np.argmax(y_train, axis=1))
    print('--------------------------results------------------------------')
    print(f"validation score={score} train score={train_score}")
    net.print_net()
    print('----------------------experiment-over--------------------------')
    if score > best_score:
        print("----------------------new-best-model--------------------------")
        # Fixed: this was the bare attribute "net.print_net", which never
        # called the method and therefore printed nothing.
        net.print_net()
        best_batch = batch_size
        best_score = score
        best_net = net


# changing the learning rate during the epochs

In [None]:
# Re-run the best configuration with lr_change=0.9 (presumably a per-epoch
# multiplicative learning-rate decay -- confirm in experiments.py) and keep the
# resulting net if it beats the current best score.
exper = experiment(best_struct, lr=best_lr, l1_reg=best_reg1, l2_reg=best_reg2, max_epochs=50, lr_change=0.9)
net, score = exper.run_experiment(X_train, y_train, X_vladition, y_vladition)
if score > best_score:
    print("----------------------new-best-model--------------------------")
    # Fixed: this was the bare attribute "net.print_net", which never called
    # the method and therefore printed nothing.
    net.print_net()
    best_score = score
    best_net = net

# train the best net

In [None]:
# Stage 1: train the selected best net for 50 epochs at the best learning rate.
# NOTE(review): presumably this continues from best_net's current weights -- confirm in FC.py.
best_net.train(X_train, y_train, X_vladition, y_vladition, epochs=50, lr=best_lr)

In [None]:
# Stage 2: another 50 epochs with the learning rate halved.
best_net.train(X_train, y_train, X_vladition, y_vladition, epochs=50,lr=best_lr*0.5)

In [None]:
# Stage 3: a final 50 epochs at one tenth of the best learning rate.
best_net.train(X_train, y_train, X_vladition, y_vladition, epochs=50, lr=best_lr*0.1)

In [None]:
# Print the description of the final selected network.
best_net.print_net()

In [None]:
# Final evaluation on the held-out test split, which was not passed to any
# experiment above. The one-hot labels are converted to class indices with argmax.
best_net.score(X_test, np.argmax(y_test, axis=1))