In [None]:
import copy

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import torch
import torchvision
from torchvision.transforms import ToTensor, Normalize, Compose

In [None]:
from scripts.architecture import MLP, MLPManual
from scripts.train import *
from scripts.plot_utils import plot_loss_accuracy, plotValAccuracy, fillSubplot

In [None]:
# Log the library versions this notebook is running against.
print(torch.__version__, np.__version__, sep="\n")

## Create Parity Data Iterator

In [None]:
# Preprocessing pipeline: convert each image to a tensor, then normalise it
# with mean 0.1307 and standard deviation 0.3081.
transforms = Compose([ToTensor(), Normalize(mean=(0.1307,), std=(0.3081,))])

In [None]:
# MNIST train/test splits, stored under ./data and downloaded if requested files are missing.
# No transformation is performed here; it is applied only once the data is read through the loader.
trainset = torchvision.datasets.MNIST(root='data', train=True, download=True, transform=transforms)
testset = torchvision.datasets.MNIST(root='data', train=False, download=True, transform=transforms)

In [None]:
# --- Experiment-wide hyperparameters ---
learn_rate = 0.05
num_epochs = 20
batch_size = 128

# Loss: the name string handed to the models/training helpers, and the torch criterion.
loss_type = "Binary Cross Entropy"
loss_fn = torch.nn.BCELoss()

# Settings passed to MLPManual / train_model_manually in the cells below.
B_initialization = "uniform"
optim = "SGD"
momentum = False
nesterov_momentum = False

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(device)

### For k = 1

In [None]:
# k = 1: MLP optimised with torch's Adadelta.
k = 1
model = MLP(k, "ReLU", loss_type)
optimizer = torch.optim.Adadelta(
    model.parameters(),
    lr=learn_rate,
    weight_decay=0.001,
)

(
    trainLostList_Ada1,
    trainAccList_Ada1,
    valLossList_Ada1,
    valAccList_Ada1,
) = train_model(
    model, k, trainset, testset, loss_type, loss_fn, optimizer, num_epochs, batch_size,
    validate_model=True,
    performance=accuracy,
    device=device,
    lr_scheduler=None,
    updateWManually=False,
)

# Loss and accuracy curves for this run.
plot_loss_accuracy(
    trainLostList_Ada1,
    valLossList_Ada1,
    trainAccList_Ada1,
    valAccList_Ada1,
    num_epochs,
)

In [None]:
# k = 1: MLP optimised with torch's SGD.
model2 = MLP(k, "ReLU", loss_type)
optimizer = torch.optim.SGD(
    model2.parameters(),
    lr=learn_rate,
    weight_decay=0.001,
)

(
    trainLostList_sgd1,
    trainAccList_sgd1,
    valLossList_sgd1,
    valAccList_sgd1,
) = train_model(
    model2, k, trainset, testset, loss_type, loss_fn, optimizer, num_epochs, batch_size,
    validate_model=True,
    performance=accuracy,
    device=device,
    lr=learn_rate,
    lr_scheduler=None,
    updateWManually=False,
)

In [None]:
# k = 1: manual model (MLPManual) trained in "BP" mode.
modelManual = MLPManual(k, learn_rate, loss_type, "BP", None, optim, device, False)

(
    trainLostList_sgd1_scratch,
    trainAccList_sgd1_scratch,
    valLossList_sgd1_scratch,
    valAccList_sgd1_scratch,
) = train_model_manually(
    modelManual, k, trainset, testset, loss_type, loss_fn, num_epochs, batch_size,
    momentum, nesterov_momentum,
    validate_model=True,
    device=device,
)

In [None]:
# k = 1: manual model trained in "DFA" mode with the configured B initialisation.
modelManualDFA = MLPManual(k, learn_rate, loss_type, "DFA", B_initialization, optim, device, False)

(
    trainLostList_sgd1_dfa,
    trainAccList_sgd1_dfa,
    valLossList_sgd1_dfa,
    valAccList_sgd1_dfa,
) = train_model_manually(
    modelManualDFA, k, trainset, testset, loss_type, loss_fn, num_epochs, batch_size,
    momentum, nesterov_momentum,
    validate_model=True,
    device=device,
)

In [None]:
# Overlay the validation-accuracy curves of the four k=1 runs on one figure.
plt.figure(figsize=(15,8))
plt.ylim(0.5,1)
plt.plot(valAccList_sgd1, label="SGD")
plt.plot(valAccList_Ada1, label="Adadelta")
plt.plot(valAccList_sgd1_scratch, label= "SGD BP Scratch")
plt.plot(valAccList_sgd1_dfa, label= "SGD DFA Scratch")
# One major x tick per list index (the curves are plotted against their index).
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.legend();


### For k = 3

In [None]:
# k = 3: MLP optimised with torch's Adadelta.
k = 3
model3 = MLP(k, "ReLU", loss_type)
optimizer = torch.optim.Adadelta(
    model3.parameters(),
    lr=learn_rate,
    weight_decay=0.001,
)

(
    trainLostList_Ada3,
    trainAccList_Ada3,
    valLossList_Ada3,
    valAccList_Ada3,
) = train_model(
    model3, k, trainset, testset, loss_type, loss_fn, optimizer, num_epochs, batch_size,
    validate_model=True,
    performance=accuracy,
    device=device,
    lr_scheduler=None,
)

# Loss and accuracy curves for this run.
plot_loss_accuracy(
    trainLostList_Ada3,
    valLossList_Ada3,
    trainAccList_Ada3,
    valAccList_Ada3,
    num_epochs,
)

In [None]:
# k = 3: MLP optimised with torch's SGD.
model4 = MLP(k, "ReLU", loss_type)
optimizer = torch.optim.SGD(
    model4.parameters(),
    lr=learn_rate,
    weight_decay=0.001,
)

(
    trainLostList_sgd3,
    trainAccList_sgd3,
    valLossList_sgd3,
    valAccList_sgd3,
) = train_model(
    model4, k, trainset, testset, loss_type, loss_fn, optimizer, num_epochs, batch_size,
    validate_model=True,
    performance=accuracy,
    device=device,
    lr_scheduler=None,
)

In [None]:
# k = 3: manual model (MLPManual) trained in "BP" mode.
modelManual3 = MLPManual(k, learn_rate, loss_type, "BP", None, optim, device, False)

(
    trainLostList_sgd3_scratch,
    trainAccList_sgd3_scratch,
    valLossList_sgd3_scratch,
    valAccList_sgd3_scratch,
) = train_model_manually(
    modelManual3, k, trainset, testset, loss_type, loss_fn, num_epochs, batch_size,
    momentum, nesterov_momentum,
    validate_model=True,
    device=device,
)

In [None]:
# k = 3: manual model trained in "DFA" mode.
# NOTE: this rebinds the notebook-wide `learn_rate`; cells executed after this one
# that read `learn_rate` see 0.02 (not 0.05) until it is assigned again.
learn_rate = 0.02 # one of the best lr that I got for uniform B, with 0.05 training didn't perform well
modelManual3DFA = MLPManual(k, learn_rate, loss_type, "DFA", B_initialization, optim, device, False)
trainLostList_sgd3_dfa, trainAccList_sgd3_dfa, \
valLossList_sgd3_dfa, valAccList_sgd3_dfa  = train_model_manually(modelManual3DFA, k, trainset, testset, loss_type, loss_fn, num_epochs, batch_size, momentum,
                                                                         nesterov_momentum, validate_model = True, device=device)

In [None]:
# Overlay the validation-accuracy curves of the four k=3 runs on one figure.
plt.figure(figsize=(15,8))
plt.ylim(0.4,1)
plt.plot(valAccList_sgd3, label="SGD")
plt.plot(valAccList_Ada3, label="Adadelta")
plt.plot(valAccList_sgd3_scratch, label= "SGD BP Scratch")
plt.plot(valAccList_sgd3_dfa, label= "SGD DFA Scratch")
# One major x tick per list index (the curves are plotted against their index).
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.grid(True)
plt.legend();

In [None]:
def _plot_val_accuracy_panel(ax, curves, title):
    """Draw a set of accuracy curves on one axis.

    Parameters
    ----------
    ax : matplotlib axis to draw on.
    curves : list of (values, color, label) tuples; each series is plotted
        against 1..len(values).
    title : title of the panel.
    """
    for values, color, label in curves:
        ax.plot(range(1, len(values) + 1), values, color=color, label=label)
    ax.set_ylim(0.40, 1.05)
    ax.set_title(title)
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Accuracy")
    # One tick per plotted point of the longest series (previously hard-coded to 1..20).
    ax.set_xticks(range(1, max(len(values) for values, _, _ in curves) + 1))
    ax.legend()
    ax.grid(True)


# Side-by-side comparison of the k=1 and k=3 runs.
fig, (ax1, ax3) = plt.subplots(1, 2, figsize=(16,8))

_plot_val_accuracy_panel(
    ax1,
    [
        (valAccList_sgd1, "blue", "SGD BP Pytorch"),
        (valAccList_Ada1, "green", "Adadelta BP Pytorch"),
        # Manual BP run: was labelled "SGD BP Pytorch", duplicating the first
        # legend entry; now matches the label used in the k=3 panel.
        (valAccList_sgd1_scratch, "orange", "SGD BP Dogan"),
        (valAccList_sgd1_dfa, "red", "SGD DFA Pytorch"),
    ],
    "Test Accuracy k=1",
)

_plot_val_accuracy_panel(
    ax3,
    [
        (valAccList_sgd3, "blue", "SGD BP Pytorch"),
        (valAccList_Ada3, "green", "Adadelta BP Pytorch"),
        (valAccList_sgd3_scratch, "orange", "SGD BP Dogan"),
        (valAccList_sgd3_dfa, "red", "SGD DFA Pytorch"),
    ],
    "Test Accuracy k=3",
)

### Try with the same weights (SGD BP Pytorch vs SGD BP Dogan)

In [None]:
# Same-weights experiment, PyTorch side (k = 3).
k = 3
modelx = MLP(k, "ReLU", loss_type).to(device)

# Copy the freshly initialised layer weights before training changes them,
# so the manual model can be started from the same values.
w1 = copy.deepcopy(modelx.state_dict()["layer1.weight"]).to(device)
w2 = copy.deepcopy(modelx.state_dict()["layer2.weight"]).to(device)

optimizer = torch.optim.SGD(modelx.parameters(), lr=learn_rate)

(
    trainLostList_sgd3_w,
    trainAccList_sgd3_w,
    valLossList_sgd3_w,
    valAccList_sgd3_w,
) = train_model(
    modelx, k, trainset, testset, loss_type, loss_fn, optimizer, num_epochs, batch_size,
    validate_model=True,
    performance=accuracy,
    device=device,
    lr=learn_rate,
    lr_scheduler=None,
    updateWManually=True,
)

In [None]:
# Same-weights experiment, manual side: hand the transposed copies of the
# PyTorch model's initial weights to MLPManual as its last argument.
modelManualx = MLPManual(k, learn_rate, loss_type, "BP", None, optim, device, (w1.t(), w2.t()))

(
    trainLostList_sgd3_scratch_w,
    trainAccList_sgd3_scratch_w,
    valLossList_sgd3_scratch_w,
    valAccList_sgd3_scratch_w,
) = train_model_manually(
    modelManualx, k, trainset, testset, loss_type, loss_fn, num_epochs, batch_size,
    momentum, nesterov_momentum,
    validate_model=True,
    device=device,
)

In [None]:
# Overlay the validation accuracy of the two same-weights runs (torch SGD vs manual BP)
# and save the figure to plots/doganVSPytorch.png.
plt.figure(figsize=(15,8))
plt.plot(range(1,21),valAccList_sgd3_w, color = "blue", label = "BP SGD Pytorch")
plt.plot(range(1,21),valAccList_sgd3_scratch_w, color = "green", label = "BP SGD Dogan")

plt.ylim(0.4,1.05)
plt.title("Test Accuracy k=3")
plt.xlabel("Iteration")
plt.ylabel("Accuracy")
plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1))
plt.legend()
plt.grid(True)

# Save before plt.show().
plt.savefig("plots/doganVSPytorch.png")

plt.show();

# The two curves are expected to differ, because the data is recreated every epoch.
# Open question: the results differ even without recreating the data - why?

### DFA Experiments

In [None]:
# Run DFA for up to 100 epochs to see whether it reaches a result similar to BP.
k=3
learn_rate = 0.01
num_epochs_long = 100  # epoch budget of this run only; the notebook-wide num_epochs is left untouched

modelManual4 = MLPManual(k, learn_rate, loss_type, "DFA", B_initialization, optim, device, False)
# The stray positional `weight_decay` was dropped: the name is never defined in this
# notebook and no other train_model_manually call in the earlier cells passes it.
trainLostList_sgd4_scratch, trainAccList_sgd4_scratch, \
valLossList_sgd4_scratch, valAccList_sgd4_scratch  = train_model_manually(modelManual4, k, trainset, testset, loss_type, loss_fn, num_epochs_long, batch_size, momentum,
                                                                         nesterov_momentum, validate_model = True, device=device)
plt.figure(figsize=(16,8))
plotValAccuracy(valAccList_sgd4_scratch, num_epochs_long, "DFA Validation", k)

In [None]:
# Validation accuracy for every B initialisation, using the SAME learning-rate
# grid for all of them; one subplot per initialisation.
num_epochs = 20
K = 3
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(16,8))
lr_rates = np.linspace(0.001, 0.05, 5)

# Initialisation name -> subplot it is drawn on. The four formerly copy-pasted
# branches differed only in the target axis.
axes_by_init = {
    "standard uniform": ax1,
    "uniform": ax2,
    "standard gaussian": ax3,
    "gaussian": ax4,
}

for init, init_ax in axes_by_init.items():
    for lr in lr_rates:
        modelManualx = MLPManual(K, lr, loss_type, "DFA", init, optim, device, False)
        trainLostList, trainAccList, \
        valLossList, valAccList  = train_model_manually(modelManualx, K, trainset, testset, loss_type, loss_fn, num_epochs,
                                                        batch_size, momentum, nesterov_momentum,
                                                        validate_model = True, device=device)
        fillSubplot(valAccList, num_epochs, str(round(lr,4)), init_ax, init)

plt.savefig("plots/InitsWSamelrs.png")

In [None]:
def tuneLearningRate(lr_array : np.ndarray, training_method: str, init_B: str, optim: str, k:int, loss_type):
    """Pick the learning rate with the best late-training validation accuracy.

    For every candidate in ``lr_array`` a fresh ``MLPManual`` is trained with
    ``train_model_manually`` and scored by the mean of the last (up to) 10
    entries of the fourth returned list (the validation accuracies).

    Reads these notebook globals: ``trainset``, ``testset``, ``loss_fn``,
    ``num_epochs``, ``batch_size``, ``momentum``, ``nesterov_momentum``, ``device``.

    Parameters
    ----------
    lr_array : candidate learning rates (array-like, must not be empty).
    training_method : forwarded to ``MLPManual`` (the notebook uses "BP" / "DFA").
    init_B : forwarded to ``MLPManual`` (B initialisation name, or None).
    optim : forwarded to ``MLPManual`` (optimizer name).
    k : forwarded to both ``MLPManual`` and ``train_model_manually``.
    loss_type : forwarded to both ``MLPManual`` and ``train_model_manually``.

    Returns
    -------
    The element of ``lr_array`` with the highest score.

    Raises
    ------
    ValueError
        If ``lr_array`` is empty.
    """
    lr_array = np.asarray(lr_array)
    if lr_array.size == 0:
        raise ValueError("lr_array must contain at least one learning rate")

    mean_val_accs = []
    for learning_rate in lr_array:
        # Pass the trailing `False` explicitly, like every other MLPManual call in the notebook.
        model = MLPManual(k, learning_rate, loss_type, training_method, init_B, optim, device, False)
        # Use the `k` parameter here; the global `K` was read by mistake before.
        _, _, _, val_acc_list = train_model_manually(model, k, trainset, testset, loss_type, loss_fn, num_epochs,
                                                     batch_size, momentum, nesterov_momentum,
                                                     validate_model = True, device=device)
        last_accs = val_acc_list[-10:]
        mean_val_accs.append(sum(last_accs) / len(last_accs))

    # Rank once, after all candidates have been evaluated.
    order = np.argsort(np.array(mean_val_accs))
    best_lr = lr_array[order][-1]

    print("Best learning rate is: ", best_lr)
    return best_lr

In [None]:
# Tune the learning rate of each B initialisation (and of BP) to get the best one;
# this is done by checking the last 10 validation accuracies (see tuneLearningRate).
k=3
# Candidate grids, 6 evenly spaced values each.
lr_array_uni = np.linspace(0.01, 0.025, 6)
lr_array_std_uni = np.linspace(0.0015, 0.0035, 6)
lr_array_gauss = np.linspace(0.01, 0.02, 6)
lr_array_std_gauss = np.linspace(0.0005, 0.001, 6)
lr_array_bp = np.linspace(0.05, 0.3, 6)

# Each call trains one model per candidate learning rate.
best_lr_uni = tuneLearningRate(lr_array_uni, "DFA", "uniform", optim, k, loss_type)
best_lr_std_uni = tuneLearningRate(lr_array_std_uni, "DFA", "standard uniform", optim, k, loss_type)
best_lr_gaussian = tuneLearningRate(lr_array_gauss, "DFA", "gaussian", optim, k, loss_type)
best_lr_std_gaussian = tuneLearningRate(lr_array_std_gauss, "DFA", "standard gaussian", optim, k, loss_type)
best_lr_bp = tuneLearningRate(lr_array_bp, "BP", None, optim, k, loss_type)

In [None]:
# Tuning these parameters takes too much time, so run the tuning once and store the results statically.
# These assignments overwrite whatever the tuning cell above produced.
best_lr_uni = 0.022
best_lr_std_uni = 0.003 
best_lr_gaussian = 0.016
best_lr_std_gaussian = 0.001
best_lr_bp = 0.05

In [None]:
# Tune their learning rates to get best one, it is done by checking the last 10 val Accuracy
# NOTE(review): this cell looks like a half-edited copy of the earlier tuning cell.
# The results stored in best_lr_adagrad / best_lr_rmsprop / best_lr_adam come from DFA runs
# with the current `optim` and different B initialisations, and those names are reassigned
# further down. It also overwrites the statically stored best_lr_std_gaussian and
# best_lr_bp - confirm this is intended.
k=3
lr_array_uni = np.linspace(0.01, 0.025, 6)
lr_array_std_uni = np.linspace(0.0015, 0.0035, 6)
lr_array_gauss = np.linspace(0.01, 0.02, 6)
lr_array_std_gauss = np.linspace(0.001, 0.0002, 6)
lr_array_bp = np.linspace(0.05, 0.3, 6)

best_lr_adagrad = tuneLearningRate(lr_array_uni, "DFA", "uniform", optim, k, loss_type)
best_lr_rmsprop = tuneLearningRate(lr_array_std_uni, "DFA", "uniform", optim, k, loss_type)
# Argument order fixed: the signature is (..., optim, k, loss_type); `k, optim` was passed before.
best_lr_adam = tuneLearningRate(lr_array_gauss, "DFA", "gaussian", optim, k, loss_type)
best_lr_std_gaussian = tuneLearningRate(lr_array_std_gauss, "DFA", "standard gaussian", optim, k, loss_type)
# Missing `optim` argument added: the call had 5 arguments for 6 parameters (TypeError).
best_lr_bp = tuneLearningRate(lr_array_bp, "BP", None, optim, k, loss_type)

In [None]:

# Plot validation accuracy for the different B initialisation methods (plus plain BP),
# 20 epochs each, every method run with its best learning rate.
initializations = ["standard uniform", "uniform", "standard gaussian", "gaussian", "BP"]
num_epochs=20
K=3
optim = "SGD"

# Best learning rate per DFA initialisation; any other entry ("BP") uses best_lr_bp.
best_lr_by_init = {
    "uniform": best_lr_uni,
    "standard uniform": best_lr_std_uni,
    "gaussian": best_lr_gaussian,
    "standard gaussian": best_lr_std_gaussian,
}

fig = plt.figure(figsize=(15,9))
for ini in initializations:
    print(ini)
    if ini in best_lr_by_init:
        modelManualx = MLPManual(K, best_lr_by_init[ini], loss_type, "DFA", ini, optim, device, False)
    else:
        modelManualx = MLPManual(K, best_lr_bp, loss_type, "BP", None, optim, device, False)
    # The stray positional `weight_decay` was dropped: the name is never defined in this
    # notebook and the train_model_manually calls in the earlier cells do not pass it.
    trainLostList, trainAccList, \
    valLossList, valAccList  = train_model_manually(modelManualx, K, trainset, testset, loss_type, loss_fn, num_epochs,
                                                    batch_size, momentum, nesterov_momentum,
                                                    validate_model = True, device=device)
    plotValAccuracy(valAccList, num_epochs, ini, K)

plt.savefig("plots/randomBInit.png")

In [None]:
# Validation accuracy for every B initialisation, each swept over its OWN
# learning-rate grid; one subplot per initialisation.
num_epochs = 20
K = 3
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(16,8))
lr_array_uni = np.linspace(0.01, 0.025, 6)
lr_array_std_uni = np.linspace(0.0015, 0.0035, 6)
lr_array_gauss = np.linspace(0.01, 0.02, 6)
lr_array_std_gauss = np.linspace(0.001, 0.0002, 6)
lr_array_bp = np.linspace(0.05, 0.3, 6)

# (initialisation name, its learning-rate grid, subplot to draw on).
# Replaces four copy-pasted branches, of which only the "gaussian" one passed a
# stray positional `weight_decay` (a name never defined in this notebook).
init_sweeps = [
    ("standard uniform", lr_array_std_uni, ax1),
    ("uniform", lr_array_uni, ax2),
    ("standard gaussian", lr_array_std_gauss, ax3),
    ("gaussian", lr_array_gauss, ax4),
]

for init, init_lr_array, init_ax in init_sweeps:
    for lr in init_lr_array:
        modelManualx = MLPManual(K, lr, loss_type, "DFA", init, optim, device, False)
        trainLostList, trainAccList, \
        valLossList, valAccList  = train_model_manually(modelManualx, K, trainset, testset, loss_type, loss_fn, num_epochs,
                                                        batch_size, momentum, nesterov_momentum,
                                                        validate_model = True, device=device)
        fillSubplot(valAccList, num_epochs, str(round(lr,4)), init_ax, init)

plt.savefig("plots/InitsWDifferentlrs.png")

### Optimizers Experiments

In [None]:
# Validation accuracy for every optimizer, using the SAME learning-rate grid
# for all of them; one subplot per optimizer.
init = "uniform"
num_epochs = 20
K = 3
fig, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(1, 5, figsize=(16,8))
lr_rates = np.linspace(0.005, 0.025, 6)

# Optimizer name -> subplot it is drawn on. The five formerly copy-pasted
# branches differed only in the target axis.
axes_by_optim = {
    "SGD": ax1,
    "Adagrad": ax2,
    "Adadelta": ax3,
    "RMSProp": ax4,
    "Adam": ax5,
}

# The loop variable is deliberately NOT called `optim`, so the notebook-wide
# `optim` setting is not silently overwritten by this sweep.
for optim_name, optim_ax in axes_by_optim.items():
    for lr in lr_rates:
        modelManualx = MLPManual(K, lr, loss_type, "DFA", init, optim_name, device, False)
        trainLostList, trainAccList, \
        valLossList, valAccList  = train_model_manually(modelManualx, K, trainset, testset, loss_type, loss_fn, num_epochs,
                                                        batch_size, momentum, nesterov_momentum,
                                                        validate_model = True, device=device)
        fillSubplot(valAccList, num_epochs, str(round(lr,4)), optim_ax, optim_name)


plt.savefig("plots/OptimsWSamelrs.png")

In [None]:
# Tune the learning rate of each optimizer to get the best one;
# this is done by checking the last 10 validation accuracies (see tuneLearningRate).
k=3
# Candidate grids, 6 evenly spaced values each.
lr_array_adagrad = np.linspace(0.001, 0.005, 6)
lr_array_adadelta = np.linspace(0.1, 1, 6)
lr_array_rmsprop = np.linspace(0.0001, 0.0003, 6)
lr_array_adam = np.linspace(0.0001, 0.001, 6)

# All runs use DFA with the B initialisation currently stored in `init`.
best_lr_adagrad = tuneLearningRate(lr_array_adagrad, "DFA", init, "Adagrad", k, loss_type)
best_lr_adadelta = tuneLearningRate(lr_array_adadelta, "DFA", init, "Adadelta", k, loss_type)
best_lr_rmsprop = tuneLearningRate(lr_array_rmsprop, "DFA", init, "RMSProp", k, loss_type)
best_lr_adam = tuneLearningRate(lr_array_adam, "DFA", init, "Adam", k, loss_type)

In [None]:
# Statically stored best learning rate per optimizer; these assignments overwrite
# the values computed by the tuning cell above.
# NOTE(review): presumably copied from an earlier tuning run - confirm they are still current.
best_lr_SGD = 0.02
best_lr_adagrad = 0.0042
best_lr_adadelta = 1
best_lr_rmsprop = 0.0001
best_lr_adam = 0.001

In [None]:
# Plot validation accuracy for the different optimizers (DFA, uniform B),
# 20 epochs each, every optimizer run with its best learning rate.
optims = ["SGD", "SGD Momentum", "SGD Nesterov", "Adagrad", "Adadelta", "RMSProp", "Adam"]
num_epochs=20
K=3
init = "uniform"

# Display name -> (optimizer name given to MLPManual, learning rate,
#                  momentum argument, nesterov_momentum argument).
# Fixes relative to the previous if/elif chain:
#   * "RMSPRop" was misspelled, so RMSProp silently fell through to a BP run
#     that used Adam's learning rate;
#   * the momentum variants also ended up in that BP branch; they are now set up
#     the same way as in the per-optimizer learning-rate sweep cell (plain "SGD"
#     model, 0.9 passed as momentum / nesterov_momentum).
# NOTE(review): no learning rate was tuned for the momentum variants, so
# best_lr_SGD is reused for them - confirm.
optim_configs = {
    "SGD": ("SGD", best_lr_SGD, momentum, nesterov_momentum),
    "SGD Momentum": ("SGD", best_lr_SGD, 0.9, nesterov_momentum),
    "SGD Nesterov": ("SGD", best_lr_SGD, momentum, 0.9),
    "Adagrad": ("Adagrad", best_lr_adagrad, momentum, nesterov_momentum),
    "Adadelta": ("Adadelta", best_lr_adadelta, momentum, nesterov_momentum),
    "RMSProp": ("RMSProp", best_lr_rmsprop, momentum, nesterov_momentum),
    "Adam": ("Adam", best_lr_adam, momentum, nesterov_momentum),
}

fig = plt.figure(figsize=(15,9))
# The loop variable is not called `optim`, so the notebook-wide setting stays intact.
for optim_name in optims:
    model_optim, best_lr, run_momentum, run_nesterov = optim_configs[optim_name]
    modelManualx = MLPManual(K, best_lr, loss_type, "DFA", init, model_optim, device, False)

    # The stray positional `weight_decay` was dropped (name never defined in this notebook).
    trainLostList, trainAccList, \
    valLossList, valAccList  = train_model_manually(modelManualx, K, trainset, testset, loss_type, loss_fn, num_epochs,
                                                    batch_size, run_momentum, run_nesterov,
                                                    validate_model = True, device=device)
    # Label each curve with its optimizer; the stale `ini` from an earlier cell was used before.
    plotValAccuracy(valAccList, num_epochs, optim_name, K)

plt.savefig("plots/randomBOptims.png")

In [None]:
# Validation accuracy for every optimizer, each swept over its OWN
# learning-rate grid; one subplot per optimizer.
num_epochs = 20
init = "uniform"
K = 3
fig, (ax1, ax2, ax3, ax4, ax5, ax6, ax7) = plt.subplots(1, 7, figsize=(16,8))
lr_array_sgd = np.linspace(0.015, 0.035, 6)
lr_array_adagrad = np.linspace(0.001, 0.005, 6)
lr_array_adadelta = np.linspace(0.1, 1, 6)
lr_array_rmsprop = np.linspace(0.0001, 0.0003, 6)
lr_array_adam = np.linspace(0.001, 0.005, 6)

# (display name, optimizer name given to MLPManual, learning-rate grid,
#  momentum argument, nesterov_momentum argument, subplot).
# The two momentum variants use a plain "SGD" model and pass 0.9 in the
# corresponding train_model_manually argument, exactly as the former branches did.
optim_sweeps = [
    ("SGD", "SGD", lr_array_sgd, momentum, nesterov_momentum, ax1),
    ("SGD Momentum", "SGD", lr_array_sgd, 0.9, nesterov_momentum, ax2),
    ("SGD Nesterov", "SGD", lr_array_sgd, momentum, 0.9, ax3),
    ("Adagrad", "Adagrad", lr_array_adagrad, momentum, nesterov_momentum, ax4),
    ("Adadelta", "Adadelta", lr_array_adadelta, momentum, nesterov_momentum, ax5),
    ("RMSProp", "RMSProp", lr_array_rmsprop, momentum, nesterov_momentum, ax6),
    ("Adam", "Adam", lr_array_adam, momentum, nesterov_momentum, ax7),
]

# The loop variable is deliberately NOT called `optim`, so the notebook-wide
# `optim` setting is not silently overwritten by this sweep.
for optim_name, model_optim, optim_lr_array, run_momentum, run_nesterov, optim_ax in optim_sweeps:
    for lr in optim_lr_array:
        modelManualx = MLPManual(K, lr, loss_type, "DFA", init, model_optim, device, False)
        trainLostList, trainAccList, \
        valLossList, valAccList  = train_model_manually(modelManualx, K, trainset, testset, loss_type, loss_fn, num_epochs,
                                                        batch_size, run_momentum, run_nesterov,
                                                        validate_model = True, device=device)
        fillSubplot(valAccList, num_epochs, str(round(lr,4)), optim_ax, optim_name)


plt.savefig("plots/OptimsWDifferentlrs.png")

### Main Experiment

In [None]:
# Main experiment: validation accuracy of all methods on one figure.
# Add Lazy methods
learn_rate = 0.05
learn_rate_dfa = 0.02  # separate learning rate for the DFA run
K = 3
num_epochs = 20
loss_type = "Binary Cross Entropy"

fig = plt.figure(figsize=(15,9))
# Each entry is passed to MLP as its activation argument, except "SGD Dogan" and "DFA",
# which are run with the manual model instead.
for activation in ["Adadelta", "NTK", "Gaussian features", "ReLU features", "Linear features", "SGD", "SGD Dogan", "DFA"]:
    if activation != "SGD Dogan" and activation != "DFA":
        model = MLP(K, activation, loss_type)
        if "features" in activation:
            # deactivate the first layer: only layer2's parameters are given to the optimizer
            optimizer = torch.optim.Adadelta(model.layer2.parameters(), lr = learn_rate, weight_decay=0.001)
        elif "NTK" in activation:
            paramsToUpdate = list(model.layer1.parameters()) + list(model.layer2.parameters())
            optimizer = torch.optim.Adadelta(paramsToUpdate, lr = learn_rate, weight_decay=0.001)
        elif "SGD" in activation:
            optimizer = torch.optim.SGD(model.parameters(), lr = learn_rate, weight_decay=0.001)
        else:
            optimizer = torch.optim.Adadelta(model.parameters(), lr = learn_rate, weight_decay=0.001)

        print("Activation:",activation)

        # Use the notebook's `device` (with its CPU fallback) instead of a hard-coded "cuda:0".
        trainLostList, trainAccList, valLossList, valAccList  = train_model(model, K, trainset, testset, loss_type, loss_fn, optimizer, num_epochs,
                                                                            batch_size, validate_model = True, performance=accuracy,
                                                                            device=device, lr_scheduler=None)
    else:
        if activation == "SGD Dogan":
            modelManual3 = MLPManual(K, learn_rate, loss_type, "BP", None, "SGD", device, False)
        else:
            # DFA run: B initialisation and optimizer are given explicitly. The previous
            # code read `init` and `optim`, which at this point hold whatever the last
            # sweep loop left behind (after a full run, `optim` is "Adam").
            modelManual3 = MLPManual(K, learn_rate_dfa, loss_type, activation, B_initialization, "SGD", device, False)

        trainLostList, trainAccList, valLossList, valAccList  = train_model_manually(modelManual3, K, trainset, testset, loss_type, loss_fn, num_epochs,
                                                                                    batch_size, momentum, nesterov_momentum,
                                                                                    validate_model = True, device=device)

    plotValAccuracy(valAccList, num_epochs, activation, K)

fig.savefig("plots/" + str(K) + "valAccuracy.png")
plt.show()
dataset = MNISTParity(trainset, K, 128)
dataset.plotRandomData()

# TODO: Tune all the models including the lazy ones, for learning_rate and weight_decay