In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/Othercomputers/Il mio laptop/Machine-Learning-Optimization_new

Mounted at /content/drive
/content/drive/Othercomputers/Il mio laptop/Machine-Learning-Optimization_new


In [2]:
#@title Import and utilities 

from Frank_Wolfe.utils.utils import *
from Frank_Wolfe.DFW import *
from Frank_Wolfe.architectures import *
from Frank_Wolfe.MultiClassHingeLoss import *
!pip install barbar
from barbar import Bar
import os
import pickle

device = "cpu"



You should consider upgrading via the 'C:\Users\Federico Betti\anaconda3\envs\CS439\python.exe -m pip install --upgrade pip' command.


In [3]:
save_stats = True
save_figs = True
load = False

In [4]:
#@title Choose dataset name and architecture of the network 

dataset_name = 'CIFAR10' #@param ['CIFAR10', 'CIFAR100']
model_type = 'DenseNet' #@param ['DenseNet', 'WideResNet', 'GoogLeNet', 'ResNeXt']
if model_type == 'GoogLeNet':
    model = GoogleNet(num_class=10 if dataset_name == 'CIFAR10' else 100)
elif model_type == 'DenseNet':
    model = torchvision.models.densenet121(pretrained=False)
elif model_type == 'ResNeXt':
    model = torchvision.models.resnet101(pretrained=False)
elif model_type == 'WideResNet':
    model =  WideResNet(num_classes=10 if dataset_name == 'CIFAR10' else 100)
else:
    raise ValueError("Please, select an available architecture")

datasetDict = setDatasetAttributes(dataset_name)
trainTransformDict, testTransformDict = setTrainAndTest(dataset_name)

root = f"{dataset_name}-dataset"

trainData = datasetDict['datasetDict'](root=root, train=True, download=True,
                                            transform=trainTransformDict[dataset_name])
testData = datasetDict['datasetDict'](root=root, train=False,
                                        transform=testTransformDict[dataset_name])

model = model.to(device="cuda:0")

# define the loss object
loss_criterion = MultiClassHingeLoss().to(device="cuda:0")

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to CIFAR10-dataset\cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting CIFAR10-dataset\cifar-10-python.tar.gz to CIFAR10-dataset


AssertionError: Torch not compiled with CUDA enabled

In [None]:
#@title Choose optimizer and parameters 

optimizer_name = "DFW" #@param  ['DFW', 'Adam', 'AdaGrad', 'SGD with momentum', 'SGD']
momentum = 0.9 #@param {type:"number"}
lr = 0.001 #@param {type:"number"}
eta = 0.1 #@param {type:"number"}
beta_1 = 0.9 #@param {type:"number"}
beta_2 = 0.99 #@param {type:"number"}
weight_decay = 0.01 #@param {type:"number"}

if optimizer_name == "DFW":
    optimizer = DFW(params=model.parameters(), eta=eta, momentum=momentum, 
                    prox_steps=1)
    assert eta > 0
elif optimizer_name == "SGD" or optimizer_name == "SGD with momentum":
    optimizer = torch.optim.SGD(params=model.parameters(), lr=lr,
                              momentum=momentum, weight_decay=weight_decay)
    assert lr > 0
    assert 0 <= momentum <= 1
elif optimizer_name == "Adam":
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, 
                               betas=(beta_1, beta_2), weight_decay=weight_decay)
elif optimizer_name == "AdaGrad":
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, weight_decay=weight_decay)

In [None]:
#@title Train the network  

train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []
epochs_times = []

nepochs = 10 #@param {type:"integer"}
batch_size = 64  #@param {type:"integer"}
verbose = 0 #@param [0, 1]

# Loaders
trainLoader = torch.utils.data.DataLoader(trainData, batch_size=batch_size, shuffle=True,
                                      pin_memory=torch.cuda.is_available(), num_workers=2)
testLoader = torch.utils.data.DataLoader(testData, batch_size=batch_size, shuffle=False,
                                      pin_memory=torch.cuda.is_available(), num_workers=2)

# initialize some necessary metrics objects
train_loss, train_accuracy = AverageMeter(), AverageMeter()
test_loss, test_accuracy = AverageMeter(), AverageMeter()

# function to reset metrics
def reset_metrics():
    train_loss.reset()
    train_accuracy.reset()
    test_loss.reset()
    test_accuracy.reset()

@torch.no_grad()
def evaluate_model(data="train"):
    if data == "train":
        loader = trainLoader
        mean_loss, mean_accuracy = train_loss, train_accuracy
    elif data == "test":
        loader = testLoader
        mean_loss, mean_accuracy = test_loss, test_accuracy
    
    sys.stdout.write(f"Evaluation of {data} data:\n")
    for x_input, y_target in Bar(loader):
        x_input, y_target = x_input.to(device="cuda:0"), y_target.to(device="cuda:0")
        output = model.eval()(x_input)
        loss = loss_criterion(output, y_target)
        mean_loss(loss.item(), len(y_target))
        mean_accuracy(categorical_accuracy(y_true=y_target, output=output), len(y_target))

for epoch in range(nepochs + 1):
    start = time.time()
    reset_metrics()
    sys.stdout.write(f"\n\nEpoch {epoch}/{nepochs}\n")
    if epoch == 0:
        # Just evaluate the model once to get the metrics
        evaluate_model(data='train')
    else:
        # Train
        sys.stdout.write(f"Training:\n")
        for x_input, y_target in Bar(trainLoader):
            x_input, y_target = x_input.to(device="cuda:0"), y_target.to(device="cuda:0")
            optimizer.zero_grad()  # Zero the gradient buffers
            output = model.train()(x_input)
            loss = loss_criterion(output, y_target)
            loss.backward()  # Backpropagation
            if optimizer_name == "DFW":
              optimizer.step(lambda: float(loss), model, loss_criterion, x_input, y_target)
            else:
              optimizer.step() 
            train_loss(loss.item(), len(y_target))
            train_accuracy(categorical_accuracy(y_true=y_target, output=output), len(y_target))

    evaluate_model(data='test')
    sys.stdout.write(f"\n Finished epoch {epoch}/{nepochs}: Train Loss {train_loss.result()} | Test Loss {test_loss.result()} | Train Acc {train_accuracy.result()} | Test Acc {test_accuracy.result()}\n")

    train_losses.append(train_loss.result())
    train_accuracies.append(train_accuracy.result())
    test_losses.append(test_loss.result())
    test_accuracies.append(test_accuracy.result())


    elapsed_time = time.time()-start
    sys.stdout.write(f"Time elapsed for the current epoch {elapsed_time}")
    epochs_times.append(elapsed_time)



Epoch 0/10
Evaluation of train data:
Evaluation of test data:

 Finished epoch 0/10: Train Loss 1.0318777755355835 | Test Loss 1.0318778211593629 | Train Acc 0.1 | Test Acc 0.1
Time elapsed for the current epoch 60.96457648277283

Epoch 1/10
Training:
Evaluation of test data:

 Finished epoch 1/10: Train Loss 1.183716427268982 | Test Loss 1.4749269319534302 | Train Acc 0.31122 | Test Acc 0.4166
Time elapsed for the current epoch 240.85318732261658

Epoch 2/10
Training:
Evaluation of test data:

 Finished epoch 2/10: Train Loss 0.8551070518112183 | Test Loss 0.8091922302246094 | Train Acc 0.5443 | Test Acc 0.5908
Time elapsed for the current epoch 241.20625829696655

Epoch 3/10
Training:
Evaluation of test data:

 Finished epoch 3/10: Train Loss 0.621567981185913 | Test Loss 0.5698805132865906 | Train Acc 0.69824 | Test Acc 0.7381
Time elapsed for the current epoch 238.17877006530762

Epoch 4/10
Training:
Evaluation of test data:

 Finished epoch 4/10: Train Loss 0.4562594448661804 | 

In [None]:
#@title Save training results and plot

if load:
    output_folder = os.path.join(os.getcwd(), 'results')
    os.makedirs(output_folder, exist_ok=True)
    fname = output_folder + '/stats_dict_' + model_type + '.pkl'
    with open(fname, 'rb') as handle:
        stats_dict = pickle.load(handle)

results = {'epochs': nepochs, 'train_losses': train_losses, 
           'train_acc': train_accuracies, 'test_losses': test_losses, 
           'test_acc': test_accuracies, 'elapsed_time': elapsed_time}
stats_dict = {}
stats_dict.update({optimizer_name: results})

# save everything onto file
if save_stats: 
    output_folder = os.path.join(os.getcwd(), 'results')  # set the folder
    os.makedirs(output_folder, exist_ok=True)
    fname = output_folder + '/stats_dict_' + model_type + '.pkl'
    with open(fname, 'wb') as handle:
        pickle.dump(stats_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Parameters used in the report

In order to reproduce our results, the following set of parameters should be used.\
If not specified, the remaining parameters (e.g. $\epsilon$ for Adam and Adagrad) are set to their default values.

Deep Frank Wolfe:\
$η = 0.1$, $μ = 0.9$, $w_d = 0$

Stochastic Gradient Descent:\
$\gamma = 0.001$, $\mu = 0.9$, $w_d = 0$

Adam:\
$\gamma = 0.001$, $\mu = 0.9$, $\beta_1 = 0.9$, $\beta_2 = 0.99$

AdaGrad:\
$\gamma = 0.001$, $w_d = 0$
