In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project checkout on Drive: the `Frank_Wolfe` package
# imported by the next cell, the relative dataset root and the `results` folder (built from
# os.getcwd()) are all resolved against this directory.
%cd /content/drive/Othercomputers/Il mio laptop/Machine-Learning-Optimization

Mounted at /content/drive
/content/drive/Othercomputers/Il mio laptop/Machine-Learning-Optimization


In [None]:
from Frank_Wolfe.utils.utils import *
from Frank_Wolfe.SFW import *
from Frank_Wolfe.constraints.constraints import *
from Frank_Wolfe.architectures import *
!pip install barbar
from barbar import Bar
import os
import pickle

device = is_cuda_available()

Collecting barbar
  Downloading barbar-0.2.1-py3-none-any.whl (3.9 kB)
Installing collected packages: barbar
Successfully installed barbar-0.2.1


In [None]:
# Output switches. `save_stats` gates the pickle dump of the collected statistics further
# down; `save_figs` is not read by any cell visible here (presumably used by plotting cells).
save_stats = save_figs = True

In [None]:
#@title Choose dataset name and architecture of the network 

dataset_name = 'CIFAR10' #@param ['CIFAR10', 'CIFAR100']
model_type = 'GoogLeNet' #@param ['DenseNet', 'WideResNet', 'GoogLeNet', 'ResNeXt']

# Size of the classification head, derived once so that every architecture below is built
# with an output layer matching the selected dataset.
num_classes = 10 if dataset_name == 'CIFAR10' else 100

if model_type == 'GoogLeNet':
  model = GoogleNet(num_class=num_classes)
elif model_type == 'DenseNet':
  # torchvision builders default to a 1000-way classifier; size it for the dataset instead.
  model = torchvision.models.densenet121(pretrained=False, num_classes=num_classes)
elif model_type == 'ResNeXt':
  # NOTE(review): the option is labelled 'ResNeXt' but a plain ResNet-101 is built here.
  # The architecture is left unchanged; confirm whether a resnext* builder was intended.
  model = torchvision.models.resnet101(pretrained=False, num_classes=num_classes)
elif model_type == 'WideResNet':
  model = WideResNet(num_classes=num_classes)
else:
  raise ValueError("Please, select an available architecture")

# Project helpers: `datasetDict['datasetDict']` is the dataset constructor called below, and
# the two transform dicts are indexed by dataset name.
datasetDict = setDatasetAttributes(dataset_name)
trainTransformDict, testTransformDict = setTrainAndTest(dataset_name)

# Dataset files live in a per-dataset folder relative to the current working directory.
root = f"{dataset_name}-dataset"

trainData = datasetDict['datasetDict'](root=root, train=True, download=True,
                                       transform=trainTransformDict[dataset_name])
# No `download=True` here: the test split relies on the files fetched for the train split.
testData = datasetDict['datasetDict'](root=root, train=False,
                                      transform=testTransformDict[dataset_name])

Files already downloaded and verified


In [None]:
#@title Choosing Lp-Norm constraints

# Order p of the Lp-norm constraint. The form offers numbers and the string 'inf', so the
# value is normalised to a float (float('inf') yields infinity).
# NOTE: this name shadows the builtin ord() for the rest of the notebook.
ord =  2 #@param [1, 2, 5, 'inf']
ord = float(ord)
# `value` and `mode` are passed unchanged to create_lp_constraints(...) in the training cell.
value = 10 #@param {type:"number"}
mode = 'initialization' #@param ['initialization', 'radius', 'diameter']

# The constraint size must be strictly positive.
assert value > 0

In [None]:
#@title Configuring the Frank-Wolfe Algorithm
#@markdown Choose momentum and learning rate rescaling, see Section 3.1 of [arXiv:2010.07243](https://arxiv.org/pdf/2010.07243.pdf).
momentum = 0.9 #@param {type:"number"}
rescale = 'gradient' #@param ['gradient', 'diameter', 'None']
# The form can only deliver strings, so the 'None' choice is mapped to a real None.
rescale = None if rescale == 'None' else rescale

#@markdown Choose a learning rate for SFW. You can activate the learning rate scheduler which automatically multiplies the current learning rate by `lr_decrease_factor` every `lr_step_size epochs`
learning_rate = 0.1 #@param {type:"number"}
# The three scheduler settings below are consumed when StepLR is built in the training cell.
lr_scheduler_active = True #@param {type:"boolean"}
lr_decrease_factor = 0.1 #@param {type:"number"}
lr_step_size = 60 #@param {type:"integer"}

#@markdown You can also enable retraction of the learning rate, i.e., if enabled the learning rate is increased and decreased automatically depending on the two moving averages of different length of the train loss over the epochs.
retraction = True #@param {type:"boolean"}

# Sanity-check the form values before the optimizer is constructed.
assert learning_rate > 0
assert 0 <= momentum <= 1
assert lr_decrease_factor > 0
assert lr_step_size > 0

# Build the SFW (Frank-Wolfe) optimizer over all parameters of the selected model.
# Note that this happens before the training cell moves the model to `device`.
optimizer = SFW(params=model.parameters(), learning_rate=learning_rate, momentum=momentum, rescale=rescale)

In [None]:
# Accumulator for the experiment: maps a norm order (`ord`) to the statistics dict of the
# run performed with that norm. Re-running this cell discards previously stored runs.
different_norms_dict_other = dict()

### We now analyse how performance depends on the choice of norm for the constraint, for a fixed `value`

In [None]:
# Training budget.
nepochs = 10
batch_size = 64

# Per-epoch history; the epoch loop appends one entry to each list per epoch.
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []
epochs_times = []

# Device to run on (re-queried here so this cell does not depend on the import cell's value).
device = is_cuda_available()

# Build the Lp-norm constraints for the model from the form settings, then apply
# make_feasible before any training step (presumably so the starting weights satisfy
# the constraints — confirm in Frank_Wolfe.constraints).
constraints = create_lp_constraints(model, ord=ord, value=value, mode=mode)
make_feasible(model, constraints)

# Loss object and model both live on the selected device.
loss_criterion = torch.nn.CrossEntropyLoss().to(device=device)
model = model.to(device=device)

# Data loaders: identical settings for both splits, only the training split is shuffled.
loader_options = dict(batch_size=batch_size, pin_memory=torch.cuda.is_available(), num_workers=2)
trainLoader = torch.utils.data.DataLoader(trainData, shuffle=True, **loader_options)
testLoader = torch.utils.data.DataLoader(testData, shuffle=False, **loader_options)

# Running meters for the current epoch, one per (split, metric) pair.
train_loss, train_accuracy = AverageMeter(), AverageMeter()
test_loss, test_accuracy = AverageMeter(), AverageMeter()

# Optional learning-rate schedulers; each name is only defined when its flag is set,
# and the epoch loop guards its use with the same flag.
if lr_scheduler_active:
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer=optimizer,
        step_size=lr_step_size,
        gamma=lr_decrease_factor,
    )

if retraction:
    retractionScheduler = RetractionLR(optimizer=optimizer)

def reset_metrics():
    """Reset the four running meters (train/test loss and accuracy).

    Called at the start of every epoch so each epoch's averages start from scratch.
    """
    for meter in (train_loss, train_accuracy, test_loss, test_accuracy):
        meter.reset()

@torch.no_grad()
def evaluate_model(data="train"):
    """Run one full pass over a data split and accumulate its loss and accuracy.

    Gradients are disabled by the decorator and the model is switched to evaluation
    mode (and left in it). For every batch, the loss and the categorical accuracy are
    fed, together with the batch size, into the module-level meters of the chosen
    split. Nothing is returned; read the meters afterwards.

    Args:
        data: Which split to evaluate, either "train" or "test".

    Raises:
        ValueError: If ``data`` is neither "train" nor "test". Previously such a value
            fell through both branches and failed later with an unrelated
            UnboundLocalError.
    """
    # Validate first, before touching any global state or writing output.
    if data not in ("train", "test"):
        raise ValueError(f"data must be 'train' or 'test', got {data!r}")

    if data == "train":
        loader = trainLoader
        mean_loss, mean_accuracy = train_loss, train_accuracy
    else:
        loader = testLoader
        mean_loss, mean_accuracy = test_loss, test_accuracy

    # Switching to eval mode once is enough; it does not need repeating per batch.
    model.eval()

    sys.stdout.write(f"Evaluation of {data} data:\n")
    for x_input, y_target in Bar(loader):
        x_input, y_target = x_input.to(device), y_target.to(device)  # Move to CUDA if possible
        output = model(x_input)
        loss = loss_criterion(output, y_target)
        mean_loss(loss.item(), len(y_target))
        mean_accuracy(Utilities.categorical_accuracy(y_true=y_target, output=output), len(y_target))


# Epoch 0 is an evaluation-only pass that records the metrics of the model before any
# update; epochs 1..nepochs each run one training sweep. Every epoch ends with a test
# evaluation, a summary line and one new entry in each history list.
for epoch in range(nepochs + 1):
    start = time.time()
    reset_metrics()
    sys.stdout.write(f"\n\nEpoch {epoch}/{nepochs}\n")

    if epoch > 0:
        sys.stdout.write("Training:\n")
        # evaluate_model() leaves the model in eval mode, so switch back before the sweep.
        model.train()
        for x_input, y_target in Bar(trainLoader):
            # Move the batch to the selected device (CUDA if possible).
            x_input = x_input.to(device)
            y_target = y_target.to(device)

            optimizer.zero_grad()  # clear gradients left over from the previous step
            output = model(x_input)
            loss = loss_criterion(output, y_target)
            loss.backward()
            # The Frank-Wolfe step receives the constraints describing the feasible region.
            optimizer.step(constraints=constraints)

            # Training metrics are accumulated on the fly, batch by batch.
            train_loss(loss.item(), len(y_target))
            train_accuracy(Utilities.categorical_accuracy(y_true=y_target, output=output), len(y_target))

        # Per-epoch learning-rate adjustments (each scheduler exists only if its flag is set).
        if lr_scheduler_active:
            scheduler.step()
        if retraction:
            retractionScheduler.update_averages(train_loss.result())
            retractionScheduler.step()
    else:
        # Nothing has been trained yet: just measure the train metrics once.
        evaluate_model(data='train')

    evaluate_model(data='test')

    summary = " | ".join([
        f"Train Loss {train_loss.result()}",
        f"Test Loss {test_loss.result()}",
        f"Train Acc {train_accuracy.result()}",
        f"Test Acc {test_accuracy.result()}",
    ])
    sys.stdout.write(f"\n Finished epoch {epoch}/{nepochs}: {summary}\n")

    # Record this epoch in the history lists.
    for history, meter in (
        (train_losses, train_loss),
        (train_accuracies, train_accuracy),
        (test_losses, test_loss),
        (test_accuracies, test_accuracy),
    ):
        history.append(meter.result())

    # Wall-clock duration of the whole epoch, including the evaluation passes.
    elapsed_time = time.time() - start
    sys.stdout.write(f"Time elapsed for the current epoch {elapsed_time}")
    epochs_times.append(elapsed_time)



Epoch 0/10
Evaluation of train data:
Evaluation of test data:

 Finished epoch 0/10: Train Loss 2.3027064724731447 | Test Loss 2.302706488800049 | Train Acc 0.1 | Test Acc 0.1
Time elapsed for the current epoch 160.03240823745728

Epoch 1/10
Training:
Evaluation of test data:

 Finished epoch 1/10: Train Loss 1.314090299320221 | Test Loss 1.117825666809082 | Train Acc 0.5187 | Test Acc 0.5981
Time elapsed for the current epoch 613.5114524364471

Epoch 2/10
Training:
Evaluation of test data:

 Finished epoch 2/10: Train Loss 0.8299031141281128 | Test Loss 0.8585513844490051 | Train Acc 0.71022 | Test Acc 0.7144
Time elapsed for the current epoch 616.481103181839

Epoch 3/10
Training:
 3200/50000: [==>.............................] - ETA 619.3s

In [None]:
# Snapshot of the run that just finished, stored under the norm order it was trained with.
current_dict = {
    'epochs': nepochs,
    'train_losses': train_losses,
    'test_losses': test_losses,
    'train_accuracies': train_accuracies,
    'test_accuracies': test_accuracies,
    # Duration in seconds of the LAST epoch only; key kept unchanged for existing consumers.
    'elapsed_time': elapsed_time,
    # Full per-epoch timing history, which the training loop collects but was never stored.
    'epochs_times': epochs_times,
}
# A later run with the same `ord` overwrites this entry.
different_norms_dict_other[ord] = current_dict

In [None]:
# Persist the accumulated per-norm statistics as a pickle inside ./results (relative to the
# current working directory), but only when saving is enabled.
if save_stats:
    output_folder = os.path.join(os.getcwd(), 'results')
    os.makedirs(output_folder, exist_ok=True)  # create the folder on first use
    # One file per (dataset, architecture) pair; an existing file is overwritten.
    fname = f"{output_folder}/stats_dict_different_norms_other_list_{dataset_name}_{model_type}.pkl"
    with open(fname, mode='wb') as handle:
        pickle.dump(different_norms_dict_other, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Show the accumulated results; as the cell's last expression it is rendered as cell output.
different_norms_dict_other

{0: {'elapsed_time': 606.9135293960571,
  'epochs': 10,
  'test_accuracies': [0.1,
   0.6934,
   0.7447,
   0.8048,
   0.8194,
   0.8175,
   0.7918,
   0.828,
   0.8265,
   0.8203,
   0.8568],
  'test_losses': [2.3026767009735107,
   0.8827920928955079,
   0.73717989153862,
   0.5900344198226929,
   0.5351551820516587,
   0.5244382320404053,
   0.6094213529586792,
   0.5032419197082519,
   0.5103842622995377,
   0.5290620605945587,
   0.42982167949676514],
  'train_accuracies': [0.1,
   0.55534,
   0.73572,
   0.79454,
   0.8255,
   0.84164,
   0.8514,
   0.86056,
   0.86748,
   0.87194,
   0.87668],
  'train_losses': [2.302676717681885,
   1.2216925020980836,
   0.7586758876609803,
   0.601818448047638,
   0.5193995813941955,
   0.46962812870025633,
   0.4394497441673279,
   0.41368210065841676,
   0.3916200085258484,
   0.3789891492748261,
   0.3645921782207489]}}