In [1]:
import os
import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms

In [2]:
# All tensors and the model are placed on the CPU.
DEVICE = torch.device("cpu")

# Mini-batch size used by both the training and the validation loader.
BATCHSIZE = 128

# Width of the classifier's output layer (10-way classification).
CLASSES = 10

# Dataset root directory: the kernel's current working directory.
DIR = os.getcwd()

# Number of epochs each trial trains for.
EPOCHS = 10

# Not referenced by any other cell visible in this notebook.
LOG_INTERVAL = 10

# Per-epoch sample budgets, expressed in whole batches:
# 30 batches (128 * 30) for training, 10 batches (128 * 10) for validation.
N_TRAIN_EXAMPLES = 30 * BATCHSIZE
N_VALID_EXAMPLES = 10 * BATCHSIZE

In [3]:
def define_model(trial):
    """Assemble a feed-forward classifier whose architecture is picked by ``trial``.

    The trial chooses the number of hidden layers (1 to 3) and, for each hidden
    layer, its width (4 to 128 units) and its dropout probability (0.2 to 0.5).
    Every hidden stage is ``Linear -> ReLU -> Dropout``; the network ends with a
    ``Linear`` projection to ``CLASSES`` outputs followed by ``LogSoftmax``.

    Args:
        trial: Object exposing ``suggest_int`` and ``suggest_float`` (the
            trial that ``objective`` receives).

    Returns:
        An ``nn.Sequential`` whose first layer takes 28 * 28 input features.
    """
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    fan_in = 28 * 28
    for idx in range(depth):
        width = trial.suggest_int("n_units_l{}".format(idx), 4, 128)
        modules += [nn.Linear(fan_in, width), nn.ReLU()]
        # Original author's note: the larger the dropout ratio, the more
        # information is discarded, so loss falls and accuracy rises more slowly.
        drop_prob = trial.suggest_float("dropout_l{}".format(idx), 0.2, 0.5)
        modules.append(nn.Dropout(drop_prob))
        # The next layer consumes this layer's output.
        fan_in = width

    # Output head: log-probabilities over dim 1, one column per class.
    modules += [nn.Linear(fan_in, CLASSES), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*modules)

In [4]:
def get_mnist():
    """Build the FashionMNIST training and validation data loaders.

    Both datasets are rooted at ``DIR`` and convert images with ``ToTensor``.
    Only the training split is constructed with ``download=True``; the
    ``train=False`` split serves as validation data. Both loaders use batches
    of ``BATCHSIZE`` samples and are created with ``shuffle=True``.

    Returns:
        A ``(train_loader, valid_loader)`` tuple of ``DataLoader`` objects.
    """
    train_set = datasets.FashionMNIST(
        DIR, train=True, download=True, transform=transforms.ToTensor()
    )
    valid_set = datasets.FashionMNIST(
        DIR, train=False, transform=transforms.ToTensor()
    )

    # Identical loader settings for both splits.
    loader_options = dict(batch_size=BATCHSIZE, shuffle=True)
    train_loader = torch.utils.data.DataLoader(train_set, **loader_options)
    valid_loader = torch.utils.data.DataLoader(valid_set, **loader_options)
    return train_loader, valid_loader

In [5]:
def objective(trial):
    """Train one sampled configuration and return its validation accuracy.

    The trial supplies the network architecture (via ``define_model``), the
    optimizer class (``Adam``, ``RMSprop`` or ``SGD``) and the learning rate
    (between 1e-5 and 1e-1, sampled with ``log=True``). The model is trained
    for ``EPOCHS`` epochs on a capped number of batches; after every epoch it
    is scored on a capped number of validation batches and the accuracy is
    reported to the trial so that it can be pruned.

    Args:
        trial: The Optuna trial that provides the hyperparameter suggestions
            and receives the per-epoch accuracy reports.

    Returns:
        The validation accuracy (correct / evaluated samples) measured after
        the final epoch.

    Raises:
        optuna.exceptions.TrialPruned: If ``trial.should_prune()`` is true
            after an epoch's accuracy has been reported.
    """
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizer: the class is looked up on torch.optim by name.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_mnist()

    # Training of the model.
    for epoch in range(EPOCHS):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # Limit training data for faster epochs: stop once
            # batch_idx * BATCHSIZE reaches N_TRAIN_EXAMPLES.
            if batch_idx * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            # Flatten every sample to one row before feeding the linear layers.
            data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)

            optimizer.zero_grad()  # clear gradients left from the previous step
            output = model(data)
            loss = F.nll_loss(output, target)  # model emits log-probabilities
            loss.backward()  # back-propagate to compute the current gradients
            optimizer.step()  # update the parameters from those gradients

        # Validation of the model.
        model.eval()
        correct = 0
        evaluated = 0
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(valid_loader):
                # Limit validation data: stop once batch_idx * BATCHSIZE
                # reaches N_VALID_EXAMPLES.
                if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES:
                    break
                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                output = model(data)
                # Get the index of the max log-probability.
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()
                evaluated += target.size(0)

        # Divide by the number of samples actually scored. The previous
        # denominator, min(len(dataset), N_VALID_EXAMPLES), is only correct when
        # N_VALID_EXAMPLES is a multiple of BATCHSIZE; otherwise the last
        # counted batch overshoots the cap and accuracy could exceed 1.0.
        accuracy = correct / evaluated

        trial.report(accuracy, epoch)

        # Pruning: abandon the trial when the study's pruner says so.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return accuracy

In [6]:
if __name__ == "__main__":
    # Search for the configuration that maximizes the value returned by
    # `objective`, bounded by 100 trials and a 600-second timeout.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, timeout=600, n_trials=100)

    # Split the recorded trials by final state for the summary below.
    complete_trials = study.get_trials(states=[TrialState.COMPLETE], deepcopy=False)
    pruned_trials = study.get_trials(states=[TrialState.PRUNED], deepcopy=False)

    print("Study statistics: ")
    for label, count in (
        ("  Number of finished trials: ", len(study.trials)),
        ("  Number of pruned trials: ", len(pruned_trials)),
        ("  Number of complete trials: ", len(complete_trials)),
    ):
        print(label, count)

    # Report the best trial's objective value and its hyperparameters.
    print("Best trial:")
    trial = study.best_trial
    print("  Value: ", trial.value)
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[32m[I 2022-03-25 12:15:29,270][0m A new study created in memory with name: no-name-18546704-e0af-4361-a8cd-46f87ce96699[0m
[32m[I 2022-03-25 12:15:32,350][0m Trial 0 finished with value: 0.75859375 and parameters: {'n_layers': 1, 'n_units_l0': 102, 'dropout_l0': 0.43718999380914475, 'optimizer': 'RMSprop', 'lr': 0.01147261821316032}. Best is trial 0 with value: 0.75859375.[0m
[32m[I 2022-03-25 12:15:35,454][0m Trial 1 finished with value: 0.76875 and parameters: {'n_layers': 1, 'n_units_l0': 106, 'dropout_l0': 0.283085437461691, 'optimizer': 'RMSprop', 'lr': 0.00027637306281355314}. Best is trial 1 with value: 0.76875.[0m
[32m[I 2022-03-25 12:15:38,553][0m Trial 2 finished with value: 0.7703125 and parameters: {'n_layers': 3, 'n_units_l0': 51, 'dropout_l0': 0.3554200290842655, 'n_units_l1': 82, 'dropout_l1': 0.37662123644914547, 'n_units_l2': 127, 'dropout_l2': 0.4889829187826453, 'optimizer': 'RMSprop', 'lr': 0.0008908392574833357}. Best is trial 2 with value: 0.7703125.[

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  61
  Number of complete trials:  39
Best trial:
  Value:  0.85234375
  Params: 
    n_layers: 1
    n_units_l0: 128
    dropout_l0: 0.4693805468746302
    optimizer: Adam
    lr: 0.005036196303833372


In [7]:
import optuna

# Lower Optuna's log level to WARNING purely to keep the notebook output short.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Run the search again in a fresh study with an explicit MedianPruner and only
# the 100-trial limit (no timeout). This rebinds `study`, so the plotting cells
# below visualize this run rather than the one from the previous cell.
study = optuna.create_study(direction='maximize', pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=100)

In [8]:
# Contour plot of the objective value over the study's hyperparameters (no
# `params` filter is passed). In the recorded run this logged warnings for
# parameters with fewer than two unique values (dropout_l2, n_units_l2).
optuna.visualization.plot_contour(study)

[33m[W 2022-03-25 12:20:36,493][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,557][0m Param n_units_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,592][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,626][0m Param n_units_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,642][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,646][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,655][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,662][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,667][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,673][0m Param dropout_l2 unique value length is less than 2.[0m
[33m[W 2022-03-25 12:20:36,678][0m Param dropout_l2 unique

In [9]:
# Same contour plot, restricted to the `n_layers` and `lr` parameters.
optuna.visualization.plot_contour(study, params=['n_layers', 'lr'])

In [10]:
from optuna.visualization import plot_optimization_history
# Plot the optimization history of the study built above.
plot_optimization_history(study)

In [11]:
from optuna.visualization import plot_intermediate_values

# Plot the intermediate values of the study's trials; in this notebook those
# are the per-epoch accuracies passed to `trial.report` inside `objective`.
plot_intermediate_values(study)

In [12]:
from optuna.visualization import plot_parallel_coordinate

# Parallel-coordinate plot over the study's hyperparameters (no `params`
# filter is passed).
plot_parallel_coordinate(study)

In [13]:
# Same parallel-coordinate plot, restricted to `lr` and `n_layers`.
# Relies on `plot_parallel_coordinate` having been imported in the previous cell.
plot_parallel_coordinate(study, params=['lr', 'n_layers'])

In [14]:
from optuna.visualization import plot_slice
# Slice plot over the study's hyperparameters (no `params` filter is passed).
plot_slice(study)

In [15]:
# Same slice plot, restricted to the widths of the first two hidden layers.
# Relies on `plot_slice` having been imported in the previous cell.
plot_slice(study, params=['n_units_l0', 'n_units_l1'])

In [16]:
from optuna.visualization import plot_param_importances

# Plot the hyperparameter importances computed for the study.
plot_param_importances(study)