In [279]:
import torch
import torch.optim as optim
import torch.nn as nn

In [280]:
# CNN model definition

class CNN(nn.Module):
    """Small convolutional classifier: three conv stages followed by two linear layers.

    The flatten size ``64 * 4 * 4`` used by ``fc1`` corresponds to a
    single-channel 28x28 input, following the layer arithmetic
    28 -> conv1 (padding=1) 28 -> pool1 14 -> conv2 12 -> pool2 6 -> conv3 4.

    ``forward`` returns softmax probabilities over 10 classes, one row per sample.

    NOTE(review): the output is already softmax-normalised. If training uses
    ``nn.CrossEntropyLoss`` (which applies log-softmax itself), the
    normalisation would be applied twice - confirm against the training code.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU()
        )
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU()
        )
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),
            nn.ReLU()
        )

        # Fully connected head
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 4 * 4, 64),
            nn.Sigmoid()
        )
        self.fc_final = nn.Sequential(
            nn.Linear(64, 10),
            # Output layer of a classification task, so softmax is fixed here.
            # dim=1 normalises across the class axis of the (N, 10) logits; leaving
            # dim unset relies on a deprecated implicit choice and emits a warning.
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        """Run the network on ``x`` and return per-class probabilities of shape (N, 10)."""

        # Convolutional feature extractor
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)

        # Flatten each sample's 64 x 4 x 4 feature map into one vector.
        x = x.view(-1, 64 * 4 * 4)

        # Fully connected head
        x = self.fc1(x)
        x = self.fc_final(x)

        return x

In [281]:
# Display the learning-rate chart of each scheduler

import plotly.graph_objects as go

# args:
# - scheduler_name : name of scheduler to create
# - optimizer      : optimizer for the scheduler

def create_scheduler(scheduler_name, optimizer):
    """Build the learning-rate scheduler identified by ``scheduler_name``.

    Args:
        scheduler_name: One of ``'multiplicative'``, ``'exponential'``, ``'step'``,
            ``'multistep'``, ``'cosine_annealing'``,
            ``'cosine_annealing_warm_restarts'``, ``'lambda'``, or
            ``'cyclic_<mode>'``, where ``<mode>`` is forwarded unchanged to
            ``CyclicLR`` as its ``mode`` argument.
        optimizer: Optimizer the scheduler is attached to.

    Returns:
        The ``torch.optim.lr_scheduler`` instance created for ``optimizer``.

    Raises:
        ValueError: If ``scheduler_name`` is not one of the supported names
            (including when it is not a string).
    """
    schedulers = optim.lr_scheduler
    cyclic_prefix = 'cyclic_'

    if scheduler_name == 'multiplicative':
        # NOTE(review): MultiplicativeLR multiplies the *current* LR by the returned
        # factor on every step, so 0.95 ** epoch compounds and decays faster than
        # the 'lambda' variant below, which scales the initial LR. Confirm that
        # this contrast is intended.
        return schedulers.MultiplicativeLR(optimizer=optimizer,
                                           lr_lambda=lambda epoch: 0.95 ** epoch)

    elif scheduler_name == 'exponential':
        return schedulers.ExponentialLR(optimizer=optimizer,
                                        gamma=0.95)

    elif scheduler_name == 'step':
        return schedulers.StepLR(optimizer=optimizer,
                                 step_size=10,
                                 gamma=0.5)

    elif scheduler_name == 'multistep':
        return schedulers.MultiStepLR(optimizer=optimizer,
                                      milestones=[15, 25, 30, 35, 45],
                                      gamma=0.5)

    # Disabled: ReduceLROnPlateau reacts to a monitored loss rather than to the
    # epoch count, so it is not offered here.
    # elif scheduler_name == 'reduce_lr_on_plateau':
    #     return schedulers.ReduceLROnPlateau(optimizer=optimizer,
    #                                         mode='min')  # 'min': act when the loss stops decreasing

    elif scheduler_name == 'cosine_annealing':
        return schedulers.CosineAnnealingLR(optimizer=optimizer,
                                            T_max=10,
                                            eta_min=0)

    elif scheduler_name == 'cosine_annealing_warm_restarts':
        return schedulers.CosineAnnealingWarmRestarts(optimizer=optimizer,
                                                      T_0=5,
                                                      T_mult=2,
                                                      eta_min=1e-5)

    elif isinstance(scheduler_name, str) and scheduler_name.startswith(cyclic_prefix):
        # Everything after the 'cyclic_' prefix is the CyclicLR mode name.
        # NOTE(review): cycle_momentum is left at its default; older PyTorch
        # releases may reject optimizers without a `momentum` default (e.g. AdamW)
        # - confirm against the installed version.
        return schedulers.CyclicLR(optimizer=optimizer,
                                   base_lr=1e-5,
                                   step_size_up=5,
                                   max_lr=0.0016,
                                   gamma=0.92,  # only used by the 'exp_range' mode
                                   mode=scheduler_name[len(cyclic_prefix):])

    elif scheduler_name == 'lambda':
        return schedulers.LambdaLR(optimizer=optimizer,
                                   lr_lambda=lambda epoch: 0.95 ** epoch)

    else:
        # ValueError is still an Exception, so existing `except Exception` callers keep working.
        raise ValueError(f'Error: wrong scheduler name {scheduler_name}')


# args :
# - scheduler_name : name of scheduler to plot learning rate chart
# - epochs         : the number of epoch count for x-axis

def plot_learning_rate_chart(scheduler_name, epochs=60):
    """Show a Plotly line chart of the learning rate the named scheduler yields per epoch.

    Args:
        scheduler_name: Scheduler name understood by ``create_scheduler``.
        epochs: Number of scheduler steps to record; also the x-axis length.

    The figure is displayed with ``fig.show()``; nothing is returned.
    The y-axis is fixed to the range 0 .. 0.0016, the optimizer's initial LR.
    """
    x_values = list(range(epochs))

    # A fresh CNN only supplies parameters so that an optimizer can be built.
    optimizer = optim.AdamW(CNN().parameters(), lr=0.0016)
    scheduler = create_scheduler(scheduler_name, optimizer)

    # Record the LR in effect for each epoch, then advance the schedule.
    # optimizer.step() is called before scheduler.step(), the order PyTorch expects.
    lr_history = []
    for _ in x_values:
        lr_history.append(scheduler.get_last_lr()[0])
        optimizer.step()
        scheduler.step()

    # Build the chart
    lr_trace = go.Scatter(x=x_values,
                          y=lr_history,
                          mode='lines',
                          name='Learning Rate Trend')
    fig = go.Figure()
    fig.add_trace(lr_trace)

    layout_options = dict(
        width=800,
        height=500,
        title=f'Learning Rate Trend of {scheduler_name} L.R. scheduler',
        xaxis_title='Epoch',
        yaxis_title='Learning Rate',
    )
    fig.update_layout(**layout_options)

    fig.update_xaxes(range=[0, epochs], dtick=5)
    fig.update_yaxes(range=[0.0, 0.0016], dtick=0.0002)

    fig.show()

In [282]:
# Chart the 'multiplicative' schedule (MultiplicativeLR, factor 0.95 ** epoch) over the default 60 epochs.
plot_learning_rate_chart(scheduler_name='multiplicative')

In [283]:
# Chart the 'exponential' schedule (ExponentialLR, gamma=0.95) over the default 60 epochs.
plot_learning_rate_chart(scheduler_name='exponential')

In [284]:
# Chart the 'step' schedule (StepLR, step_size=10, gamma=0.5) over the default 60 epochs.
plot_learning_rate_chart(scheduler_name='step')

In [285]:
# Chart the 'multistep' schedule (MultiStepLR, gamma=0.5 at milestones 15, 25, 30, 35, 45).
plot_learning_rate_chart(scheduler_name='multistep')

In [286]:
# ReduceLROnPlateau lowers the learning rate only when the loss actually stops decreasing during training, so it is not charted here.
# plot_learning_rate_chart(scheduler_name='reduce_lr_on_plateau')

In [287]:
# Chart the 'cosine_annealing' schedule (CosineAnnealingLR, T_max=10, eta_min=0).
plot_learning_rate_chart(scheduler_name='cosine_annealing')

In [288]:
# Chart the 'cosine_annealing_warm_restarts' schedule (CosineAnnealingWarmRestarts, T_0=5, T_mult=2, eta_min=1e-5).
plot_learning_rate_chart(scheduler_name='cosine_annealing_warm_restarts')

In [289]:
# Chart CyclicLR in 'triangular' mode (base_lr=1e-5, max_lr=0.0016, step_size_up=5).
plot_learning_rate_chart(scheduler_name='cyclic_triangular')

In [290]:
# Chart CyclicLR in 'triangular2' mode (base_lr=1e-5, max_lr=0.0016, step_size_up=5).
plot_learning_rate_chart(scheduler_name='cyclic_triangular2')

In [291]:
# Chart CyclicLR in 'exp_range' mode (base_lr=1e-5, max_lr=0.0016, step_size_up=5, gamma=0.92).
plot_learning_rate_chart(scheduler_name='cyclic_exp_range')

In [292]:
# Chart the 'lambda' schedule (LambdaLR, factor 0.95 ** epoch) over the default 60 epochs.
plot_learning_rate_chart(scheduler_name='lambda')