In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from hw1_1 import *
from torch.optim import Adam,SGD
from torch.autograd import Variable
import datetime

In [3]:
model,trans = myResnet(finetune=True)
layer_names = []
for idx, (name, param) in enumerate(model.named_parameters()):
    layer_names.append(name)

layer_names.reverse()
lr      = 3e-5
lr_mult = 0.7

# placeholder
parameters      = []
prev_group_name = layer_names[2].split('.')[0]

# store params & learning rates
for idx, name in enumerate(layer_names):
    if idx in[0,1]:
        parameters += [{'params': [p for n, p in model.named_parameters() if n == name and p.requires_grad],
                    'lr':     0.001}]
        print(f'{idx}: lr = {lr:.6f}, {name}')
        
        continue
    # parameter group name
    cur_group_name = name.split('.')[0]
    
    # update learning rate
    if cur_group_name != prev_group_name:
        lr *= lr_mult
    prev_group_name = cur_group_name
    
    # display info
    print(f'{idx}: lr = {lr:.6f}, {name}')
    
    # append layer parameters
    parameters += [{'params': [p for n, p in model.named_parameters() if n == name and p.requires_grad],
                    'lr':     lr}]

0: lr = 0.000030, fc.bias
1: lr = 0.000030, fc.weight
2: lr = 0.000030, layer4.2.bn3.bias
3: lr = 0.000030, layer4.2.bn3.weight
4: lr = 0.000030, layer4.2.conv3.weight
5: lr = 0.000030, layer4.2.bn2.bias
6: lr = 0.000030, layer4.2.bn2.weight
7: lr = 0.000030, layer4.2.conv2.weight
8: lr = 0.000030, layer4.2.bn1.bias
9: lr = 0.000030, layer4.2.bn1.weight
10: lr = 0.000030, layer4.2.conv1.weight
11: lr = 0.000030, layer4.1.bn3.bias
12: lr = 0.000030, layer4.1.bn3.weight
13: lr = 0.000030, layer4.1.conv3.weight
14: lr = 0.000030, layer4.1.bn2.bias
15: lr = 0.000030, layer4.1.bn2.weight
16: lr = 0.000030, layer4.1.conv2.weight
17: lr = 0.000030, layer4.1.bn1.bias
18: lr = 0.000030, layer4.1.bn1.weight
19: lr = 0.000030, layer4.1.conv1.weight
20: lr = 0.000030, layer4.0.downsample.1.bias
21: lr = 0.000030, layer4.0.downsample.1.weight
22: lr = 0.000030, layer4.0.downsample.0.weight
23: lr = 0.000030, layer4.0.bn3.bias
24: lr = 0.000030, layer4.0.bn3.weight
25: lr = 0.000030, layer4.0.conv3.

In [4]:
train_loader = loadData('hw1_data/hw1_data/p1_data/train_50',trans,data_aug=True)
val_loader = loadData('hw1_data/hw1_data/p1_data/val_50',trans)



loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(parameters)

In [5]:
def saveModel():
    path = "./hw1_1_myResnet.pth"
    torch.save(model.state_dict(), path)

def valAccuracy():
    model.eval().cuda()
    accuracy = 0.0
    total = 0.0
    
    with torch.no_grad():
        for data in val_loader:
            images, labels = data
            outputs = model(images.cuda())
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels.cuda()).sum().item()
    
    accuracy = (100 * accuracy / total)
    return(accuracy)


def train(num_epochs):
    
    best_accuracy = 0.0

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        start = datetime.datetime.now()
        for i, (images, labels) in enumerate(train_loader, 0):
            
            # get the inputs
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(images)
            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

        end = datetime.datetime.now()
        # Compute and print the average accuracy fo this epoch when tested over all test images
        accuracy = valAccuracy()
        print('For epoch', epoch+1,': test accuracy:{:.4f}%, loss:{:.4f}, time:{}'.format(accuracy,loss.item(),end-start))
        
        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            saveModel()
            best_accuracy = accuracy

In [6]:
train(50)

The model will be running on cuda:0 device
For epoch 1 : test accuracy:81.8800%, loss:0.5380, time:0:04:51.513415
For epoch 2 : test accuracy:83.4800%, loss:0.3306, time:0:04:38.019422
For epoch 3 : test accuracy:83.4400%, loss:0.3063, time:0:04:38.436704
For epoch 4 : test accuracy:84.3600%, loss:0.0403, time:0:04:39.273481
For epoch 5 : test accuracy:83.9600%, loss:0.0002, time:0:04:38.219280
For epoch 6 : test accuracy:85.3600%, loss:0.0011, time:0:04:38.172458
For epoch 7 : test accuracy:84.6800%, loss:0.0126, time:0:04:38.251480
For epoch 8 : test accuracy:83.4400%, loss:0.0054, time:0:04:40.007428
For epoch 9 : test accuracy:85.0800%, loss:0.0075, time:0:04:39.761769
For epoch 10 : test accuracy:85.5200%, loss:0.0552, time:0:04:39.180326
For epoch 11 : test accuracy:84.0000%, loss:0.0011, time:0:04:38.352262
For epoch 12 : test accuracy:82.6400%, loss:0.0003, time:0:04:38.059576
For epoch 13 : test accuracy:84.4800%, loss:0.0001, time:0:04:39.468355
For epoch 14 : test accuracy:8