In [2]:
from load_data import train_dl,val_dl
import torch
from torch import nn
from torch import optim
from torchvision.models.segmentation import deeplabv3_resnet101

Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar


In [3]:
# Model, loss and optimizer used for training.
#
# Adam signature, for reference:
#   torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08,
#                    weight_decay=0, amsgrad=False)
#   params -- iterable of parameters to optimize, or dicts defining parameter groups
#   lr     -- learning rate (library default: 1e-3)

# DeepLabV3 with a ResNet-101 backbone, randomly initialised (no pretrained
# weights), predicting 21 classes.
# NOTE(review): 21 is presumably the 20 Pascal VOC object classes plus
# background -- confirm against load_data.
model = deeplabv3_resnet101(
    pretrained=False,
    num_classes=21,
)

# reduction='sum' adds up the per-element losses instead of averaging them.
# NOTE(review): VOC masks commonly mark void/boundary pixels with label 255;
# no ignore_index is set here, so this assumes load_data already remaps or
# removes that label -- TODO confirm.
loss_function = nn.CrossEntropyLoss(reduction='sum')

# The learning rate is set well above Adam's default of 1e-3.
opt = optim.Adam(params=model.parameters(), lr=0.03)

In [4]:
def loss_batch(loss_func, output, target, opt=None):
    """Compute the loss for one batch and, optionally, take one optimizer step.

    Args:
        loss_func: Callable invoked as ``loss_func(output, target)``; must
            return a scalar tensor.
        output: Predictions. For the segmentation model in this notebook the
            expected shape is (batch_size, num_classes, height, width).
        target: Ground truth. For the segmentation model in this notebook the
            expected shape is (batch_size, height, width).
        opt: Optimizer. When given, gradients are reset, the loss is
            back-propagated and a single parameter update is performed.
            When ``None`` the loss is only evaluated.

    Returns:
        A tuple ``(loss_value, None)`` where ``loss_value`` is the loss as a
        Python float. The second slot is a placeholder for a metric and is
        always ``None``.
    """
    batch_loss = loss_func(output, target)  # scalar tensor

    # Evaluation only: nothing to update.
    if opt is None:
        return batch_loss.item(), None

    opt.zero_grad()        # clear gradients left over from the previous step
    batch_loss.backward()  # back-propagate through the graph
    opt.step()             # apply one parameter update

    # .item() extracts the plain Python number held by the scalar tensor.
    return batch_loss.item(), None

In [5]:
# torch.optim.lr_scheduler provides several ways to adjust the learning rate
# as training progresses. ReduceLROnPlateau lowers the learning rate
# dynamically, based on a validation measurement passed to its step() method.
from torch.optim.lr_scheduler import ReduceLROnPlateau

# mode='min'  -> the monitored quantity is expected to decrease
# factor=0.5  -> the learning rate is halved on each reduction
# patience=20 -> number of non-improving epochs tolerated before reducing
# verbose=1   -> print a message whenever the learning rate is changed
lr_scheduler = ReduceLROnPlateau(
    opt,
    mode='min',
    factor=0.5,
    patience=20,
    verbose=1,
)

In [6]:
def get_lr(opt):
    """Return the learning rate of the optimizer's first parameter group.

    ``opt.param_groups`` is iterated and each entry is indexed with ``'lr'``
    (for torch optimizers it is a list of dicts). Only the first entry is
    consulted.

    Args:
        opt: Object exposing an iterable ``param_groups`` attribute.

    Returns:
        The ``'lr'`` value of the first parameter group, or ``None`` when
        there are no parameter groups.
    """
    return next((group['lr'] for group in opt.param_groups), None)
    
# Show the learning rate the optimizer starts with.
current_lr = get_lr(opt)
print(f'current_lr={current_lr}')

current_lr=0.03


In [7]:
# Sanity check of nn.CrossEntropyLoss on random data: 3 samples, 5 classes.
a = nn.CrossEntropyLoss()
# Named `logits` rather than `input`: binding a global called `input` would
# shadow Python's built-in input() for the rest of the kernel session.
logits = torch.randn(3, 5, requires_grad=True)        # raw scores, shape (3, 5)
target = torch.empty(3, dtype=torch.long).random_(5)  # class indices in [0, 5)
output = a(logits, target)                            # scalar loss tensor
print(output)
b = output.item()  # the same loss as a plain Python float

tensor(1.7703, grad_fn=<NllLossBackward>)


In [8]:
def loss_epoch(model,loss_func,dataset_dl,opt=None):
    """Run the model over every batch of a data loader and average the loss.

    Args:
        model: Callable whose result is indexed with ``'out'`` to obtain the
            predictions.
        loss_func: Loss callable forwarded to ``loss_batch``.
        dataset_dl: Iterable yielding ``(xb, yb)`` batches and exposing a
            ``dataset`` attribute that supports ``len()``.
        opt: Optional optimizer forwarded to ``loss_batch``; when given, the
            model is updated after every batch.

    Returns:
        A tuple ``(loss, None)``: the sum of the batch losses divided by
        ``len(dataset_dl.dataset)``. The second slot is a metric placeholder
        and is always ``None``.
    """
    # The denominator is the size of the underlying dataset, not the number
    # of batches and not the number of samples actually iterated.
    num_samples = len(dataset_dl.dataset)

    total = 0.0
    for inputs, targets in dataset_dl:
        predictions = model(inputs)['out']
        batch_value, _ = loss_batch(loss_func, predictions, targets, opt)
        total += batch_value  # accumulate over the whole epoch

    epoch_loss = total / float(num_samples)
    return epoch_loss, None

In [9]:
import copy
def train_val(model,params):
    """Train and validate ``model``, keeping the weights with the lowest validation loss.

    Args:
        model: Module to train; it is updated in place.
        params: Dict holding the whole training configuration, with keys:
            'num_epochs'   -- number of epochs to run
            'loss_func'    -- loss callable
            'optimizer'    -- optimizer bound to the model parameters
            'train_dl'     -- training data loader
            'val_dl'       -- validation data loader
            'lr_scheduler' -- scheduler whose step() receives the validation loss
            'path2weights' -- file the best weights are saved to

    Returns:
        A tuple ``(model, loss_history)``. ``model`` has the best weights
        loaded; ``loss_history`` is a dict with per-epoch loss lists under
        the keys 'train' and 'val'.
    """
    total_epochs = params['num_epochs']
    criterion = params['loss_func']
    optimizer = params['optimizer']
    train_loader = params['train_dl']
    val_loader = params['val_dl']
    scheduler = params['lr_scheduler']
    weights_file = params['path2weights']

    history = {'train': [], 'val': []}

    # Snapshot of the best state seen so far. state_dict() returns references
    # to the live tensors, hence the deep copy.
    best_state = copy.deepcopy(model.state_dict())
    lowest_val_loss = float('inf')

    for epoch in range(total_epochs):
        lr_at_start = get_lr(optimizer)
        print('Epoch {}/{}, current lr={}'.format(epoch, total_epochs - 1, lr_at_start))

        # Training pass: passing the optimizer makes loss_epoch update weights.
        model.train()
        train_loss, _ = loss_epoch(model, criterion, train_loader, optimizer)
        history['train'].append(train_loss)

        # Validation pass: no optimizer and no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_loss, _ = loss_epoch(model, criterion, val_loader)
        history['val'].append(val_loss)

        # New best: remember it in memory and persist it to disk.
        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            best_state = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), weights_file)

        # If the scheduler changed the learning rate, restart from the best
        # weights seen so far.
        scheduler.step(val_loss)
        if get_lr(optimizer) != lr_at_start:
            model.load_state_dict(best_state)

        print('train loss: %.6f' % (train_loss))
        print('val loss: %.6f' % (val_loss))
        print('_'*20)

    model.load_state_dict(best_state)
    return model, history

In [53]:
import os
# Directory that receives the saved model weights.
path2models = './models/'
# makedirs(exist_ok=True) is a no-op when the directory already exists,
# creates missing parent directories, and avoids the race between a separate
# existence check and the creation call.
os.makedirs(path2models, exist_ok=True)

# Everything train_val needs, bundled in a single dict.
params_train = dict(
    num_epochs=100,
    optimizer=opt,
    loss_func=loss_function,
    train_dl=train_dl,
    val_dl=val_dl,
    lr_scheduler=lr_scheduler,
    path2weights=path2models + "weights.pt",  # file for the best weights
)

# Run training; returns the model with the best weights loaded and the
# per-epoch train/val loss history.
model, loss_record = train_val(model, params_train)

Epoch 0/99, current lr=0.03


KeyboardInterrupt: 