In [1]:
%reset
import os
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import random
import cv2
import models.layers
import addons.trees as trees
from models.vision import HTCNN, LeNet5

def loadData(data_path, data_file):
    """Read a comma-separated index file into a list of dataset records.

    Each non-blank line of ``data_file`` is expected to look like
    ``<relative image path>,<integer label>``; any further fields are ignored.

    Args:
        data_path: Directory that the image paths are joined onto.
        data_file: Path of the index file to read.

    Returns:
        A list of ``[image_path, label]`` pairs, where ``image_path`` is
        ``os.path.join(data_path, <first field>)`` and ``label`` is an int.

    Raises:
        IndexError: A non-blank line has no second field.
        ValueError: The second field is not an integer.
    """
    output = []
    with open(data_file, 'r') as f:
        for ln in f:
            record = ln.rstrip('\n')
            # Skip blank lines (typically a trailing newline at end of file)
            # instead of failing on the missing label field.
            if not record.strip():
                continue
            fields = record.split(',')
            output.append([os.path.join(data_path, fields[0]), int(fields[1])])
    return output
            
def loadInBatch(ds, r = 0, batchsize = 16, shuffle=False):
    """Assemble one mini-batch of images and one-hot labels starting at record ``r``.

    Relies on the module-level names ``device``, ``lookup_lv_list``,
    ``coarst_dims``, ``n_fine`` and ``classTree``.

    Args:
        ds: List of ``[image_path, fine_label]`` records. It is shuffled in
            place when the end of the list is reached and ``shuffle`` is true.
        r: Index of the first record to load.
        batchsize: Number of records to load; reading wraps around to index 0
            when the end of ``ds`` is reached.
        shuffle: Whether to shuffle ``ds`` each time the read position wraps.

    Returns:
        A tuple ``(output_data, aux_labels, fine_labels, r, hasDone)``:
        ``output_data`` is a ``(batchsize, C, H, W)`` float tensor sized after
        the first image of the batch; ``aux_labels`` is a list with one
        ``(batchsize, coarst_dims[j])`` one-hot tensor per entry of
        ``lookup_lv_list``; ``fine_labels`` is a ``(batchsize, n_fine)``
        one-hot tensor; ``r`` is the next read position; ``hasDone`` is True
        when the read position wrapped during this call. With a non-positive
        ``batchsize`` nothing is loaded and the tensors are ``None`` / ``[]``.

    Raises:
        IOError: An image file could not be read by ``cv2.imread``.
    """
    output_data = None
    aux_labels = []
    fine_labels = None
    ndata = len(ds)
    hasDone = False
    for i in range(batchsize):
        img_path, base_label = ds[r][0], ds[r][1]
        img_data = cv2.imread(img_path)
        if img_data is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise IOError('Cannot read image file: %s' % img_path)
        # The image array is treated as (H, W, C); reorder it to (C, H, W).
        data_blob = torch.tensor(img_data).float().permute(2, 0, 1)
        if output_data is None:
            # Allocate all batch buffers once, sized after the first image.
            height, width, channels = img_data.shape[0], img_data.shape[1], img_data.shape[2]
            output_data = torch.zeros(batchsize, channels, height, width, device=device)
            aux_labels = [torch.zeros(batchsize, coarst_dims[j], device=device)
                          for j in range(len(lookup_lv_list))]
            fine_labels = torch.zeros(batchsize, n_fine, device=device)
        output_data[i, ...] = data_blob
        # One-hot encode the ancestor class at every requested tree level.
        for j, lv in enumerate(lookup_lv_list):
            up_cls = lookupParent(classTree, base_label, lv)
            aux_labels[j][i, up_cls] = 1.0
        fine_labels[i, base_label] = 1.0
        r += 1
        if r >= ndata:
            # End of the dataset: wrap around and report it to the caller.
            r = 0
            hasDone = True
            if shuffle:
                random.shuffle(ds)
    return output_data, aux_labels, fine_labels, r, hasDone
        
def lookupParent(tree, fine_node, upper_lv=1):
    """Return the ancestor of ``fine_node`` stored for level ``upper_lv``.

    ``tree[fine_node]`` is indexed with ``upper_lv - 1``, so level 1 maps to
    the first stored entry.
    """
    ancestors = tree[fine_node]
    level_index = upper_lv - 1
    return ancestors[level_index]

def accumulateList(list1, list2):
    """Return the element-wise midpoint of two sequences.

    The result has one entry per element of ``list1``; entry ``k`` is
    ``(list1[k] + list2[k]) * 0.5``.
    """
    return [(left + list2[pos]) * 0.5 for pos, left in enumerate(list1)]

def computeBatchAccuracy(pred, expected):
    """Compute the arg-max accuracy of each prediction head over one batch.

    ``pred`` and ``expected`` are parallel sequences of arrays indexed by
    sample along their first axis. The batch size is taken from the first
    axis of ``pred[0]`` and applied to every head.

    Returns:
        A list with one float per entry of ``pred``: the fraction of samples
        whose predicted arg-max equals the expected arg-max.
    """
    accuracies = []
    head_count = len(pred)
    batch_len = pred[0].shape[0]
    for head in range(head_count):
        head_pred = pred[head]
        head_exp = expected[head]
        hits = 0.0
        for row in range(batch_len):
            if head_pred[row].argmax() == head_exp[row, ...].argmax():
                hits += 1.0
        accuracies.append(hits / batch_len)
    return accuracies

def computeAccuracy(dataset, model, batchsize = 1):
    """Evaluate the fine-level arg-max accuracy of ``model`` over ``dataset``.

    The dataset is visited exactly once, in batches of at most ``batchsize``
    samples (the last batch may be smaller). Each batch accuracy is weighted
    by its sample count so the result is the exact fraction of correctly
    classified samples.

    Args:
        dataset: List of records accepted by ``loadInBatch``.
        model: Callable returning ``(final_prediction, aux_predictions)`` for
            a batch tensor; only the final prediction is scored.
        batchsize: Maximum number of samples per forward pass.

    Returns:
        A one-element list holding the accuracy in ``[0, 1]``.

    Raises:
        ValueError: ``dataset`` is empty or ``batchsize`` is not positive.
    """
    data_count = len(dataset)
    if data_count == 0:
        raise ValueError('dataset is empty')
    if batchsize < 1:
        raise ValueError('batchsize must be a positive integer')

    ptr = 0
    remaining = data_count
    weighted_hits = None
    while remaining > 0:
        # Never request more than what is left, so no sample is read twice
        # when the read position wraps around inside loadInBatch.
        current = min(batchsize, remaining)
        batch_data, _, expected_fine, ptr, _ = loadInBatch(dataset, ptr, current)
        pred_final, _ = model(batch_data)
        batch_result = computeBatchAccuracy([pred_final], [expected_fine])
        # Convert per-batch fractions back to (approximate) hit counts.
        scaled = [acc * current for acc in batch_result]
        if weighted_hits is None:
            weighted_hits = scaled
        else:
            weighted_hits = [total + part for total, part in zip(weighted_hits, scaled)]
        remaining -= current
    return [total / data_count for total in weighted_hits]

def train(trainset, valset, label_file, output_path, output_fname, 
          start_lr=0.1, lr_discount=0.1, lr_steps=(), epoch=30,
          train_batch = 16, val_batch = 16, val_at = 10,
          checkpoint = None, jud_at = -1):
    """Train an HTCNN model with SGD and keep the best-validating weights.

    Relies on the module-level names ``backbone`` and ``lookup_lv_list``.
    The model is built on CUDA unconditionally.

    Args:
        trainset: Training records accepted by ``loadInBatch``.
        valset: Validation records accepted by ``computeAccuracy``.
        label_file: Label-tree file handed to the ``HTCNN`` constructor.
        output_path: Directory in which the best model is saved.
        output_fname: File name of the saved model inside ``output_path``.
        start_lr: Initial learning rate.
        lr_discount: Factor the learning rate is multiplied by at each step.
        lr_steps: Epoch indices after which the learning rate is discounted.
        epoch: Number of training epochs.
        train_batch: Training batch size.
        val_batch: Validation batch size.
        val_at: Validation runs on every epoch index divisible by this value.
        checkpoint: Optional state-dict file to resume from; ignored when it
            is ``None`` or does not exist.
        jud_at: Index into the list returned by ``computeAccuracy`` that is
            used as the model-selection score.

    Returns:
        None. The best state dict is written to
        ``os.path.join(output_path, output_fname)``.
    """
    model = HTCNN(label_file, with_aux = True, with_fc = True, backbone=backbone,
              isCuda=True).cuda()

    output_filepath = os.path.join(output_path, output_fname)

    if checkpoint is not None and os.path.isfile(checkpoint):
        model.load_state_dict(torch.load(checkpoint))

    # Score the starting weights so that only real improvements are saved.
    backbone.eval()
    model.eval()
    with torch.no_grad():
        best_v_result = computeAccuracy(valset, model, val_batch)[jud_at]
        print('Validation Accuracy: %f'%best_v_result)

    lr = start_lr
    optimizer = optim.SGD(model.parameters(), lr=lr)

    # One MSE criterion for the fine output plus one per auxiliary level.
    final_loss = nn.MSELoss()
    losses = [nn.MSELoss() for _ in lookup_lv_list]
    aux_loss_names = ['coarst loss %d'%lv for lv in lookup_lv_list]
    n_aux = len(losses)

    for i in range(epoch):
        # training phase
        backbone.train()
        model.train()
        ptr = 0
        hasFinishEpoch = False
        # Pre-sized so that every auxiliary level has its own accumulator.
        epoch_aux_losses_v = [0.0] * n_aux
        epoch_loss_v = 0.0
        iter_c = 0
        while not hasFinishEpoch:
            optimizer.zero_grad()

            batch_input, gt_aux, gt_final, ptr, hasFinishEpoch = loadInBatch(trainset, ptr, train_batch, shuffle=True)
            pred_final, pred_aux = model(batch_input)

            total_loss = final_loss(pred_final, gt_final)
            for i_aux in range(n_aux):
                aux_loss = losses[i_aux](pred_aux[i_aux], gt_aux[i_aux])
                total_loss = total_loss + aux_loss
                epoch_aux_losses_v[i_aux] += aux_loss.item()

            # compute gradients
            total_loss.backward()

            # update weights
            optimizer.step()

            # Accumulate a plain float so no autograd graph is kept alive.
            epoch_loss_v += total_loss.item()
            iter_c += 1

        print('Training Loss:', end='')
        for iloss in range(n_aux):
            epoch_aux_losses_v[iloss] /= iter_c
            print('%s: %f, '%(aux_loss_names[iloss], epoch_aux_losses_v[iloss]), end='')
        epoch_loss_v /= iter_c
        # NOTE: the value reported here is the mean of the summed
        # (fine + auxiliary) loss, not the fine loss alone.
        print('Fine loss: %f'%epoch_loss_v)

        # validation phase
        if i % val_at == 0:
            print('Validating...')
            backbone.eval()
            model.eval()
            with torch.no_grad():
                v_result = computeAccuracy(valset, model, val_batch)[jud_at]
                print('Validation Accuracy: %f'%v_result)
                if v_result > best_v_result:
                    print('Best model found and saving it.')
                    torch.save(model.state_dict(), output_filepath)
                    best_v_result = v_result
        if i in lr_steps:
            olr = lr
            lr *= lr_discount
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
            print('learning rate has been discounted from %f to %f'%(olr, lr))

    print('Model has been trained.')
    
def main():
    """Load the train/validation index files, run training, then free the backbone.

    Reads its configuration from the module-level names ``model_path``,
    ``model_fname``, ``ds_root_path``, ``training_file``, ``val_file`` and
    ``label_filepath``. The saved model file doubles as the resume checkpoint.
    """
    # Without this declaration the assignment below would only create a local
    # name and the module-level backbone would never be released.
    global backbone

    checkpoint_path = os.path.join(model_path, model_fname)

    train_set = loadData(ds_root_path, training_file)
    val_set = loadData(ds_root_path, val_file)
    print('Training set has been buffered.')
    train(train_set, val_set, label_filepath,
          output_path = model_path, output_fname = model_fname, 
          epoch=300, val_at=5, lr_steps=[100, 200],
         train_batch=128, val_batch=64, checkpoint=checkpoint_path)

    # Drop the reference to the backbone before asking the CUDA caching
    # allocator to release its unused memory.
    backbone = None
    torch.cuda.empty_cache()
    print('Done')

if __name__ == '__main__':
    # Script configuration. Everything assigned here is a module-level name
    # that the functions above read directly.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    label_filepath = '/datasets/vision/cifar100_clean/tree.txt'
    #label_filepath = '/datasets/dummy/set1/tree.txt'
    classTree, n_coarst, coarst_dims = trees.build_itree(label_filepath)
    # Tree levels are numbered from 1 up to n_coarst.
    lookup_lv_list = [i+1 for i in range(n_coarst)]
    n_fine = len(list(classTree.keys()))
    
    ds_root_path = '/datasets/vision/cifar100_clean'
    training_file = '/datasets/vision/cifar100_clean/train.txt'
    val_file = '/datasets/vision/cifar100_clean/val.txt'
    # NOTE(review): test_file points at val.txt and is not referenced by the
    # code visible here - confirm whether a separate test split was intended.
    test_file = '/datasets/vision/cifar100_clean/val.txt'
    
    model_path = '/models/cifar100_htcnn_1'
    # Creates missing parent directories and tolerates an existing directory.
    os.makedirs(model_path, exist_ok=True)
    model_fname = 'model.pth'
    
    backbone = LeNet5(n_classes=n_fine).cuda()
    
    main()

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
Training set has been buffered.


AttributeError: 'str' object has no attribute 'copy'