In [1]:
import torch
import numpy as np
torch.cuda.is_available()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
import sys
print(sys.version)

3.8.0 (default, Nov  6 2019, 16:00:02) [MSC v.1916 64 bit (AMD64)]


In [3]:
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import datetime
import os
import torchvision
import time
import copy
# from torch.utils.tensorboard import SummaryWriter

# from torchsummary import summary

from tqdm import tqdm

In [4]:
torch.__version__

'1.12.1+cu116'

In [5]:
# misc functions (https://github.com/choasma/HSIC-bottleneck/blob/master/source/hsicbt/utils/misc.py)

def to_categorical(y, num_classes):
    """One-hot encode integer class labels.

    Args:
        y: integer label(s) used to index the rows of an identity matrix: a plain
            int, or an integer tensor such as the ``(N, 1)`` column built in
            ``hsic_train``.
        num_classes: number of classes, i.e. the length of each one-hot vector.

    Returns:
        Float tensor whose last axis has length ``num_classes``. An ``(N, 1)`` or
        ``(N,)`` input yields ``(N, num_classes)``; a scalar yields ``(num_classes,)``.
    """
    one_hot = torch.eye(num_classes)[y]
    # Drop only the singleton axes that sit between the batch axis and the class
    # axis.  A blanket torch.squeeze() would also remove a batch axis of length 1,
    # turning a single-sample batch into a 1-D vector.
    for axis in range(one_hot.dim() - 2, 0, -1):
        one_hot = one_hot.squeeze(axis)
    return one_hot

def get_layer_parameters(model, idx_range):
    """Pick parameters of ``model`` by their position in ``named_parameters()``.

    Args:
        model: object exposing ``named_parameters()``.
        idx_range: container of integer positions to keep (tested with ``in``).

    Returns:
        ``(params, names)``: two parallel lists in iteration order.
    """
    picked = [
        (tensor, label)
        for position, (label, tensor) in enumerate(model.named_parameters())
        if position in idx_range
    ]
    param_out = [tensor for tensor, _ in picked]
    param_out_name = [label for _, label in picked]
    return param_out, param_out_name

# https://github.com/choasma/HSIC-bottleneck/blob/master/source/hsicbt/utils/meter.py
class AverageMeter(object):
    """Tracks the latest value and the running weighted average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the average, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the running average."""
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def get_accuracy_hsic(model, dataloader, num_classes=10):
    """Accuracy of a model whose output units are not yet aligned with class labels.

    For every class (in label order) the mean output vector over that class's
    samples is computed, and the class is greedily assigned the strongest output
    unit that no earlier class has claimed.  A sample counts as correct when its
    arg-max output equals the unit assigned to its class.

    Args:
        model: callable returning ``(output, hiddens)``; must own at least one parameter
            (its device is where each batch is sent).
        dataloader: iterable of ``(data, target)`` batches with integer class targets.
        num_classes: number of classes to evaluate (labels ``0 .. num_classes - 1``).

    Returns:
        ``(accuracy_percent, reorder_list)`` where ``reorder_list[c]`` is the output
        unit assigned to class ``c`` and the accuracy is the unweighted mean of the
        per-class accuracies.

    Raises:
        ValueError: if the model has fewer output units than ``num_classes``.
        ZeroDivisionError: if some class has no sample in ``dataloader``.
    """
    device = next(model.parameters()).device
    output_list = []
    target_list = []
    # Pure evaluation: skip autograd bookkeeping for every batch.
    with torch.no_grad():
        for data, target in dataloader:
            output, _ = model(data.to(device))
            output_list.append(output.cpu().numpy())
            target_list.append(target.cpu().numpy().reshape(-1, 1))
    output_arr = np.vstack(output_list)
    target_arr = np.vstack(target_list)

    if output_arr.shape[1] < num_classes:
        raise ValueError(
            "model has {} output units but {} classes were requested".format(
                output_arr.shape[1], num_classes))

    avg_acc = 0.
    reorder_list = []
    for label in range(num_classes):
        indices = np.where(target_arr == label)[0]
        select_item = output_arr[indices]
        out = np.argmax(select_item, axis=1)
        y = np.mean(select_item, axis=0)
        # Mask already-claimed units with -inf.  Masking with 0 (as before) never
        # terminates when the remaining means are <= 0, because the masked unit
        # stays the arg-max.
        while np.argmax(y) in reorder_list:
            y[np.argmax(y)] = -np.inf
        assigned = int(np.argmax(y))
        reorder_list.append(assigned)
        num_correct = int((out == assigned).sum())
        avg_acc += float(num_correct) / float(out.shape[0])
    avg_acc /= float(num_classes)

    return avg_acc * 100., reorder_list


def get_accuracy(output, target, topk=(1,)):
    """Compute precision@k (in percent) for every k in ``topk``.

    Adapted from https://github.com/pytorch/examples/blob/master/imagenet/main.py

    Args:
        output: ``(batch, classes)`` score tensor; the top-k is taken along dim 1.
        target: integer class index per sample, ``batch`` elements in total.
        topk: iterable of k values to evaluate.

    Returns:
        List of 0-dim tensors, one per entry of ``topk``, each holding the percentage
        of samples whose target is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        # `correct` inherits the transposed (non-contiguous) layout of `pred`, so
        # .view(-1) raises for k > 1; reshape copies when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

def get_accuracy_epoch(model, dataloader):
    """Evaluate top-1 accuracy and cross-entropy loss over a whole data loader.

    Args:
        model: callable returning ``(output, hiddens)``; moved to CUDA when available,
            otherwise evaluated on the device its parameters already live on.
        dataloader: iterable of ``(data, target)`` batches.

    Returns:
        ``(accuracy_percent, loss)`` averaged per sample: each batch is weighted by
        its size, so a shorter final batch does not skew the result.  Both values
        are ``nan`` when the loader yields no batch.
    """
    cross_entropy_loss = torch.nn.CrossEntropyLoss()
    # Previously an unconditional model.to('cuda'), which fails on CPU-only hosts.
    if torch.cuda.is_available():
        model = model.to('cuda')
    device = next(model.parameters()).device

    acc = []
    loss = []
    batch_sizes = []
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for data, target in dataloader:
            data = data.to(device)
            target = target.to(device)
            output, _ = model(data)
            loss.append(cross_entropy_loss(output, target).item())
            acc.append(get_accuracy(output, target)[0].item())
            batch_sizes.append(target.size(0))

    n_samples = float(sum(batch_sizes))
    if n_samples == 0:
        return float('nan'), float('nan')
    mean_acc = sum(a * n for a, n in zip(acc, batch_sizes)) / n_samples
    mean_loss = sum(l * n for l, n in zip(loss, batch_sizes)) / n_samples
    return mean_acc, mean_loss


In [6]:
# Implement the Gaussian kernel function used to compute K_X and K_Y
# Gaussian kernel: k(x, y) ~ exp(-(1/2) * ||x - y||^2 / sigma**2)

def distmat(X):
    """Pairwise squared Euclidean distances between the rows of ``X``.

    Uses ``|a|^2 - 2 a.b + |b|^2`` and takes the absolute value of the result.

    Args:
        X: 2-D tensor, one sample per row.

    Returns:
        Square tensor with one row and one column per sample.
    """
    sq_norms = (X * X).sum(1).view([-1, 1])
    gram = X.mm(X.t())
    # Broadcasting (n, 1) and (1, n) against (n, n) replaces the explicit expand_as.
    return ((sq_norms - 2 * gram) + sq_norms.t()).abs()

def kernelmat(X, sigma):
    """Centered Gaussian kernel matrix of the rows of ``X``.

    With m rows and d columns in ``X``:
        H  = I_m - (1/m) * 1_m 1_m^T              (centering matrix)
        K  = exp(-D / (2 * sigma^2 * d))          (D from ``distmat``)
        result = K @ H

    Args:
        X: 2-D tensor, one sample per row.
        sigma: kernel width (a number).

    Returns:
        ``(m, m)`` float32 CPU tensor; the kernel is cast with
        ``torch.FloatTensor`` and the centering matrix is created on the CPU.
    """
    n_rows = int(X.size()[0])
    n_cols = X.size()[1]

    centering = torch.eye(n_rows) - (1. / n_rows) * torch.ones([n_rows, n_rows])
    bandwidth = 2. * sigma * sigma * n_cols

    sq_dists = distmat(X)
    gram = torch.exp(-sq_dists / bandwidth)
    # Cast to a CPU float tensor so it can be multiplied with the CPU centering matrix.
    gram = gram.type(torch.FloatTensor)

    return torch.mm(gram, centering)


def hsic_base(x, y, sigma=None, use_cuda=True):
    """Empirical HSIC estimate between two batches: ``trace(Kx H Ky H) / (m - 1)^2``.

    Args:
        x, y: 2-D tensors with the same number of rows ``m`` (one sample per row).
        sigma: kernel width forwarded to ``kernelmat``; the ``None`` default is not
            usable there, so callers must pass a number.
        use_cuda: accepted for interface compatibility; not read by this function.

    Returns:
        0-dim tensor holding the HSIC value.
    """
    batch = int(x.size()[0])

    centered_x = kernelmat(x, sigma=sigma)
    centered_y = kernelmat(y, sigma=sigma)
    product = centered_x @ centered_y

    return torch.trace(product) / (batch - 1) ** 2

# taken from HSIC implementation 
# https://github.com/choasma/HSIC-bottleneck/blob/9f1fe2447592d61c0ba524aad0ff0820ae2ba9cb/source/hsicbt/core/train_misc.py#L26
# def hsic_objective(hidden, h_target, h_data, sigma):

#     hsic_hy_val = hsic_base( hidden, h_target, sigma=sigma)
#     hsic_hx_val = hsic_base( hidden, h_data,   sigma=sigma)

#     return hsic_hx_val, hsic_hy_val

def hsic_loss_obj(hidden, h_target, h_data, sigma):
    """HSIC terms for one hidden layer.

    Args:
        hidden: flattened activations of the hidden layer, one row per sample.
        h_target: target representation, one row per sample.
        h_data: flattened input data, one row per sample.
        sigma: kernel width passed to ``hsic_base``.

    Returns:
        ``(hsic_hx, hsic_hy)``: HSIC(hidden, input) and HSIC(hidden, target), which the
        training loop combines into its loss.
    """
    return (
        hsic_base(hidden, h_data, sigma=sigma),
        hsic_base(hidden, h_target, sigma=sigma),
    )

In [7]:
from torchvision.datasets import CIFAR10, MNIST
from torch.utils.data import DataLoader

In [8]:
# Prepare the CIFAR10 train/validation data loaders (this cell does not load MNIST).

# Only ToTensor is applied; the Resize to 227x227 is left commented out.
train_transform = transforms.Compose([transforms.ToTensor()]) # , transforms.Resize(size=(227, 227))
valid_transform = train_transform

# Both splits are stored under ./data/cifar10 and downloaded when missing.
train_set = CIFAR10('./data/cifar10', train=True,
                  download=True, transform=train_transform)
valid_set = CIFAR10('./data/cifar10', train=False,
                  download=True, transform=valid_transform)

# These module-level loaders (batch size 128) are the ones later cells refer to
# as `train_loader` / `val_loader`.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(valid_set, batch_size=128, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
# create primitive conv block with conv2d, bn, and activation.

def get_activation(atype):
    """Build the activation module named by ``atype``.

    Args:
        atype: one of ``'relu'``, ``'tanh'``, ``'sigmoid'`` or ``'elu'``.

    Returns:
        A freshly constructed ``nn.Module`` for that activation.

    Raises:
        ValueError: if ``atype`` is not one of the supported names (previously this
            surfaced as an UnboundLocalError on the return statement).
    """
    factories = {
        'relu': nn.ReLU,
        'tanh': nn.Tanh,
        'sigmoid': nn.Sigmoid,
        'elu': nn.ELU,
    }
    try:
        factory = factories[atype]
    except (KeyError, TypeError):
        raise ValueError(
            "unknown activation {!r}; expected one of {}".format(atype, sorted(factories))
        ) from None
    return factory()

def makeblock_conv(in_chs, out_chs, atype, stride=1):
    """Conv block: 5x5 ``Conv2d`` -> ``BatchNorm2d`` (no affine parameters) -> activation.

    Args:
        in_chs: input channels of the convolution.
        out_chs: output channels of the convolution and of the batch norm.
        atype: activation name understood by ``get_activation``.
        stride: convolution stride.

    Returns:
        ``nn.Sequential`` holding the three modules in that order.
    """
    conv = nn.Conv2d(in_chs, out_chs, kernel_size=5, stride=stride)
    # affine=False: the norm has no learnable scale/shift, so the block's only
    # parameters are the convolution's weight and bias.
    norm = nn.BatchNorm2d(out_chs, affine=False)
    activation = get_activation(atype)
    return nn.Sequential(conv, norm, activation)

def makeblock_dense(in_dim, out_dim, atype):
    """Dense block: ``Linear`` -> ``BatchNorm1d`` (no affine parameters) -> activation.

    Args:
        in_dim: input features of the linear layer.
        out_dim: output features of the linear layer and of the batch norm.
        atype: activation name understood by ``get_activation``.

    Returns:
        ``nn.Sequential`` holding the three modules in that order.
    """
    linear = nn.Linear(in_dim, out_dim)
    norm = nn.BatchNorm1d(out_dim, affine=False)
    activation = get_activation(atype)
    return nn.Sequential(linear, norm, activation)

In [10]:
class ModelConv(nn.Module):
    """Stack of 5x5 conv blocks followed by one dense block.

    ``forward`` returns the final output together with the activations of every
    block, which the HSIC training loop consumes layer by layer.
    """

    def __init__(self, in_width=784, hidden_width=64, n_layers=5, atype='relu', 
        last_hidden_width=None, data_code='cifar10', **kwargs):
        """
        Args:
            in_width: stored on the instance; not used to build the layers.
            hidden_width: number of channels of every conv block.
            n_layers: number of conv blocks after the input block.
            atype: activation name understood by ``get_activation``.
            last_hidden_width: width of the dense output block; falls back to
                ``hidden_width`` when falsy.  Forced to 10 for ``'cifar10'``.
            data_code: ``'cifar10'`` (3x32x32 inputs) or ``'mnist'`` (1x28x28 inputs).
            **kwargs: ignored, so a whole config dict can be splatted in.

        Raises:
            ValueError: for an unknown ``data_code`` or when the conv stack would
                shrink the image to nothing.
        """
        super(ModelConv, self).__init__()

        is_conv = False

        if data_code == 'cifar10':
            in_ch, image_size = 3, 32
            last_hidden_width = 10
        elif data_code == 'mnist':
            in_ch, image_size = 1, 28
        else:
            raise ValueError(
                "unknown data_code {!r}; expected 'cifar10' or 'mnist'".format(data_code))

        # Each block is a 5x5 convolution with stride 1 and no padding, so it trims
        # 4 pixels per side length.  Deriving the flattened size from the geometry
        # replaces the former constants (512 / 8192), which only fitted one
        # hidden_width / n_layers combination per dataset.
        feature_size = image_size - 4 * (n_layers + 1)
        if feature_size <= 0:
            raise ValueError(
                "{} conv blocks shrink a {}x{} input to nothing".format(
                    n_layers + 1, image_size, image_size))
        dim = hidden_width * feature_size * feature_size

        last_hw = last_hidden_width if last_hidden_width else hidden_width

        # Construction order (sequence blocks, input block, output block) is kept so
        # seeded weight initialisation stays reproducible.
        block_list = [makeblock_conv(hidden_width, hidden_width, atype)
                      for _ in range(n_layers)]

        self.input_layer    = makeblock_conv(in_ch, hidden_width, atype)
        self.sequence_layer = nn.Sequential(*block_list)
        self.output_layer = makeblock_dense(dim, last_hw, atype)

        self.is_conv = is_conv
        self.in_width = in_width

    def forward(self, x):
        """Run the network.

        Returns:
            ``(x, output_list)``: the dense block's output and a list with the
            activation of the input block, of each sequence block and of the dense
            block, in that order.
        """
        output_list = []
        
        x = self.input_layer(x)
        output_list.append(x)
        
        for block in self.sequence_layer:
            x = block(x.clone())
            output_list.append(x)
            
        # Flatten everything but the batch axis before the dense block.
        x = x.view(-1, np.prod(x.size()[1:]))

        x = self.output_layer(x)
        output_list.append(x)

        return x, output_list

In [11]:
# device = 'cuda'
# model = ModelConv()
# model.to(device)

In [12]:
def hsic_train(cepoch, model, data_loader, config_dict):
    """Train ``model`` for one epoch with the layer-wise HSIC objective (no end-to-end backprop).

    For every batch, each layer in turn gets a fresh forward pass, its own SGD
    optimizer over just that layer's (weight, bias) pair, and one step on
    ``HSIC(hidden, input) - lambda_y * HSIC(hidden, target)``.

    Args:
        cepoch: epoch number, used only in the progress message.
        model: callable returning ``(output, hiddens)``; moved to ``config_dict['device']``.
        data_loader: iterable of ``(data, target)`` batches that supports ``len()``.
        config_dict: reads ``'device'``, ``'batch_size'``, ``'learning_rate'``,
            ``'sigma'``, ``'lambda_y'`` and ``'log_batch_interval'``.

    Returns:
        ``(batch_log, model)`` where ``batch_log`` maps ``'batch_acc'``, ``'batch_loss'``,
        ``'batch_hsic_hx'`` and ``'batch_hsic_hy'`` to lists of running averages sampled
        every ``log_batch_interval`` batches.  Accuracy and loss are not computed here
        and stay at the placeholder -1; the HSIC values are those of the last layer.
    """
    prec1 = total_loss = hx_l = hy_l = -1

    batch_acc    = AverageMeter()
    batch_loss   = AverageMeter()
    batch_hischx = AverageMeter()
    batch_hischy = AverageMeter()

    batch_log = {
        'batch_acc': [],
        'batch_loss': [],
        'batch_hsic_hx': [],
        'batch_hsic_hy': [],
    }

    model = model.to(config_dict['device'])

    n_batches = len(data_loader)
    # Nominal sample count for the progress text (assumes full batches).
    n_data = config_dict['batch_size'] * n_batches

    # An integer total keeps the bar at "i/391" instead of "i/391.0".
    pbar = tqdm(enumerate(data_loader), total=n_batches, ncols=120)
    for batch_idx, (data, target) in pbar:

        data   = data.to(config_dict['device'])
        target = target.to(config_dict['device'])

        # Probe pass: only the number of hidden outputs is needed, so no autograd
        # graph is recorded for it.
        with torch.no_grad():
            output, hiddens = model(data)
        n_hidden = len(hiddens)

        h_target = to_categorical(target.view(-1, 1), num_classes=10).float()
        h_data = data.view(-1, np.prod(data.size()[1:]))

        # The batch norms are built with affine=False, so every block contributes
        # exactly two parameters (weight, bias): layer i owns indices 2i and 2i+1.
        idx_range = [[2 * i, 2 * i + 1] for i in range(n_hidden)]

        for i in range(n_hidden):

            # Fresh forward pass so the graph reflects the updates already made to
            # earlier layers in this batch.
            output, hiddens = model(data)
            # Only one layer is optimized at a time.
            params, param_names = get_layer_parameters(model=model, idx_range=idx_range[i])
            optimizer = optim.SGD(params, lr = config_dict['learning_rate'], momentum=.9, weight_decay=0.001)
            optimizer.zero_grad()

            if len(hiddens[i].size()) > 2:
                hiddens[i] = hiddens[i].view(-1, np.prod(hiddens[i].size()[1:]))

            hx_l, hy_l = hsic_loss_obj(
                    hiddens[i],
                    h_target=h_target,
                    h_data=h_data,
                    sigma=config_dict['sigma'],
            )
            loss = (hx_l - config_dict['lambda_y']*hy_l)
            loss.backward()
            optimizer.step()

        batch_acc.update(prec1)
        batch_loss.update(total_loss)
        batch_hischx.update(hx_l.cpu().detach().numpy())
        batch_hischy.update(hy_l.cpu().detach().numpy())

        # # # preparation log information and print progress # # #

        msg = 'Train Epoch: {cepoch} [ {cidx:5d}/{tolidx:5d} ({perc:2d}%)] H_hx:{H_hx:.8f} H_hy:{H_hy:.8f}'.format(
                        cepoch = cepoch,  
                        cidx = (batch_idx+1)*config_dict['batch_size'], 
                        tolidx = n_data,
                        perc = int(100. * (batch_idx+1)*config_dict['batch_size']/n_data), 
                        H_hx = batch_hischx.avg, 
                        H_hy = batch_hischy.avg,
                )

        if ((batch_idx+1) % config_dict['log_batch_interval'] == 0):

            batch_log['batch_acc'].append(batch_acc.avg)
            batch_log['batch_loss'].append(batch_loss.avg)
            batch_log['batch_hsic_hx'].append(batch_hischx.avg)
            batch_log['batch_hsic_hy'].append(batch_hischy.avg)

        pbar.set_description(msg)

    return batch_log, model

In [13]:
# train_transform = transforms.Compose([transforms.ToTensor()]) # , transforms.Resize(size=(227, 227))
# valid_transform = train_transform

# train_set = MNIST('./data/mnist', train=True,
#                   download=True, transform=train_transform)
# valid_set = MNIST('./data/mnist', train=False,
#                   download=True, transform=valid_transform)

# train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
# val_loader = torch.utils.data.DataLoader(valid_set, batch_size=128, shuffle=False)

In [14]:
def load_mnist(dataFolderPath='./data/mnist', train=True, download=True, batchSize=64):
    """Build MNIST data loaders.

    Args:
        dataFolderPath: root folder of the dataset files.
        train: ``train`` flag of the dataset behind the first loader.
        download: whether missing files may be downloaded; now honoured for both
            splits (the validation split used to download unconditionally).
        batchSize: batch size of both loaders.

    Returns:
        ``(train_loader, val_loader)``: a shuffled loader over the first dataset and
        an unshuffled loader over the ``train=False`` split.
    """
    train_transform = transforms.Compose([transforms.ToTensor()]) # , transforms.Resize(size=(227, 227))
    valid_transform = train_transform

    train_set = MNIST(dataFolderPath, train=train,
                  download=download, transform=train_transform)
    valid_set = MNIST(dataFolderPath, train=False,
                  download=download, transform=valid_transform)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batchSize, shuffle=True)
    val_loader = torch.utils.data.DataLoader(valid_set, batch_size=batchSize, shuffle=False)

    return train_loader, val_loader


def load_cifar10(dataFolderPath='./data/cifar10', train=True, download=True, batchSize=64):
    """Build CIFAR10 data loaders.

    Args:
        dataFolderPath: root folder of the dataset files.
        train: ``train`` flag of the dataset behind the first loader.
        download: whether missing files may be downloaded (applies to both splits).
        batchSize: batch size of both loaders.

    Returns:
        ``(train_loader, val_loader)``: a shuffled loader over the first dataset and
        an unshuffled loader over the ``train=False`` split.
    """
    train_transform = transforms.Compose([transforms.ToTensor()]) # , transforms.Resize(size=(227, 227))
    valid_transform = train_transform

    # This function used to instantiate MNIST here, so "cifar10" runs silently
    # trained on MNIST digits stored under the cifar10 folder.
    train_set = CIFAR10(dataFolderPath, train=train,
                  download=download, transform=train_transform)
    valid_set = CIFAR10(dataFolderPath, train=False,
                  download=download, transform=valid_transform)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batchSize, shuffle=True)
    val_loader = torch.utils.data.DataLoader(valid_set, batch_size=batchSize, shuffle=False)

    return train_loader, val_loader

def get_data(data_name, batch_size):
    """Return ``(train_loader, test_loader)`` for the named dataset.

    Args:
        data_name: ``'cifar10'`` or ``'mnist'``.
        batch_size: batch size forwarded to the dataset-specific loader.

    Raises:
        ValueError: if ``data_name`` is not supported (previously this surfaced as
            an UnboundLocalError on the return statement).
    """
    if data_name == 'cifar10':
        return load_cifar10('./data/cifar10', batchSize=batch_size)
    if data_name == 'mnist':
        return load_mnist('./data/mnist', batchSize=batch_size)
    raise ValueError(
        "unknown dataset {!r}; expected 'cifar10' or 'mnist'".format(data_name))

In [14]:
# Hyper-parameters read by hsic_train.
config_dict = {}
config_dict['batch_size'] = 128
config_dict['learning_rate'] = 0.001
config_dict['lambda_y'] = 50.          # weight of the HSIC(hidden, target) term
config_dict['sigma'] = 2.              # Gaussian kernel width
config_dict['task'] = 'hsic-train'
config_dict['device'] = 'cuda'
config_dict['log_batch_interval'] = 10

# # # data preparation
# train_loader, test_loader = get_dataset_from_code('mnist', 128)

# # # convolutional model (5 conv blocks of width 32, built for 'mnist')
# NOTE(review): no loader is created in this cell. Run top-to-bottom, `train_loader`
# is the CIFAR10 loader built in an earlier cell, which does not match
# data_code='mnist'. Confirm which loader was intended.
model = ModelConv(hidden_width=32,
                    n_layers=5,
                    atype='relu',
                    last_hidden_width=None,
                    data_code='mnist')

# # # start to train
epochs = 5
for cepoch in range(epochs):
    # hsic_train returns (batch_log, model); only the last epoch's log is kept.
    batch_log, model = hsic_train(cepoch, model, train_loader, config_dict)
    # print(get_accuracy_epoch(model, train_loader))



In [20]:
from color import print_emph, print_highlight
from utils import misc
from utils import path
import datetime
# TTYPE_STANDARD = 'backprop'
TTYPE_HSICTRAIN = 'hsictrain'
# TTYPE_FORMAT = 'format'
# TTYPE_UNFORMAT = 'unformat'

# 1. Train the HSIC model with a last hidden dimension != 10 and save the model.
## 2. Create an ensemble model (HSIC model + linear model with softmax) and train it for 10 epochs
## without backprop (only the last layer's parameters are updated by the SGD optimizer).

def training_hsic(config_dict):
    """Run HSIC-bottleneck training for ``config_dict['epochs']`` epochs, saving after each.

    Args:
        config_dict: reads ``'task'``, ``'data_code'``, ``'exp_index'``, ``'batch_size'``
            and ``'epochs'`` here; it is also passed whole to ``ModelConv`` (as keyword
            arguments) and to ``hsic_train``.

    Returns:
        ``(batch_log_list, epoch_log_dict)``: the per-epoch batch logs returned by
        ``hsic_train`` and a dict with (currently never filled) ``'train_acc'`` /
        ``'test_acc'`` lists.
    """
    print_emph("HSIC-Bottleneck training")
    code_name = [config_dict['task'], TTYPE_HSICTRAIN, config_dict['data_code'], config_dict['exp_index']]

    train_loader, test_loader = get_data(
        config_dict['data_code'], config_dict['batch_size'])
    #torch.manual_seed(config_dict['seed'])
    # model = model_distribution(config_dict)

    model = ModelConv(**config_dict)
    nepoch = config_dict['epochs']
    epoch_range = range(1, nepoch+1)

    batch_log_list = []
    epoch_log_dict = {}
    epoch_log_dict['train_acc'] = []
    epoch_log_dict['test_acc'] = []

    for cepoch in epoch_range:

        # hsic_train returns (batch_log, model); appending the raw tuple would put
        # the model object itself into the saved logs.
        batch_log, model = hsic_train(cepoch, model, train_loader, config_dict)

        batch_log_list.append(batch_log)

        # save with each indexed
        filename = os.path.join(config_dict['data_code'], datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

        filename = "{}---{:04d}.pt".format(filename, cepoch)
        # The computed filename was previously never passed on.
        # NOTE(review): save_model / save_logs / get_log_filepath are not defined or
        # imported by name in this notebook; presumably they live in the imported
        # `misc` / `path` modules. Confirm and qualify the calls.
        save_model(model, filename)

        log_dict = {}
        log_dict['batch_log_list'] = batch_log_list
        log_dict['epoch_log_dict'] = epoch_log_dict
        log_dict['config_dict'] = config_dict
        save_logs(log_dict, get_log_filepath(*code_name))

    return batch_log_list, epoch_log_dict

NameError: name 'strftime' is not defined

In [None]:
# Top-1 accuracy (%) and mean cross-entropy loss of the current `model` on `train_loader`.
print(get_accuracy_epoch(model, train_loader))

In [25]:
# Label-aligned accuracy on the validation loader; the recorded run of this cell
# ended in a KeyboardInterrupt.
prec1, reorder_list = get_accuracy_hsic(model, val_loader)

KeyboardInterrupt: 

In [None]:
def get_accuracy(output, target, topk=(1,)):
    """Compute precision@k (in percent) for every k in ``topk``.

    Adapted from https://github.com/pytorch/examples/blob/master/imagenet/main.py

    Args:
        output: ``(batch, classes)`` score tensor; the top-k is taken along dim 1.
        target: integer class index per sample, ``batch`` elements in total.
        topk: iterable of k values to evaluate.

    Returns:
        List of 0-dim tensors, one per entry of ``topk``, each holding the percentage
        of samples whose target is among the k highest scores.
    """
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        # `correct` inherits the transposed (non-contiguous) layout of `pred`, so
        # .view(-1) raises for k > 1; reshape copies when it has to.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

def get_accuracy_epoch(model, dataloader):
    """Evaluate top-1 accuracy and cross-entropy loss over a whole data loader.

    Args:
        model: callable returning ``(output, hiddens)``; moved to CUDA when available,
            otherwise evaluated on the device its parameters already live on.
        dataloader: iterable of ``(data, target)`` batches.

    Returns:
        ``(accuracy_percent, loss)`` averaged per sample: each batch is weighted by
        its size, so a shorter final batch does not skew the result.  Both values
        are ``nan`` when the loader yields no batch.
    """
    cross_entropy_loss = torch.nn.CrossEntropyLoss()
    # Previously an unconditional model.to('cuda'), which fails on CPU-only hosts.
    if torch.cuda.is_available():
        model = model.to('cuda')
    device = next(model.parameters()).device

    acc = []
    loss = []
    batch_sizes = []
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for data, target in dataloader:
            data = data.to(device)
            target = target.to(device)
            output, _ = model(data)
            loss.append(cross_entropy_loss(output, target).item())
            acc.append(get_accuracy(output, target)[0].item())
            batch_sizes.append(target.size(0))

    n_samples = float(sum(batch_sizes))
    if n_samples == 0:
        return float('nan'), float('nan')
    mean_acc = sum(a * n for a, n in zip(acc, batch_sizes)) / n_samples
    mean_loss = sum(l * n for l, n in zip(loss, batch_sizes)) / n_samples
    return mean_acc, mean_loss

In [16]:
# Re-evaluate the current `model` on `train_loader`; prints (accuracy %, mean loss).
print(get_accuracy_epoch(model, train_loader))

(10.237372, 2.476208)


In [21]:
# Scratch cell: replays the set-up part of hsic_train at module level and stops
# after the first batch (see the `break`) so that `hiddens`, `idx_range`,
# `h_target` and `h_data` can be inspected in the cells below.
config_dict = {}
config_dict['batch_size'] = 32
config_dict['learning_rate'] = 0.001
config_dict['lambda_y'] = 100
config_dict['sigma'] = 2
config_dict['task'] = 'hsic-train'
config_dict['device'] = 'cuda'
config_dict['log_batch_interval'] = 10

# # # data preparation
# train_loader, test_loader = get_dataset_from_code('mnist', 128)

# # # convolutional model (2 conv blocks of width 16, built for 'cifar10')
model = ModelConv(hidden_width=16,
                    n_layers=2,
                    atype='relu',
                    last_hidden_width=10,
                    data_code='cifar10')

# # # start to train
epochs = 0
# for cepoch in range(epochs):
#     # you can also re-write hsic_train function
#     hsic_train(cepoch, model, train_loader, config_dict)

# NOTE(review): `train_loader` comes from an earlier cell; the module-level CIFAR10
# loader is built with batch_size=128 while config_dict['batch_size'] is 32 here, so
# `n_data` below would not match the real sample count. Confirm which loader is bound.
data_loader = train_loader

# def hsic_train(cepoch, model, data_loader, config_dict):

# cross_entropy_loss = torch.nn.CrossEntropyLoss()
prec1 = total_loss = hx_l = hy_l = -1

batch_acc    = AverageMeter()
batch_loss   = AverageMeter()
batch_hischx = AverageMeter()
batch_hischy = AverageMeter()

batch_log = {}
batch_log['batch_acc'] = []
batch_log['batch_loss'] = []
batch_log['batch_hsic_hx'] = []
batch_log['batch_hsic_hy'] = []

model = model.to(config_dict['device'])

n_data = config_dict['batch_size'] * len(data_loader)

# sigma_optimizer = optim.SGD([sigma_tensor], lr=1E-5)

# for batch_idx, (data, target) in enumerate(data_loader):
# n_data / batch_size equals len(data_loader), but as a float.
pbar = tqdm(enumerate(data_loader), total=n_data/config_dict['batch_size'], ncols=120)
for batch_idx, (data, target) in pbar:

    # if os.environ.get('HSICBT_DEBUG')=='4':
    #     if batch_idx > 5:
    #         break
            
    data   = data.to(config_dict['device'])
    target = target.to(config_dict['device'])
    output, hiddens = model(data)

    # One-hot targets and flattened inputs, as fed to the HSIC terms in hsic_train.
    h_target = target.view(-1,1)
    h_target = to_categorical(h_target, num_classes=10).float()
    h_data = data.view(-1, np.prod(data.size()[1:]))

    idx_range = []
    it = 0

    # The batch norms have no learnable parameters, so each layer owns exactly two
    # consecutive parameter indices (weight, bias).
    for i in range(len(hiddens)):
        idx_range.append(np.arange(it, it+2).tolist())
        it += 2


    # Stop after the first batch: this cell only sets up state for inspection.
    break

    # for i in range(len(hiddens)):
        
    #     # output, hiddens = model(data)
    #     params, param_names = get_layer_parameters(model=model, idx_range=idx_range[i]) # so we only optimize one layer at a time
    #     optimizer = optim.SGD(params, lr = config_dict['learning_rate'], momentum=.9, weight_decay=0.001)
    #     optimizer.zero_grad()
        
    #     if len(hiddens[i].size()) > 2:
    #         hiddens[i] = hiddens[i].view(-1, np.prod(hiddens[i].size()[1:]))

    #     hx_l, hy_l = hsic_loss_obj(
    #             hiddens[i],
    #             h_target=h_target.float(),
    #             h_data=h_data,
    #             sigma=config_dict['sigma'],
    #     )
    #     #print(torch.max(hiddens[i]).cpu().detach().numpy(), torch.min(hiddens[i]).cpu().detach().numpy(), torch.std(hiddens[i]).cpu().detach().numpy())
    #     loss = hx_l - config_dict['lambda_y']*hy_l
    #     if i == 0:
    #         loss.backward(retain_graph=True)
    #     else:
    #         loss.backward()
    #     optimizer.step()
    #     # sigma_optimizer.step()
    # # if config_dict['hsic_solve']:
    # #     prec1, reorder_list = misc.get_accuracy_hsic(model, data_loader)
    # batch_acc.update(prec1)
    # batch_loss.update(total_loss)
    # batch_hischx.update(hx_l.cpu().detach().numpy())
    # batch_hischy.update(hy_l.cpu().detach().numpy())

    # # # # preparation log information and print progress # # #

    # msg = 'Train Epoch: {cepoch} [ {cidx:5d}/{tolidx:5d} ({perc:2d}%)] H_hx:{H_hx:.4f} H_hy:{H_hy:.4f}'.format(
    #                 cepoch = cepoch,  
    #                 cidx = (batch_idx+1)*config_dict['batch_size'], 
    #                 tolidx = n_data,
    #                 perc = int(100. * (batch_idx+1)*config_dict['batch_size']/n_data), 
    #                 H_hx = batch_hischx.avg, 
    #                 H_hy = batch_hischy.avg,
    #         )

    # if ((batch_idx+1) % config_dict['log_batch_interval'] == 0):

    #     batch_log['batch_acc'].append(batch_loss.avg)
    #     batch_log['batch_loss'].append(batch_acc.avg)
    #     batch_log['batch_hsic_hx'].append(batch_hischx.avg)
    #     batch_log['batch_hsic_hy'].append(batch_hischy.avg)

    # pbar.set_description(msg)

    # return batch_log

  0%|                                                                                         | 0/391.0 [00:00<?, ?it/s]

torch.Size([128, 6400])





In [22]:
# Parameters at positions idx_range[2] (i.e. [4, 5]) of model.named_parameters().
params, param_names = get_layer_parameters(model=model, idx_range=idx_range[2])

In [26]:
# Number of parameter tensors selected for that layer.
len(params)

2

In [25]:
# Shape of the fourth entry of `hiddens` from the probe cell.
hiddens[3].shape

torch.Size([128, 10])

In [26]:
# List the indices of the hidden outputs returned by the model.
for i in range(len(hiddens)):
    print(i)

0
1
2
3


In [None]:
# create neural network model

class SimpleNet(nn.Module):
    """Three strided 5x5 conv stages (each followed by ReLU and max-pooling) and three linear layers.

    ``linear1`` expects 4096 flattened features, i.e. 256 channels x 4 x 4, which
    the conv/pool stack produces for 3x227x227 inputs.  The network emits 100 raw
    scores per sample.
    """

    def __init__(self):
        super().__init__()

        conv_args = dict(kernel_size=5, stride=2, padding=2)
        self.layer1 = nn.Conv2d(3, 64, **conv_args)
        self.layer2 = nn.Conv2d(64, 128, **conv_args)
        self.layer3 = nn.Conv2d(128, 256, **conv_args)
        self.mxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.mxpool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)

        self.linear1 = nn.Linear(4096, 1024)
        self.linear2 = nn.Linear(1024, 1024)
        self.linear3 = nn.Linear(1024, 100)

    def forward(self, x):
        """Return the unnormalised class scores for a batch of images."""
        stages = (
            (self.layer1, self.mxpool),
            (self.layer2, self.mxpool1),
            (self.layer3, self.mxpool),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        x = x.reshape(x.shape[0], -1)
        for dense in (self.linear1, self.linear2):
            x = F.relu(dense(x))
        # No softmax here: nn.CrossEntropyLoss combines LogSoftmax and NLLLoss.
        return self.linear3(x)


In [None]:
# create training pipeline