# Project : Designing and training with differentiable lossy sparsifier layer

Authors:
Aebel Joe Shibu(EE16B102) and
Ajinkya Ganeshrao Ambatwar(EE16B104)

Links to the Documents:
1. https://docs.google.com/document/d/15DdROczeJn9JMNlx0P7aMGeZnhPEqDoARaWEvgn4BLI/edit?ts=5e55107f

2. https://docs.google.com/document/d/1Y_yeR_qsn0Va8Kk5PM1O3up9M-Q003CyRYfGuocJQLA/edit?usp=drive_open&ouid=117769023112022294046

In [1]:
!pip install mlflow

Collecting mlflow
[?25l  Downloading https://files.pythonhosted.org/packages/9e/a7/40679fdb5ac44ad922902b560818682038be169f88c23ad719b9d1f82090/mlflow-1.8.0-py3-none-any.whl (10.4MB)
[K     |████████████████████████████████| 10.4MB 24kB/s 
Collecting databricks-cli>=0.8.7
[?25l  Downloading https://files.pythonhosted.org/packages/49/d1/fe0ba3d5c2b4b76ec035aa243bbc2fd0d60607a391f192ebe1656e17a4e2/databricks-cli-0.10.0.tar.gz (45kB)
[K     |████████████████████████████████| 51kB 8.0MB/s 
[?25hCollecting simplejson
[?25l  Downloading https://files.pythonhosted.org/packages/98/87/a7b98aa9256c8843f92878966dc3d8d914c14aad97e2c5ce4798d5743e07/simplejson-3.17.0.tar.gz (83kB)
[K     |████████████████████████████████| 92kB 10.1MB/s 
[?25hCollecting querystring-parser
  Downloading https://files.pythonhosted.org/packages/4a/fa/f54f5662e0eababf0c49e92fd94bf178888562c0e7b677c8941bbbcd1bd6/querystring_parser-1.2.4.tar.gz
Collecting sqlalchemy<=1.3.13
[?25l  Downloading https://files.pythonh

In [0]:
import mlflow
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models
import torch.optim as optim
import torchvision, torch, random, copy, os, cv2, time
from torch.autograd import Function
from torchsummary import summary
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm
import numpy as np
from collections import OrderedDict
import matplotlib.pyplot as mpt

In [0]:
#https://pytorch.org/tutorials/beginner/examples_nn/two_layer_net_module.html

class LossySparsifier(nn.Module):
    """
    Takes an activation map as input and outputs the sparsified map.
    The sparsified map has to be computed using differentiable functions.
    Has a learnable parameter which is used to threshold based on absolute values.
    """
    def __init__(self, eps = None, a = 1.0):
        """
        In the constructor we randomly instantiate the threshold value
        """
        super(LossySparsifier, self).__init__()
        # initialize eps as trainable parameter
        if eps == None:
            self.eps = nn.Parameter(torch.tensor(0.1))
        else:
            self.eps = nn.Parameter(torch.tensor(eps))
            
        self.eps.requires_grad = True
        self.a = a

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        e = torch.abs(self.eps)
        y = torch.sigmoid(self.a * (x - e)) * x
        z = torch.sigmoid(self.a * (-e - x)) * x
        return y + z

In [0]:
class SparseLoss(nn.Module):
    """
    Custom loss function which has the norm term
    """
    def __init__(self):
        super(SparseLoss,self).__init__()
        
    def forward(self, out, target, list_, memRegParam, lossObj, normPenalty):
        norm_loss = 0.
        sum_ = 0.
        for x in list_:
            norm_loss += memRegParam * normPenalty(x)
            sum_ += np.prod(x.shape)
        return lossObj(out, target) + norm_loss / sum_
        

In [0]:
sparse_layer_outputs = [] # clear this after each pass in training
sparse_layer_params = []

def storeSparseLayerOutput(model, input, output):
    sparse_layer_outputs.append(output)

def storeSparsificationParam(model, input, output):
    sparse_layer_params.append(model._modules['sparse'].eps)

layers_to_be_sparsified = (nn.modules.conv.Conv1d,
                            nn.modules.conv.Conv2d,
                            nn.modules.conv.Conv3d,
                            nn.modules.conv.ConvTranspose1d,
                            nn.modules.conv.ConvTranspose2d,
                            nn.modules.conv.ConvTranspose3d,
                            nn.modules.linear.Linear,
                            nn.modules.linear.Identity,
                            nn.modules.linear.Bilinear)

layers_to_be_recursed = (nn.modules.container.Sequential,
                         torchvision.models.resnet.BasicBlock,
                         torchvision.models.mobilenet.InvertedResidual,
                         torchvision.models.squeezenet.Fire,
                         torchvision.models.mnasnet._InvertedResidual
                         )

def addSparsifierLayers(model, eps_start):
    sparse_layers = OrderedDict()
    for var_name, module in model._modules.items():
        # each module is an OrderedDict
        if(isinstance(module, layers_to_be_sparsified)):
            model._modules[var_name] = nn.Sequential(OrderedDict([('0', module), ('sparse', LossySparsifier(eps_start))]))
            sparse_layers[var_name] = OrderedDict()
        elif(isinstance(module, layers_to_be_recursed)):
            sparse_layers[var_name] = addSparsifierLayers(module, eps_start)
    return sparse_layers

def registerHooks(model, dict_):
    for key, value in dict_.items():
        if(len(value) == 0):
            model._modules[key].register_forward_hook(storeSparseLayerOutput)
            model._modules[key].register_forward_hook(storeSparsificationParam)
        else:
            registerHooks(model._modules[key], value)
     
def sparsify(model, eps_start):
    sparse_layers = addSparsifierLayers(model, eps_start)
    registerHooks(model, sparse_layers)
    return sparse_layers


In [0]:
def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
          nn.init.zeros_(m.bias)

In [0]:
transform = transforms.Compose([transforms.ToPILImage(),
                                transforms.Resize((224, 224)),
                               transforms.RandomHorizontalFlip(),
                               transforms.RandomRotation(10),
                               transforms.ToTensor(),
                               transforms.Normalize((0.4914, 0.4822, 0.4465),(0.2023, 0.1994, 0.2010))])

In [9]:
trainset = torchvision.datasets.Caltech101(root = '/content/train', download = True)

Downloading http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz to /content/train/caltech101/101_ObjectCategories.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/train/caltech101/101_ObjectCategories.tar.gz to /content/train/caltech101
Downloading http://www.vision.caltech.edu/Image_Datasets/Caltech101/Annotations.tar to /content/train/caltech101/101_Annotations.tar


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /content/train/caltech101/101_Annotations.tar to /content/train/caltech101


In [0]:
image_paths = list(paths.list_images('/content/train/caltech101/101_ObjectCategories/'))
 
data = []
labels = []
for image_path in image_paths:
    label = image_path.split(os.path.sep)[-2]
    if label == 'BACKGROUND_Google':
        continue
 
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
 
    data.append(image)
    labels.append(label)
 
data = np.array(data)
labels = np.array(labels)

In [11]:
# one hot encode
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
print(f"Total number of classes: {len(lb.classes_)}")

Total number of classes: 101


In [0]:
# divide the data into train, validation, and test set
(X, x_val , Y, y_val) = train_test_split(data, labels, 
                                          test_size=0.2,  
                                          stratify=labels,
                                          random_state=42)
 
(x_train, x_test, y_train, y_test) = train_test_split(X, Y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [0]:
# custom dataset
class ImageDataset(Dataset):
    def __init__(self, images, labels=None, transforms=None):
        self.X = images
        self.y = labels
        self.transforms = transforms
         
    def __len__(self):
        return (len(self.X))
    
    def __getitem__(self, i):
        data = self.X[i][:]
        
        if self.transforms:
            data = self.transforms(data)
            
        if self.y is not None:
            return (data, self.y[i])
        else:
            return data

In [0]:
train_data = ImageDataset(x_train, y_train, transform)
val_data = ImageDataset(x_val, y_val, transform)
test_data = ImageDataset(x_test, y_test, transform)

In [0]:
BATCH_SIZE = 32

In [0]:
# dataloaders
trainloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
valloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
testloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [0]:
def L1_penalty(x):
    return torch.norm(x, p = 1)

def L2_penalty(x):
    return torch.norm(x, p = 2)

def L0_penalty(x):
    return torch.sum(x**2 / (x**2 + b))

def normalized_L1_penalty(x):
    return torch.norm(x, p = 1) / np.prod(x.shape)

def normalized_L2_penalty(x):
    return torch.norm(x, p = 2) / np.prod(x.shape)

def normalized_L0_penalty(x):
    return torch.sum(x**2 / (x**2 + b)) / np.prod(x.shape)

In [0]:
def updateSigmoidParam(model, dict_):
    for key, value in dict_.items():
        if(len(value) == 0):
            model._modules[key]._modules['sparse'].a *= 2.
        else:
            updateSigmoidParam(model._modules[key], value)

In [0]:
def train(trainloader, net, normRegParam, normPenalty):
    # print("training")
    running_loss = 0.0
    total_train = 0
    correct_train = 0
    for i, data in tqdm(enumerate(trainloader), total = int(len(train_data)/trainloader.batch_size)):
        inputs, labels = data[0], data[1];
        sparse_layer_outputs.clear()
        sparse_layer_params.clear()
        if torch.cuda.is_available():
            inputs = inputs.cuda()
            labels = labels.cuda()
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, torch.max(labels, 1)[1], sparse_layer_outputs, normRegParam, lossObj, normPenalty)
        #loss = criterion(outputs, torch.max(labels, 1)[1])
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted==torch.max(labels, 1)[1]).sum().item()

  
    train_accuracy = 100*correct_train/total_train
    print(f"Train Loss: {running_loss/(len(trainloader.dataset)):.4f}, Train Acc: {train_accuracy:.2f}")
    return [running_loss/(len(trainloader.dataset)), train_accuracy]

In [0]:
def val(valloader, net, normRegParam, normPenalty):
    correct = 0
    total = 0
    running_loss = 0.0    
    with torch.no_grad():
        for i,data in tqdm(enumerate(valloader), total = int(len(val_data)/valloader.batch_size)):
            images, labels = data[0], data[1]
            sparse_layer_outputs.clear()
            sparse_layer_params.clear()
            if torch.cuda.is_available():
                images, labels = images.cuda(), labels.cuda()
            outputs = net(images)
            loss = criterion(outputs, torch.max(labels, 1)[1], sparse_layer_outputs, normRegParam, lossObj, normPenalty)
            #loss = criterion(outputs, torch.max(labels, 1)[1])
            running_loss+= loss.item();
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == torch.max(labels, 1)[1]).sum().item()
    val_loss = running_loss/len(valloader.dataset)
    val_accuracy = 100*correct/total
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}')
    return val_loss, val_accuracy
    

In [0]:
def test(testloader, net):
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in tqdm(enumerate(testloader), total = int(len(test_data)/testloader.batch_size)):
            images, labels = data[0], data[1]
            sparse_layer_outputs.clear()
            sparse_layer_params.clear()
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == torch.max(labels, 1)[1]).sum().item()
            acc = (correct/total)*100
    print(f'Test accuracy: {acc:.2f}')
    return acc

In [0]:
def plotLoss(trainLoss, valLoss, name):
    mpt.figure()
    mpt.plot(trainLoss,label ="Train")
    mpt.plot(valLoss,label = "Validation")
    mpt.xlabel('Epoch')
    mpt.ylabel('Loss')
    mpt.title('Loss for ' + name)
    mpt.grid(True)
    mpt.legend()
    mpt.savefig('Loss_' + name + '.png')
    mpt.close()

In [0]:
def plotAcc(trainAcc, testAcc, name):
    mpt.figure()
    mpt.plot(trainAcc,label ="Train")
    mpt.plot(testAcc,label = "Test")
    mpt.xlabel('Epoch')
    mpt.ylabel('Accuracy')
    mpt.title('Accuracy for ' + name)
    mpt.grid(True)
    mpt.legend()
    mpt.savefig('Accuracy_' + name + '.png')
    mpt.close()

In [24]:
def plotEps(eps_list, name):
    n = len(sparse_layer_params)
    m = int(len(eps_list) / n)
    y = []
    for i in range(n):
        y = []
        for j in range(m):
            y.append(eps_list[j * n + i])
        mpt.figure()
        mpt.plot(y)
        mpt.xlabel('Epoch')
        mpt.ylabel('$\epsilon$ for layer ' + str(i + 1))
        mpt.title("Sparsity threshold for " + name)
        mpt.grid(True)
        mpt.savefig('eps'+str(i + 1)+'_' + name + '.png')
        mpt.close()

  mpt.ylabel('$\epsilon$ for layer ' + str(i + 1))


In [0]:
def addHardSparsifierLayers(model, dict_):
    for key, value in dict_.items():
        if(len(value) == 0):
            e = model._modules[key]._modules['sparse'].eps
            model._modules[key]._modules['sparse'] = LossyHardSparsifier(e)
        else:
            addHardSparsifierLayers(model._modules[key], value)
     
def hardSparsify(model, sparse_layers):
    addHardSparsifierLayers(model, sparse_layers)

In [0]:
class LossyHardSparsifier(nn.Module):
    """
    Takes an activation map as input and outputs the sparsified map.
    The sparsified map has to be computed using differentiable functions.
    Has a learnable parameter which is used to threshold based on absolute values.
    """
    def __init__(self, eps = None):
        """
        In the constructor we randomly instantiate the threshold value
        """
        super(LossyHardSparsifier, self).__init__()
        # initialize eps as trainable parameter
        if eps == None:
            self.eps = nn.Parameter(torch.tensor(0.1))
        else:
            self.eps = nn.Parameter(torch.tensor(eps))
            
        self.eps.requires_grad = True

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        x[torch.abs(x) <= torch.abs(self.eps)] = 0.
        return x

In [27]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
!mkdir('/content/drive/My Drive/SysDL_project_alexnet_weighted_norm')

In [0]:
mlflow.set_tracking_uri('/content/drive/My Drive/SysDL_project_alexnet_weighted_norm')

In [0]:
#  models to test 'Alexnet', 'Resnet18/34', 'mnasnet1_3', 'squeezenet', 'mobilenetv2'
# Take the line
# Also create the folder in Google drive to store the results
# net = models.AlexNet()
# net = models.resnet18()
# net = models.resnet34()
# net = models.mnasnet1_3()
# net = models.squeezenet1_1()
# net = models.mobilenet_v2()

In [30]:
loss_methods = [L1_penalty]
#normRegParams =  [0., 3e-10, 1e-9, 3e-9, 1e-8, 3e-8, 1e-7, 3e-7, 1e-6]
normRegParams =  [0.001, 0.01, 0.1, 0.25]
#normRegParams =  [0.25]
experiment_id = mlflow.set_experiment('alexnet_weighted_norm')

for i in range(len(loss_methods)):
    loss_method = loss_methods[i]
    # if(i in (0, 1, 2)):
    #     normRegParams =  [0., 1e-4, 1e-3, 1e-2, 1e-1]
    # else:
    #     normRegParams =  [0., 3e-9, 1e-8, 3e-8, 1e-7, 3e-7, 1e-6]

    for normRegParam in normRegParams:
        net = models.alexnet()
        net.apply(weights_init)
        eps_start = 0.5
        sparse_layers = sparsify(net, eps_start)
        if(torch.cuda.is_available()):
            net = net.cuda()
        criterion = SparseLoss()
        lossObj = nn.CrossEntropyLoss()
        a = 1.0
        b = 0.1
        num_epochs = 30
        
        weight_reg = 1e-3
        learning_rate = 0.01
        optimizer = optim.SGD(net.parameters(),lr = learning_rate,weight_decay=weight_reg)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
        run_name = 'L{}_LR={}_NRP={}_eps={}_a={}_b={}'.format(i,
                                                      learning_rate,
                                                      normRegParam,
                                                      eps_start,
                                                      a,
                                                      b)
        with mlflow.start_run(experiment_id=experiment_id, run_name=run_name):
            train_loss, train_accuracy = [],[]
            val_loss, val_acc = [], []
            test_acc = []
            print(run_name)
            print(f"Training on {len(train_data)} examples, validating on {len(val_data)} examples...")

            lr_init = scheduler.get_last_lr()
            eps_list = []

            mlflow.log_param('norm', i)
            mlflow.log_param('init_LR', learning_rate)
            mlflow.log_param('norm_reg_param', normRegParam)
            mlflow.log_param('eps_start', eps_start)
            mlflow.log_param('init_sigmoid_coeff', a)
            mlflow.log_param('approx_norm_sigma', b)

            for epoch in range(num_epochs):
                print('Epoch:', epoch+1,'LR:', scheduler.get_last_lr())
                if(epoch == 4 or epoch == 9 or epoch == 14 or epoch == 19):
                    updateSigmoidParam(net, sparse_layers)
                # Training and validation
                [train_epoch_loss, train_epoch_accuracy] = train(trainloader, net, normRegParam, loss_method)
                [val_epoch_loss, val_epoch_accuracy] = val(valloader, net, normRegParam, loss_method)
                # Append for plotting
                train_loss.append(train_epoch_loss)
                train_accuracy.append(train_epoch_accuracy)
                val_loss.append(val_epoch_loss)
                val_acc.append(val_epoch_accuracy)
                # Scheduler step for lr
                scheduler.step()
                # Testing step
                acc_score = test(testloader, net)
                test_acc.append(acc_score)
                # MLflow logging
                mlflow.log_metric('train_loss', train_epoch_loss)
                mlflow.log_metric('val_loss', val_epoch_loss)
                mlflow.log_metric('train_acc', train_epoch_accuracy)
                mlflow.log_metric('val_acc', val_epoch_accuracy)
                mlflow.log_metric('test_acc', acc_score)

                for j in range(len(sparse_layer_params)):
                    eps_list.append(sparse_layer_params[j].item())
            # Plotting
            plotLoss(train_loss, val_loss, run_name)
            plotAcc(train_accuracy, test_acc, run_name) 
            plotEps(eps_list, run_name)
            torch.save(net.state_dict(), 'soft_model.pth')
            mlflow.log_artifact('Loss_' + run_name + '.png')
            mlflow.log_artifact('Accuracy_' + run_name + '.png')
            n = len(sparse_layer_params)
            weighted_eps_sum = 0.
            den = 0.
            for j in range(n):
                mlflow.log_artifact('eps'+str(j + 1)+'_' + run_name + '.png')
                mlflow.log_metric('eps'+str(j + 1), sparse_layer_params[j].item())
                weighted_eps_sum += sparse_layer_params[j].item() * np.prod(sparse_layer_outputs[j].shape)
                den += np.prod(sparse_layer_outputs[j].shape)
            mlflow.log_metric('eps_weighted', weighted_eps_sum / den)
            print('weighted_eps', weighted_eps_sum / den)

            net_cpy = copy.deepcopy(net)
            hardSparsify(net_cpy, sparse_layers)
            torch.save(net_cpy.state_dict(), 'hard_model.pth')
            test_acc = test(testloader, net_cpy)
            print('hard_test_acc', test_acc)
            mlflow.log_metric('hard_threshold_acc', test_acc)
            print("Finished training")
            # if(train_epoch_accuracy < 15.):
            #     break


Val Loss: 0.0593, Val Acc: 58.35


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 56.80
Epoch: 29 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0364, Train Acc: 70.41


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0611, Val Acc: 57.03


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 56.85
Epoch: 30 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0363, Train Acc: 70.45


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0573, Val Acc: 59.10


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 58.12
weighted_eps 0.2632442639010953


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 59.27
hard_test_acc 59.2741935483871
Finished training
L0_LR=0.01_NRP=0.25_eps=0.5_a=1.0_b=0.1
Training on 5205 examples, validating on 1736 examples...
Epoch: 1 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.2134, Train Acc: 7.40


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.1886, Val Acc: 11.46


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 12.04
Epoch: 2 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.1606, Train Acc: 10.64


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.1348, Val Acc: 16.13


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 16.30
Epoch: 3 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.1272, Train Acc: 20.06


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.1201, Val Acc: 24.42


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 23.50
Epoch: 4 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.1141, Train Acc: 26.38


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.1123, Val Acc: 26.79


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 26.96
Epoch: 5 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.1072, Train Acc: 29.91


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.1122, Val Acc: 27.13


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 28.23
Epoch: 6 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.1000, Train Acc: 33.51


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.1008, Val Acc: 33.58


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 33.35
Epoch: 7 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0934, Train Acc: 36.45


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0919, Val Acc: 37.96


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 37.73
Epoch: 8 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0874, Train Acc: 39.08


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0939, Val Acc: 37.50


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 36.81
Epoch: 9 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0822, Train Acc: 42.44


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0855, Val Acc: 39.80


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 41.99
Epoch: 10 LR: [0.01]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0764, Train Acc: 45.46


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0844, Val Acc: 41.13


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 41.76
Epoch: 11 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0685, Train Acc: 50.05


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0762, Val Acc: 46.89


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 47.00
Epoch: 12 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0651, Train Acc: 52.05


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0708, Val Acc: 48.96


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 49.77
Epoch: 13 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0625, Train Acc: 53.51


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0699, Val Acc: 50.23


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 49.77
Epoch: 14 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0614, Train Acc: 54.31


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0674, Val Acc: 51.79


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 50.40
Epoch: 15 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0583, Train Acc: 56.75


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0688, Val Acc: 51.04


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 50.35
Epoch: 16 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0561, Train Acc: 57.37


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0721, Val Acc: 49.31


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 48.85
Epoch: 17 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0544, Train Acc: 58.12


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0647, Val Acc: 53.74


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 52.25
Epoch: 18 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0520, Train Acc: 60.77


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0634, Val Acc: 55.13


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 54.32
Epoch: 19 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0506, Train Acc: 61.44


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0699, Val Acc: 50.98


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 50.63
Epoch: 20 LR: [0.005]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0488, Train Acc: 61.88


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0644, Val Acc: 53.97


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 53.63
Epoch: 21 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0447, Train Acc: 65.15


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0596, Val Acc: 56.97


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 55.53
Epoch: 22 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0431, Train Acc: 66.51


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0596, Val Acc: 57.60


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 56.11
Epoch: 23 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0420, Train Acc: 67.42


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0601, Val Acc: 58.06


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 56.74
Epoch: 24 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0414, Train Acc: 67.32


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0587, Val Acc: 58.47


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 56.97
Epoch: 25 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0403, Train Acc: 68.38


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0594, Val Acc: 57.95


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 56.97
Epoch: 26 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0397, Train Acc: 68.41


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0577, Val Acc: 60.02


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 57.55
Epoch: 27 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0390, Train Acc: 69.07


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0565, Val Acc: 59.33


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 57.72
Epoch: 28 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0385, Train Acc: 69.47


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0597, Val Acc: 58.06


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 57.83
Epoch: 29 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0370, Train Acc: 70.12


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0574, Val Acc: 59.33


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 58.58
Epoch: 30 LR: [0.0025]


HBox(children=(FloatProgress(value=0.0, max=162.0), HTML(value='')))


Train Loss: 0.0366, Train Acc: 71.35


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Val Loss: 0.0563, Val Acc: 59.10


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 57.26
weighted_eps 0.26812002983286126


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))


Test accuracy: 57.37
hard_test_acc 57.373271889400925
Finished training


In [31]:
print(test_acc)
print(sparse_layer_params)

57.373271889400925
[Parameter containing:
tensor(0.4860, device='cuda:0', requires_grad=True), Parameter containing:
tensor(0.2404, device='cuda:0', requires_grad=True), Parameter containing:
tensor(0.0763, device='cuda:0', requires_grad=True), Parameter containing:
tensor(0.1190, device='cuda:0', requires_grad=True), Parameter containing:
tensor(-0.1221, device='cuda:0', requires_grad=True), Parameter containing:
tensor(-0.0395, device='cuda:0', requires_grad=True), Parameter containing:
tensor(-0.0054, device='cuda:0', requires_grad=True), Parameter containing:
tensor(0.1407, device='cuda:0', requires_grad=True)]


In [32]:
drive.flush_and_unmount()
print('All changes made in this colab session should now be visible in Drive.')

All changes made in this colab session should now be visible in Drive.


In [33]:
t = sparse_layer_params[0]
x = sparse_layer_outputs[0]
print(((x > 0.) & (x < t)).any())

tensor(False, device='cuda:0')


In [34]:
print(sparse_layer_params[0].grad)

None
