<a href="https://colab.research.google.com/github/Tulnertje5/engineer/blob/master/torchdiffeq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime so project files are reachable.
from google.colab import drive
drive.mount("/content/drive/")

# Work from the project folder on the mounted drive (IPython magic, notebook only).
%cd /content/drive/MyDrive/Afstudeer_project/torchdiffeq

In [None]:
!pip install torchdiffeq
import os
import argparse
import logging
import time
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt


In [None]:
class Linear_Experiment(nn.Module):
    r"""Linear layer with an antisymmetric effective weight: :math:`y = x(A^T - A) + b`.

    The stored matrix ``weight`` (:math:`A`) is unconstrained; the matrix that
    is actually applied is ``weight - weight^T``, which is antisymmetric by
    construction. This is only defined for a square layer, so
    ``in_features`` must equal ``out_features`` when the layer is called.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample (must equal ``in_features``)
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``
    Shape:
        - Input: :math:`(N, *, H)` where :math:`H = \text{in\_features}`
        - Output: :math:`(N, *, H)`
    Attributes:
        weight: learnable parameter of shape
            :math:`(\text{out\_features}, \text{in\_features})`, initialised
            from :math:`\mathcal{U}(0, 1)` (``torch.rand``).
        bias: learnable parameter of shape :math:`(\text{out\_features})`,
            initialised from :math:`\mathcal{U}(0, 1)`, or ``None`` when
            ``bias=False``.
    Examples::
        >>> m = Linear_Experiment(20, 20)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 20])
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        super(Linear_Experiment, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Register as nn.Parameter so the tensors show up in parameters() and
        # state_dict(), follow .to(device) and are updated by the optimizer.
        self.weight = nn.Parameter(torch.rand(out_features, in_features))
        if bias:
            self.bias = nn.Parameter(torch.rand(out_features))
        else:
            self.register_parameter('bias', None)

    def forward(self, input):
        if self.in_features != self.out_features:
            # weight - weight^T only exists for a square matrix; fail with a
            # clear message instead of an opaque broadcasting error.
            raise ValueError(
                'Linear_Experiment needs in_features == out_features, got '
                '{} and {}'.format(self.in_features, self.out_features)
            )
        antisymmetric_weight = self.weight - torch.transpose(self.weight, 0, 1)
        return F.linear(input, antisymmetric_weight, self.bias)

    def extra_repr(self) -> str:
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None
        )

In [None]:
class Args:
    """Notebook stand-in for command-line arguments: stores each setting as an attribute."""

    def __init__(self, network='odenet', tol=1e-3, adjoint=True, downsampling_method='conv', nepochs=5,
                 data_aug=True, lr=0.1, batch_size=254, test_batch_size=1000, save='./experimet1', gpu=0):
        # Architecture and ODE-solver settings.
        self.network = network
        self.tol = tol
        self.adjoint = adjoint
        self.downsampling_method = downsampling_method
        # Optimisation and data settings.
        self.nepochs = nepochs
        self.data_aug = data_aug
        self.lr = lr
        self.batch_size = batch_size
        self.test_batch_size = test_batch_size
        # Output directory and GPU index.
        self.save = save
        self.gpu = gpu


# Default configuration read by the definitions below.
args = Args()


# Bind `odeint` to the adjoint-based solver when args.adjoint is set,
# otherwise to the regular solver; ODEBlock calls whichever was chosen here.
if args.adjoint:
    from torchdiffeq import odeint_adjoint as odeint
else:
    from torchdiffeq import odeint

def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with one pixel of padding."""
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer


def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 (pointwise) convolution."""
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return layer


def norm(dim):
    """Return a GroupNorm over `dim` channels using at most 32 groups."""
    num_groups = min(32, dim)
    return nn.GroupNorm(num_groups, dim)

class ResBlock(nn.Module):
    """Pre-activation residual block: norm -> ReLU -> conv3x3 -> norm -> ReLU -> conv3x3, plus shortcut.

    When `downsample` is given it is applied to the activated input and used
    as the shortcut; otherwise the raw input is added back.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Registration order is kept so state_dict keys stay the same.
        self.norm1 = norm(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.norm2 = norm(planes)
        self.conv2 = conv3x3(planes, planes)

    def forward(self, x):
        activated = self.relu(self.norm1(x))

        # The shortcut is taken before the main branch consumes `activated`.
        if self.downsample is None:
            residual = x
        else:
            residual = self.downsample(activated)

        branch = self.conv1(activated)
        branch = self.relu(self.norm2(branch))
        branch = self.conv2(branch)
        return branch + residual


class ConcatConv2d(nn.Module):
    """2-D (transposed) convolution that receives the time `t` as one extra input channel."""

    def __init__(self, dim_in, dim_out, ksize=3, stride=1, padding=0, dilation=1, groups=1, bias=True, transpose=False):
        super().__init__()
        if transpose:
            layer_cls = nn.ConvTranspose2d
        else:
            layer_cls = nn.Conv2d
        # One more input channel than dim_in to make room for the time plane.
        self._layer = layer_cls(
            dim_in + 1,
            dim_out,
            kernel_size=ksize,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )

    def forward(self, t, x):
        # Constant plane filled with t, shaped like a single channel of x.
        time_plane = torch.ones_like(x[:, :1, :, :]) * t
        stacked = torch.cat([time_plane, x], 1)
        return self._layer(stacked)


class ODEfunc(nn.Module):
    """Convolutional right-hand side f(t, x) of the ODE; `nfe` counts how often it is evaluated."""

    def __init__(self, dim):
        super().__init__()
        # Registration order is kept so state_dict keys stay the same.
        self.norm1 = norm(dim)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = ConcatConv2d(dim, dim, 3, 1, 1)
        self.norm2 = norm(dim)
        self.conv2 = ConcatConv2d(dim, dim, 3, 1, 1)
        self.norm3 = norm(dim)
        self.nfe = 0

    def forward(self, t, x):
        self.nfe += 1
        hidden = self.relu(self.norm1(x))
        hidden = self.conv1(t, hidden)
        hidden = self.relu(self.norm2(hidden))
        hidden = self.conv2(t, hidden)
        return self.norm3(hidden)

class ODEBlock(nn.Module):
    """Integrates `odefunc` from t=0 to t=1 and returns the state at t=1."""

    def __init__(self, odefunc):
        super().__init__()
        self.odefunc = odefunc
        self.integration_time = torch.tensor([0, 1]).float()

    def forward(self, x):
        # Match the time grid to the input's dtype/device and cache it.
        times = self.integration_time.type_as(x)
        self.integration_time = times
        trajectory = odeint(self.odefunc, x, times, rtol=args.tol, atol=args.tol)
        # trajectory[0] is the initial state, trajectory[1] the state at the end time.
        return trajectory[1]

    @property
    def nfe(self):
        """Number of function evaluations recorded by the wrapped odefunc."""
        return self.odefunc.nfe

    @nfe.setter
    def nfe(self, value):
        self.odefunc.nfe = value


class Flatten(nn.Module):
    """Reshape an input of shape (N, d1, d2, ...) to (N, d1*d2*...)."""

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        # Multiply the trailing dimensions as plain ints. Going through a
        # tensor product allocates on every call and yields a float (1.0)
        # when there are no trailing dimensions, which `view` rejects.
        features = 1
        for size in x.shape[1:]:
            features *= size
        return x.view(-1, features)

class RunningAverageMeter(object):
    """Tracks the latest value and an exponential moving average of all values."""

    def __init__(self, momentum=0.99):
        self.momentum = momentum
        self.reset()

    def reset(self):
        """Forget all history: no last value, average back to 0."""
        self.val = None
        self.avg = 0

    def update(self, val):
        """Record `val`; the first value seeds the average, later ones are blended in."""
        is_first = self.val is None
        if is_first:
            blended = val
        else:
            blended = self.avg * self.momentum + val * (1 - self.momentum)
        self.avg = blended
        self.val = val


def get_mnist_loaders(data_aug=False, batch_size=128, test_batch_size=1000, perc=1.0):
    """Create the MNIST data loaders (downloading the data to `.data/mnist` if needed).

    Args:
        data_aug: when true, training images get a random 28x28 crop with 4 pixels of padding.
        batch_size: batch size of the shuffled training loader.
        test_batch_size: batch size of the two evaluation loaders.
        perc: accepted but not used.

    Returns:
        (train_loader, test_loader, train_eval_loader). All loaders drop the
        last incomplete batch; only `train_loader` shuffles and augments.
    """
    train_steps = [transforms.RandomCrop(28, padding=4)] if data_aug else []
    transform_train = transforms.Compose(train_steps + [transforms.ToTensor()])
    transform_test = transforms.Compose([transforms.ToTensor()])

    def mnist(train, transform):
        return datasets.MNIST(root='.data/mnist', train=train, download=True, transform=transform)

    train_loader = DataLoader(
        mnist(True, transform_train),
        batch_size=batch_size, shuffle=True, num_workers=2, drop_last=True,
    )
    # Training images without augmentation, for measuring train accuracy.
    train_eval_loader = DataLoader(
        mnist(True, transform_test),
        batch_size=test_batch_size, shuffle=False, num_workers=2, drop_last=True,
    )
    test_loader = DataLoader(
        mnist(False, transform_test),
        batch_size=test_batch_size, shuffle=False, num_workers=2, drop_last=True,
    )

    return train_loader, test_loader, train_eval_loader


def inf_generator(iterable):
    """Yield the items of `iterable` forever, restarting it whenever it is exhausted.

    Lets training run as one endless loop over a DataLoader:
        for i, (x, y) in enumerate(inf_generator(train_loader)):
    """
    while True:
        # Each pass asks the iterable for a fresh iterator.
        for item in iterable:
            yield item


def learning_rate_with_decay(batch_size, batch_denom, batches_per_epoch, boundary_epochs, decay_rates):
    """Build a piecewise-constant learning-rate schedule indexed by iteration.

    The base rate is the global `args.lr` scaled by batch_size / batch_denom.
    `decay_rates[i]` applies until iteration `boundary_epochs[i] * batches_per_epoch`;
    the entry after the last boundary applies from then on.

    Returns:
        A function mapping an iteration number to its learning rate.
    """
    base_lr = args.lr * batch_size / batch_denom
    boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs]
    vals = [base_lr * decay for decay in decay_rates]

    def learning_rate_fn(itr):
        # First boundary the iteration has not reached yet selects the rate.
        for idx, boundary in enumerate(boundaries):
            if itr < boundary:
                return vals[idx]
        return vals[len(boundaries)]

    return learning_rate_fn


def one_hot(x, K):
    """Return an integer array with one row per entry of `x` and a 1 in the column equal to that entry (K columns)."""
    classes = np.arange(K)[None, :]
    matches = x[:, None] == classes
    return matches.astype(int)


def accuracy(model, dataset_loader):
    """Return the fraction of samples in `dataset_loader` that `model` classifies correctly.

    Inputs are moved to the module-level `device`. The score is divided by the
    number of samples actually evaluated rather than by the dataset size, so a
    loader that drops its last incomplete batch is not under-reported. Labels
    are compared directly with the arg-max of the model output.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when the loader yields no samples.
    """
    total_correct = 0
    total_seen = 0
    with torch.no_grad():
        for x, y in dataset_loader:
            predicted_class = model(x.to(device)).argmax(dim=1).cpu()
            total_correct += (predicted_class == y.cpu()).sum().item()
            total_seen += y.shape[0]
    if total_seen == 0:
        return 0.0
    return total_correct / total_seen


def count_parameters(model):
    """Count the non-zero entries across all trainable parameters of `model`."""
    # TODO: needs optimizing.
    return sum(
        torch.sum(tensor != 0).item()
        for _, tensor in model.named_parameters()
        if tensor.requires_grad
    )


def makedirs(dirname):
    """Create directory `dirname` (including parents); do nothing if it already exists."""
    # exist_ok avoids the gap between checking for the directory and creating
    # it, during which another process could create it first.
    os.makedirs(dirname, exist_ok=True)


def get_logger(logpath, filepath, package_files=None, displaying=True, saving=True, debug=False):
    """Configure the root logger and log the contents of the given source files.

    Args:
        logpath: file the log is appended to when `saving` is true.
        filepath: file whose path and contents are logged first.
        package_files: optional iterable of further files to log the same way.
        displaying: also attach a stream (console) handler.
        saving: attach a file handler writing to `logpath`.
        debug: use DEBUG instead of INFO as the level.

    Returns:
        The root logger. Each call attaches new handlers to it.
    """
    level = logging.DEBUG if debug else logging.INFO

    logger = logging.getLogger()
    logger.setLevel(level)
    if saving:
        info_file_handler = logging.FileHandler(logpath, mode="a")
        info_file_handler.setLevel(level)
        logger.addHandler(info_file_handler)
    if displaying:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(level)
        logger.addHandler(console_handler)

    # None replaces the former shared mutable default list.
    extra_files = [] if package_files is None else list(package_files)
    for path in [filepath] + extra_files:
        logger.info(path)
        with open(path, "r") as source:
            logger.info(source.read())

    return logger

# Run on the GPU with index args.gpu when CUDA is available, otherwise on the CPU.
device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')

# True when the ODE-block architecture is selected; otherwise residual blocks are used.
is_odenet = args.network == 'odenet'

class Ourmodel(nn.Module):
    """MNIST classifier: downsampling stack -> ODE block (or six ResBlocks) -> pooled linear head.

    The architecture is chosen at construction time from the module-level
    `args.downsampling_method` ('conv' or 'res') and `is_odenet`. All layers
    live in `self.model`, an `nn.Sequential` already moved to `device`.

    Raises:
        ValueError: if `args.downsampling_method` is neither 'conv' nor 'res'.
    """

    def __init__(self):
        super(Ourmodel, self).__init__()
        # Spatial sizes in the comments are for a 1x28x28 input.
        if args.downsampling_method == 'conv':
            downsampling_layers = [
                nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=0),    # -> 64x26x26
                norm(64),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),   # -> 64x13x13
                norm(64),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),   # -> 64x6x6
            ]
        elif args.downsampling_method == 'res':
            downsampling_layers = [
                nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1),
                ResBlock(64, 64, stride=2, downsample=conv1x1(64, 64, 2)),
                ResBlock(64, 64, stride=2, downsample=conv1x1(64, 64, 2)),
            ]
        else:
            raise ValueError(
                "unknown downsampling_method {!r}; expected 'conv' or 'res'".format(args.downsampling_method)
            )

        feature_layers = [ODEBlock(ODEfunc(64))] if is_odenet else [ResBlock(64, 64) for _ in range(6)]
        fc_layers = [norm(64), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1)), Flatten(), nn.Linear(64, 10)]

        self.model = nn.Sequential(*downsampling_layers, *feature_layers, *fc_layers).to(device)

    def forward(self, x):
        return self.model(x)

In [None]:
# Scratch check of the concatenation used by the ODE functions: prepending a
# one-column tensor to a (2, 35) tensor prints torch.Size([2, 36]).
# NOTE(review): `input` shadows the Python builtin of the same name.
input = torch.empty(2, 35)
tt=torch.ones_like(input[:,:1])
ttx=torch.cat([tt, input], 1)
print(ttx.shape)


# Model creation with tanh linear



In [None]:

class ODEfunc_experiment(nn.Module):
    """ODE right-hand side tanh(W [t, t^2, x] + b) built on a standard linear layer.

    `nfe` counts how many times the function has been evaluated.
    """

    def __init__(self, dim=64):
        super().__init__()
        # Two extra inputs carry t and t^2.
        self.Linear = nn.Linear(dim + 2, dim)
        self.Tanh = nn.Tanh()
        self.nfe = 0

    def forward(self, t, x):
        self.nfe += 1
        ones_column = torch.ones_like(x[:, :1])
        time_col = ones_column * t
        time_sq_col = ones_column * t * t
        features = torch.cat([time_col, time_sq_col, x], 1)
        return self.Tanh(self.Linear(features))


class ODEfunc_experiment_transpose(nn.Module):
    """ODE right-hand side using the antisymmetric layer on the input [t, t^2, x].

    `Linear_Experiment` applies `weight - weight^T`, which only exists for a
    square weight, so the layer maps dim+2 -> dim+2. The first two outputs
    (those paired with the t and t^2 inputs) are discarded, leaving a
    derivative of the same width as `x`. The part of the matrix coupling `x`
    to the kept outputs is itself antisymmetric.

    `nfe` counts how many times the function has been evaluated.
    """

    def __init__(self, dim=64):
        super(ODEfunc_experiment_transpose, self).__init__()
        # Square layer: a (dim, dim+2) weight cannot be antisymmetrised.
        self.Linear = Linear_Experiment(dim + 2, dim + 2)
        self.Tanh = nn.Tanh()
        self.nfe = 0

    def forward(self, t, x):
        self.nfe += 1
        tt = torch.ones_like(x[:, :1]) * t
        tt2 = torch.ones_like(x[:, :1]) * t * t
        ttx = torch.cat([tt, tt2, x], 1)
        out = self.Tanh(self.Linear(ttx))
        # Drop the two time-related outputs so the result matches x's width.
        return out[:, 2:]



# Instantiate both ODE functions with the default dim=64.
# NOTE(review): these assignments rebind the class names to instances, so the
# classes can no longer be constructed by name until their definitions are re-run.
ODEfunc_experiment = ODEfunc_experiment()
ODEfunc_experiment_transpose=ODEfunc_experiment_transpose()
def downsampling_layers():
    """Return a fresh list of downsampling layers selected by `args.downsampling_method`.

    'conv' reduces a 1x28x28 image to a flat 64-vector; 'res' returns a
    convolution followed by two stride-2 residual blocks (not flattened).

    Raises:
        ValueError: if `args.downsampling_method` is neither 'conv' nor 'res'.
    """
    # Spatial sizes in the comments are for a 1x28x28 input.
    if args.downsampling_method == 'conv':
        layers = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=6, stride=2, padding=0),    # -> 32x12x12
            norm(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=0),   # -> 32x5x5
            norm(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=0),   # -> 64x1x1
            Flatten(),                                                                        # -> 64
        ]
    elif args.downsampling_method == 'res':
        layers = [
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1),
            ResBlock(64, 64, stride=2, downsample=conv1x1(64, 64, 2)),
            ResBlock(64, 64, stride=2, downsample=conv1x1(64, 64, 2)),
        ]
    else:
        raise ValueError(
            "unknown downsampling_method {!r}; expected 'conv' or 'res'".format(args.downsampling_method)
        )
    return layers

# One ODE block per variant: standard linear layer vs. antisymmetric layer.
feature_layers_normal = [ODEBlock(ODEfunc_experiment)]
feature_layers_transpose = [ODEBlock(ODEfunc_experiment_transpose)]

# Separate 64 -> 10 classification heads so the three models share no weights.
fc_layers_1 = [nn.Linear(64, 10)]
fc_layers_2 = [nn.Linear(64, 10)]
fc_layers_3 = [nn.Linear(64, 10)]

# Separate downsampling stacks, again one per model.
downsampling_layers_1=downsampling_layers()
downsampling_layers_2=downsampling_layers()
downsampling_layers_3=downsampling_layers()

# Baseline has no ODE block; the other two insert one between downsampling and head.
model_baseline  = nn.Sequential(*downsampling_layers_1, *fc_layers_1).to(device)
model_normal = nn.Sequential(*downsampling_layers_2, *feature_layers_normal, *fc_layers_2).to(device)
model_transpose = nn.Sequential(*downsampling_layers_3, *feature_layers_transpose, *fc_layers_3).to(device)
# With the 8-layer 'conv' downsampling stack, index 8 is the ODE block.
# Explicitly place the antisymmetric layer's weight and bias on `device`
# (needed whenever they are not registered as module parameters).
model_transpose[8].odefunc.Linear.weight=model_transpose[8].odefunc.Linear.weight.to(device)
model_transpose[8].odefunc.Linear.bias=model_transpose[8].odefunc.Linear.bias.to(device)


In [None]:
# Settings for the training runs below; `device` and `is_odenet` are
# recomputed so they reflect the new `args`.
args=Args(network='odenet', tol=1e-3,adjoint=True,downsampling_method='conv',nepochs=75,data_aug=True,lr=0.0005,batch_size=128,test_batch_size=500,save='./experimet1',gpu=0 )
device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
is_odenet = args.network == 'odenet'

def fit_model(model):
    """Train `model` on MNIST with Adagrad and evaluate it once per epoch.

    Settings come from the module-level `args`. Evaluation runs on the first
    iteration of each epoch (so the first evaluation happens after a single
    training step). Whenever test accuracy improves, the model state and
    `args` are saved to `<args.save>/model.pth`; the directory is created if
    it does not exist.

    Returns:
        (test_acc_array, avrage_loss_array, model): per-evaluation test
        accuracy, per-evaluation mean test loss over batches, and the trained
        model (the same object that was passed in).
    """
    criterion = nn.CrossEntropyLoss().to(device)

    train_loader, test_loader, train_eval_loader = get_mnist_loaders(
        args.data_aug, args.batch_size, args.test_batch_size
    )

    data_gen = inf_generator(train_loader)
    batches_per_epoch = len(train_loader)

    lr_fn = learning_rate_with_decay(
        args.batch_size, batch_denom=128, batches_per_epoch=batches_per_epoch, boundary_epochs=[60, 100, 140],
        decay_rates=[1, 0.1, 0.01, 0.01]
    )

    optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr)

    # torch.save below does not create missing directories.
    os.makedirs(args.save, exist_ok=True)

    best_acc = 0
    batch_time_meter = RunningAverageMeter()
    # The NFE meters are never updated here; they only feed the progress line.
    f_nfe_meter = RunningAverageMeter()
    b_nfe_meter = RunningAverageMeter()
    end = time.time()
    test_acc_array = []
    avrage_loss_array = []

    for itr in range(args.nepochs * batches_per_epoch):
        # The schedule overrides the optimizer's learning rate every step.
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_fn(itr)

        optimizer.zero_grad()
        x, y = next(data_gen)
        x = x.to(device)
        y = y.to(device)
        logits = model(x)
        loss = criterion(logits, y)

        loss.backward()
        optimizer.step()

        batch_time_meter.update(time.time() - end)
        end = time.time()

        if itr % batches_per_epoch != 0:
            continue

        with torch.no_grad():
            batch_losses = []
            for x, y in test_loader:
                x = x.to(device)
                y = y.to(device)
                batch_losses.append(criterion(model(x), y).item())
            avrage_loss_array.append(sum(batch_losses) / len(batch_losses))

            train_acc = accuracy(model, train_eval_loader)
            val_acc = accuracy(model, test_loader)
            test_acc_array.append(val_acc)

            if val_acc > best_acc:
                torch.save({'state_dict': model.state_dict(), 'args': args}, os.path.join(args.save, 'model.pth'))
                best_acc = val_acc

            print("Epoch {:04d} | Time {:.3f} ({:.3f}) | NFE-F {:.1f} | NFE-B {:.1f} | Train Acc {:.4f} | Test Acc {:.4f}| test Loss{:.4f}" .format(
                    itr // batches_per_epoch, batch_time_meter.val, batch_time_meter.avg, f_nfe_meter.avg,
                    b_nfe_meter.avg, train_acc, val_acc, avrage_loss_array[-1])
            )
    return test_acc_array, avrage_loss_array, model
# Run for the standard-linear ODE model (currently disabled; its saved
# .npy results are still read by the plotting cells below).
# test_acc_array , avrage_loss_array , trained_model_normal=fit_model(model_normal)
# np.save("acc_Linear_tanh_normal.npy" , np.array(test_acc_array))
# np.save("loss_Linear_tanh_normal.npy" , np.array(avrage_loss_array))

# Train the antisymmetric-weight ODE model and store its accuracy/loss curves.
test_acc_array,avrage_loss_array  , trained_model_transpose=fit_model(model_transpose)
np.save("acc_Linear_tanh_transpose.npy" , np.array(test_acc_array))
np.save("loss_Linear_tanh_transpose.npy" , np.array(avrage_loss_array))

# Train the baseline without an ODE block and store its curves.
test_acc_array ,avrage_loss_array, trained_model_baseline=fit_model(model_baseline)
np.save("acc_Linear_tanh_base.npy" , np.array(test_acc_array))
np.save("loss_Linear_tanh_base.npy" , np.array(avrage_loss_array))


In [None]:
# Load the saved test-loss curves and print how many evaluations each contains.
Linear_tanh_normal=np.load("loss_Linear_tanh_normal.npy" )
Linear_tanh_transpose=np.load("loss_Linear_tanh_transpose.npy" )

print(len(Linear_tanh_normal))
print(len(Linear_tanh_transpose))

In [None]:
# Compare the test-loss curves of the three models over the first epochs.
Linear_tanh_normal = np.load("loss_Linear_tanh_normal.npy")
Linear_tanh_transpose = np.load("loss_Linear_tanh_transpose.npy")
# Keep the baseline in its own variable so it does not overwrite the
# antisymmetric curve.
Linear_tanh_base = np.load("loss_Linear_tanh_base.npy")

first_n_epochs = 30
x = range(len(Linear_tanh_normal))[:first_n_epochs]
y1 = Linear_tanh_normal[:first_n_epochs]
y2 = Linear_tanh_transpose[:first_n_epochs]
y3 = Linear_tanh_base[:first_n_epochs]
plt.plot(x, y1, label="Normal weight:    tanh(Wx+b)")
plt.plot(x, y2, label="Anti-symetric: tanh((W-W^t)x+b)")
plt.plot(x, y3, label="baseline:  no dinamic ")
plt.xlabel("epochs")
plt.ylabel("loss on test set")
plt.title("experiment with dinamic")
# plt.yscale('log')
plt.legend()
plt.show()

transpose linear tanh experiment


# Load a model 

In [None]:
import torch
import torch.nn.utils.prune as prune 
# Settings matching the saved ODE-net checkpoint; `device` and `is_odenet`
# are recomputed because Ourmodel reads them at construction time.
args=Args(network='odenet', tol=1e-3,adjoint=True,downsampling_method='conv',nepochs=5,data_aug=True,lr=0.1,batch_size=256,test_batch_size=1000,save='./experimet1',gpu=0 )
device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
is_odenet = args.network == 'odenet'


def load_model(checkpoint, model):
    """Load `checkpoint['state_dict']` into `model` and return the same model."""
    model.load_state_dict(checkpoint['state_dict'])
    return model


# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime.
# NOTE(review): checkpoints written by fit_model also contain a pickled Args
# object; newer torch versions may need weights_only=False to read it - confirm.
checkpoint = torch.load("odenet_adjoint_50_epoch/model.pth", map_location=device)
# print(checkpoint['state_dict'])
dummie = load_model(checkpoint, Ourmodel())
# print(dummie)
# Work with the underlying nn.Sequential so layers can be addressed by index.
model = dummie.model
# print(dict(model.feature_layers.named_parameters()))
# print(dict(model.feature_layers.named_parameters()))


# Global pruning

In [None]:
import matplotlib.pyplot as plt


def _prunable_weights(model):
    """List (label, module) for every layer of the 'conv' ODE-net whose weight can be pruned."""
    odefunc = model[7].odefunc
    return [
        ("model[0]", model[0]),
        ("model[1]", model[1]),                                   # norm
        ("model[3]", model[3]),
        ("model[4]", model[4]),                                   # norm
        ("model[6]", model[6]),
        ("model[7].odefunc.norm1", odefunc.norm1),                # norm
        ("model[7].odefunc.conv1._layer", odefunc.conv1._layer),
        ("model[7].odefunc.norm2", odefunc.norm2),                # norm
        ("model[7].odefunc.conv2._layer", odefunc.conv2._layer),
        ("model[7].odefunc.norm3", odefunc.norm3),                # norm
        ("model[8]", model[8]),                                   # norm
        ("model[12]", model[12]),
    ]


def _print_sparsity(labelled_modules):
    """Print the percentage of exactly-zero weights for each (label, module) pair."""
    for label, module in labelled_modules:
        weight = module.weight
        print(
            "Sparsity in {}.weight: {:.4f}%".format(
                label,
                100. * float(torch.sum(weight == 0)) / float(weight.nelement())
            )
        )


def glabal_unstructuerd_pruning(pruning_method=prune.RandomUnstructured, index_list_layers=(0, 2, 4, 6, 8, 11),
                                print_sparsity=False):
    """Measure test accuracy and loss of the checkpointed model under global unstructured pruning.

    For each of 100 pruning amounts (1 - logspace(0, -2), i.e. from 0 up to
    0.99) the checkpoint is reloaded into a fresh model, the selected weights
    are pruned globally with `pruning_method`, the pruning is made permanent,
    and the model is evaluated on the MNIST test loader.

    Args:
        pruning_method: an unstructured pruning class from torch.nn.utils.prune.
        index_list_layers: positions in the 12-entry layer list (see
            `_prunable_weights`) whose weights are pruned.
        print_sparsity: when true, print the per-layer sparsity after each run.

    Returns:
        (accuracy_array, loss_array): one entry per pruning amount. Accuracy is
        the fraction of evaluated samples classified correctly; loss is the
        mean over test batches of the batch cross-entropy.
    """
    prune_params = 1 - np.logspace(0, -2, num=100)
    accuracy_array = []
    loss_array = []

    train_loader, test_loader, train_eval_loader = get_mnist_loaders(
        args.data_aug, args.batch_size, args.test_batch_size
    )
    criterion = nn.CrossEntropyLoss().to(device)

    for prune_param in prune_params:
        # Same seed for every amount so random masks are comparable across runs.
        torch.manual_seed(50)
        model = load_model(checkpoint, Ourmodel()).model

        labelled_modules = _prunable_weights(model)
        parameters_to_prune = tuple(
            (labelled_modules[i][1], 'weight') for i in index_list_layers
        )

        prune.global_unstructured(parameters_to_prune, pruning_method=pruning_method, amount=prune_param)

        # Make the pruning permanent (fold the mask into the weight).
        for module, name in parameters_to_prune:
            prune.remove(module, name)

        # One forward pass per batch serves both the loss and the accuracy.
        sum_loss = 0.0
        num_batches = 0
        num_correct = 0
        num_seen = 0
        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device)
                y = y.to(device)
                logits = model(x)
                sum_loss += criterion(logits, y).item()
                num_batches += 1
                num_correct += (logits.argmax(dim=1) == y).sum().item()
                num_seen += y.shape[0]

        accuracy_array.append(num_correct / num_seen)
        # criterion already averages within a batch, so average over batches.
        loss_array.append(sum_loss / num_batches)

        if print_sparsity:
            _print_sparsity(labelled_modules)

    return accuracy_array, loss_array

# Random pruning of every weight, including the normalisation layers.
accuracy_array, loss_array=glabal_unstructuerd_pruning(pruning_method=prune.RandomUnstructured, index_list_layers=[0,1,2,3,4,5,6,7,8,9,10,11])
np.save("acc_RandomUnstructered_all weights and norms.npy" , np.array(accuracy_array))
np.save("loss_RandomUnstructered_all weights end norms.npy" , np.array(loss_array))

# Random pruning of convolution/linear weights only.
accuracy_array, loss_array=glabal_unstructuerd_pruning(pruning_method=prune.RandomUnstructured, index_list_layers=[0,2,4,6,8,11])
np.save("acc_RandomUnstructered_all weights.npy" , np.array(accuracy_array))
np.save("loss_RandomUnstructered_all weights.npy" , np.array(loss_array))

# Disabled: prune.global_unstructured only accepts unstructured pruning
# methods, so passing prune.RandomStructured raises and would abort the
# remaining runs in this cell.
# accuracy_array, loss_array=glabal_unstructuerd_pruning(pruning_method=prune.RandomStructured, index_list_layers=[0,2,4,6,8,11])
# np.save("acc_RandomStructured_all weights.npy" , np.array(accuracy_array))
# np.save("loss_RandomStructured_all weights.npy" , np.array(loss_array))

# Random pruning of the two convolutions inside the ODE function only.
accuracy_array, loss_array=glabal_unstructuerd_pruning(pruning_method=prune.RandomUnstructured, index_list_layers=[6,8])
np.save("acc_RandomUnstructered_only_dynamic_weights.npy" , np.array(accuracy_array))
np.save("loss_RandomUnstructered_only_dynamic_weights.npy" , np.array(loss_array))

# L1 (magnitude) pruning of every weight, including the normalisation layers.
accuracy_array, loss_array=glabal_unstructuerd_pruning(pruning_method=prune.L1Unstructured, index_list_layers=[0,1,2,3,4,5,6,7,8,9,10,11])
np.save("acc_L1Unstructered_all weights and norms.npy" , np.array(accuracy_array))
np.save("loss_L1Unstructered_all weights end norms.npy" , np.array(loss_array))

# L1 (magnitude) pruning of convolution/linear weights only.
accuracy_array, loss_array =glabal_unstructuerd_pruning(pruning_method=prune.L1Unstructured, index_list_layers=[0,2,4,6,8,11])
np.save("acc_L1Unstructered_all weights.npy" , np.array(accuracy_array))
np.save("loss_L1Unstructered_all weights.npy" , np.array(loss_array))




# Accuracies of the last run above.
print(accuracy_array)



```
 # print(prune.is_pruned(model))
  # print(model)
  # print(model._forward_pre_hooks)
  # print(model.named_parameters())
  # print(model.named_buffers())
  # for hook in module._forward_pre_hooks.values():
  #   if hook._tensor_name == "weight":  # select out the correct hook hook is a costumfromMask opject
  #     print(hook)
  #     break\

  # for name , module in model.named_modules():
  #   if name!='':
  #     print(name)
  #     print(module.state_dict().keys())
  #     print(len(module.state_dict().keys()))

  
  # print(list(model.named_modules()))
  # for name in model.state_dict().keys():
  #   if 'weight' in name:
  #     print(name) 
  # print(model.state_dict())
  # print(list(model.named_parameters()))
  # print(list(model.named_buffers()))
  # print(count_parameters(model))
```



In [None]:
# random pruning
# Accuracy curves saved by the random-pruning runs.
acc_RandomUnstructered_all_weights_and_norms=np.load("acc_RandomUnstructered_all weights and norms.npy" )
acc_RandomUnstructered_all_weights=np.load("acc_RandomUnstructered_all weights.npy")
acc_RandomUnstructered_only_dynamic_weights=np.load("acc_RandomUnstructered_only_dynamic_weights.npy" )

# Accuracy curves saved by the L1 (magnitude) pruning runs.
acc_L1Unstructered_all_weights_and_norms=np.load("acc_L1Unstructered_all weights and norms.npy" )
acc_L1Unstructered_all_weights=np.load("acc_L1Unstructered_all weights.npy" )



In [None]:
# (legend label, accuracy curve) pairs, drawn in this order.
acc_tuple = (
    ("Random Weights and Norms", acc_RandomUnstructered_all_weights_and_norms),
    ("Random Weights", acc_RandomUnstructered_all_weights),
    ("Random only Dynamic weights ", acc_RandomUnstructered_only_dynamic_weights),
    ("L1 Weights and Norms", acc_L1Unstructered_all_weights_and_norms),
    ("L1 Weights", acc_L1Unstructered_all_weights),
)

# x values: 1 - (100 log-spaced points from 1 down to 0.01), i.e. 0 ... 0.99.
# NOTE(review): every loaded curve must therefore have 100 entries; this presumably
# mirrors the grid used inside the pruning sweep - confirm against that function.
# NOTE(review): the axis label says "left", but if these values were passed as the
# pruning amount they are the fraction removed - confirm which one is meant.
prune_params = 1 - np.logspace(0, -2, num=100)
x = prune_params

for curve_label, curve in acc_tuple:
    plt.plot(x, curve, label=curve_label)

plt.xlabel("proporsion that is left")
plt.ylabel("accuracy on test set")
plt.title("Title")
plt.xscale('logit')
plt.legend()
plt.show()

In [None]:
# List every entry of the model's state_dict together with its tensor size.
print("Model's state_dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

### Pruning per layer 


In [None]:


# Per-layer pruning sweep: for every pruning amount, reload two fresh copies of the
# checkpointed model, prune the weights of one selected layer (randomly in the first
# copy, by L1 magnitude in the second) and record the accuracy on the test loader.

# Pruning amounts: 1 - (50 log-spaced points from 1 down to 0.01), i.e. 0 ... 0.99.
prune_params = 1 - np.logspace(0, -2, num=50)

# Index of the layer (inside `dummie.model`) whose weights get pruned.
# The value 100 is carried over unchanged from the original cell.
# NOTE(review): presumably no layer has index 100, i.e. the sweep is switched off;
# set this to a real layer index to actually prune something.
target_layer = 100

accuracy_array1 = []  # accuracies after random unstructured pruning
accuracy_array2 = []  # accuracies after L1 unstructured pruning
saved = False  # not read in this cell; kept in case another cell relies on it

for prune_param in prune_params:
    torch.manual_seed(5050)
    dummie1 = Ourmodel()
    dummie2 = Ourmodel()
    dummie1 = load_model(checkpoint, dummie1)
    dummie2 = load_model(checkpoint, dummie2)
    model1 = dummie1.model
    model2 = dummie2.model

    # Start from NaN so that an iteration in which no layer matched is recorded as
    # "no measurement" instead of raising NameError or silently re-using a stale
    # accuracy left over from an earlier iteration / notebook run.
    val_acc1 = float("nan")
    val_acc2 = float("nan")

    for param_tensor in model1.state_dict():
        if "weight" not in param_tensor:
            continue
        # Keys are parsed as "<layer index>.<parameter name>". Splitting on the first
        # dot handles indices of any length (the previous character-based parsing
        # could read at most two digits).
        layer_text, _, param_name = param_tensor.partition(".")
        layer_idx = int(layer_text)
        if layer_idx != target_layer:
            continue

        prune.random_unstructured(model1[layer_idx], name=param_name, amount=prune_param)
        val_acc1 = accuracy(model1, test_loader)
        prune.l1_unstructured(model2[layer_idx], name=param_name, amount=prune_param)
        val_acc2 = accuracy(model2, test_loader)

        # Earlier experiment, kept for reference: pruning the sub-modules of layer 7.
        # if a==7:
        #     prune.random_unstructured(model[a].odefunc.norm1, name=param_tensor[-6:], amount=prune_param)
        #     prune.random_unstructured(model[a].odefunc.conv1._layer, name=param_tensor[-6:], amount=prune_param)
        #     prune.random_unstructured(model[a].odefunc.norm2, name=param_tensor[-6:], amount=prune_param)
        #     prune.random_unstructured(model[a].odefunc.conv2._layer, name=param_tensor[-6:], amount=prune_param)
        #     prune.random_unstructured(model[a].odefunc.norm3, name=param_tensor[-6:], amount=prune_param)
        #
        #     prune.l1_unstructured(model[a].odefunc.norm1, name=param_tensor[-6:], amount=prune_param)
        #     prune.l1_unstructured(model[a].odefunc.conv1._layer, name=param_tensor[-6:], amount=prune_param)
        #     prune.l1_unstructured(model[a].odefunc.norm2, name=param_tensor[-6:], amount=prune_param)
        #     prune.l1_unstructured(model[a].odefunc.conv2._layer, name=param_tensor[-6:], amount=prune_param)
        #     prune.l1_unstructured(model[a].odefunc.norm3, name=param_tensor[-6:], amount=prune_param)

    accuracy_array1.append(val_acc1)
    accuracy_array2.append(val_acc2)

print(prune_params)
print(accuracy_array1)
print(accuracy_array2)

ODE code for MNIST data set in blocks 


In [None]:
# Training setup: run configuration, model, loss, data loaders, learning-rate
# schedule, optimizer and the running meters used by the training loop.

# makedirs(args.save)
# logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__))
# logger.info(args)
args = Args(
    network='odenet',
    tol=1e-3,
    adjoint=False,
    downsampling_method='conv',
    nepochs=5,
    data_aug=True,
    lr=0.1,
    batch_size=254,
    test_batch_size=1000,
    save='./experimet1',
    gpu=0,
)

# Use the configured GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device(f'cuda:{args.gpu}' if torch.cuda.is_available() else 'cpu')

is_odenet = args.network == 'odenet'

# NOTE(review): the model is not moved to `device` in this cell although the
# training loop moves its inputs there - confirm Ourmodel handles placement itself.
model = Ourmodel()

# Inline construction of the network, kept for reference (the model now comes from
# Ourmodel()). Shape comments assume a 1x28x28 input - TODO confirm.
#
# if args.downsampling_method == 'conv':
#   downsampling_layers = [
#     nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1 , padding=0),     # -> 64x26x26
#     norm(64),
#     nn.ReLU(inplace=True),
#     nn.Conv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),     # -> 64x13x13
#     norm(64),
#     nn.ReLU(inplace=True),
#     nn.Conv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2, padding=1),     # -> 64x6x6
#     ]
# elif args.downsampling_method == 'res':
#     downsampling_layers = [
#         nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1),
#         ResBlock(64, 64, stride=2, downsample=conv1x1(64, 64, 2)),
#         ResBlock(64, 64, stride=2, downsample=conv1x1(64, 64, 2)),
#         ]

# feature_layers = [ODEBlock(ODEfunc(64))] if is_odenet else [ResBlock(64, 64) for _ in range(6)]
# fc_layers = [norm(64), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d((1, 1)), Flatten(), nn.Linear(64, 10)]

# model = nn.Sequential(*downsampling_layers, *feature_layers, *fc_layers).to(device)

# logger.info(model)
# logger.info('Number of parameters: {}'.format(count_parameters(model)))

criterion = nn.CrossEntropyLoss().to(device)

train_loader, test_loader, train_eval_loader = get_mnist_loaders(
    args.data_aug,
    args.batch_size,
    args.test_batch_size,
)

# Endless batch stream over the training loader, plus the number of batches per epoch.
data_gen = inf_generator(train_loader)
batches_per_epoch = len(train_loader)

lr_fn = learning_rate_with_decay(
    args.batch_size,
    batch_denom=128,
    batches_per_epoch=batches_per_epoch,
    boundary_epochs=[60, 100, 140],
    decay_rates=[1, 0.1, 0.01, 0.001],
)

optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)

best_acc = 0
# Running averages for batch time and forward / backward function evaluations.
batch_time_meter, f_nfe_meter, b_nfe_meter = (RunningAverageMeter() for _ in range(3))
end = time.time()

# Training loop; only runs when `train` is switched on.
train = False
if train:
    # torch.save does not create missing directories and the makedirs call in the
    # setup cell is commented out, so make sure the checkpoint directory exists.
    os.makedirs(args.save, exist_ok=True)

    for itr in range(args.nepochs * batches_per_epoch):

        # Apply the scheduled learning rate for this iteration.
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_fn(itr)

        optimizer.zero_grad()
        x, y = next(data_gen)
        x = x.to(device)
        y = y.to(device)
        logits = model(x)
        loss = criterion(logits, y)

        # Read and reset the function-evaluation counter after the forward pass.
        # NOTE(review): assumes the model exposes `feature_layers[0].nfe` - confirm
        # that Ourmodel provides this attribute.
        if is_odenet:
            nfe_forward = model.feature_layers[0].nfe
            model.feature_layers[0].nfe = 0

        loss.backward()
        optimizer.step()

        # Same counter again, now holding the evaluations of the backward pass.
        if is_odenet:
            nfe_backward = model.feature_layers[0].nfe
            model.feature_layers[0].nfe = 0

        batch_time_meter.update(time.time() - end)
        if is_odenet:
            f_nfe_meter.update(nfe_forward)
            b_nfe_meter.update(nfe_backward)
        end = time.time()

        # Evaluate at the first iteration of every epoch and keep the best checkpoint.
        if itr % batches_per_epoch == 0:
            with torch.no_grad():
                train_acc = accuracy(model, train_eval_loader)
                val_acc = accuracy(model, test_loader)
                if val_acc > best_acc:
                    torch.save({'state_dict': model.state_dict(), 'args': args}, os.path.join(args.save, 'model.pth'))
                    best_acc = val_acc
                print("Epoch {:04d} | Time {:.3f} ({:.3f}) | NFE-F {:.1f} | NFE-B {:.1f} | Train Acc {:.4f} | Test Acc {:.4f}".format(
                        itr // batches_per_epoch, batch_time_meter.val, batch_time_meter.avg, f_nfe_meter.avg,
                        b_nfe_meter.avg, train_acc, val_acc)
                )

The following cells run the model on MNIST samples to inspect some predictions.

In [None]:
from torchvision.datasets import MNIST

# MNIST training split with images converted to tensors. A single constructor call
# both downloads the data into data/ (if it is not there yet) and builds the dataset;
# previously a throw-away dataset was built first just to trigger the download.
dataset = MNIST(
    root='data/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

In [None]:
# Run the model on one training batch and compute predictions, loss and gradients.

# Look at a single sample of the dataset.
img_tensor, label = dataset[190]
print(img_tensor.shape)
# The batch drawn below is what gets fed to the model. To run on this single sample
# instead, use: x = img_tensor.unsqueeze(0); y = torch.tensor([label])
# (these assignments used to sit here but were overwritten before being used).

train_loader, test_loader, train_eval_loader = get_mnist_loaders(
    args.data_aug, args.batch_size, args.test_batch_size
)
data_gen = inf_generator(train_loader)

x, y = next(data_gen)
x = x.to(device)
y = y.to(device)
# print(x.shape)
# print(y.shape)
model = model.to(device)
logits = model(x)
# Index of the largest logit per sample, as a NumPy array.
predicted_class = np.argmax(logits.cpu().detach().numpy(), axis=1)
loss = criterion(logits, y)
# NOTE(review): the backward pass fills the parameters' .grad fields; presumably this
# is for inspecting gradients afterwards - it is not needed for the predictions.
loss.backward()
# print(model.feature_layers[0])
# for param_tensor in model.state_dict():
#     if "weight" in param_tensor:
#       # model.state_dict()[param_tensor]=model.state_dict()[param_tensor].prune(method='random', amount=8)
#       print(param_tensor, "\t", model.state_dict()[param_tensor].size())
# # print(y)
# print(predicted_class)

In [None]:
# List every entry of the model's state_dict together with its tensor size.
print("Model's state_dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

# Same dump for the optimizer, currently disabled:
# print("Optimizer's state_dict:")
# for var_name in optimizer.state_dict():
#     print(var_name, "\t", optimizer.state_dict()[var_name])


In [None]:
import torch
import torch.nn as nn

# Quick shape check: apply GroupNorm (1 group over 6 channels) to a random
# (20, 6) tensor and look at the size of the result.
# The tensor is called `sample` rather than `input` so the builtin input() is not
# shadowed for the rest of the notebook session.
sample = torch.randn(20, 6)
print(sample.size())
m = nn.GroupNorm(1, 6)
m(sample).size()