In [1]:
from __future__ import print_function
import argparse
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
from torchsummary import summary
import matplotlib.pyplot as plt

In [2]:
print(torch.__version__)

1.10.0+cu111


### Download the dataset before.

In [3]:
import torchvision
import torch
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
evens = list(range(0, len(trainset), 2))
odds = list(range(1, len(trainset), 2))
trainset_1 = torch.utils.data.Subset(trainset, evens)
trainset_2 = torch.utils.data.Subset(trainset, odds)

trainloader_1 = torch.utils.data.DataLoader(trainset_1, batch_size=4,
                                            shuffle=True, num_workers=2)
trainloader_2 = torch.utils.data.DataLoader(trainset_2, batch_size=4,
                                            shuffle=True, num_workers=2)

In [5]:
quarter = list(range(0, len(trainset), 4))
trainset_quarter = torch.utils.data.Subset(trainset, quarter)

In [6]:
dataset1 = trainset
dataset2 = testset

### Prepare Simple or simple-ResNet Architecture

In [7]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

#### Version 1

In [8]:
## version 2
class ResidualBlock(nn.Module):
    """Following the structure of the one implemented in
    https://arxiv.org/pdf/1806.10909.pdf
    """
    def __init__(self, data_dim, hidden_dim):
        super(ResidualBlock, self).__init__()
        self.data_dim = data_dim
        self.hidden_dim = hidden_dim

        self.mlp = nn.Sequential(
            nn.Conv2d(data_dim, data_dim, kernel_size=3),
            nn.Linear(data_dim, hidden_dim),
            nn.ReLU(True),
            nn.BatchNorm2d(32),
            nn.Linear(hidden_dim, data_dim),
            nn.ReLU(True)
        )

    def forward(self, x):
        return x + self.mlp(x)


class ResNet(nn.Module):
    """ResNet which maps data_dim dimensional points to an output_dim
    dimensional output.
    """
# ResNet(data_dim = 28*28*1, hidden_dim = 128 , num_layers = 5, output_dim=10, is_img=True)

    def __init__(self, data_dim, hidden_dim, num_layers, output_dim=1,
                 is_img=False):
        super(ResNet, self).__init__()
        residual_blocks = \
            [ResidualBlock(data_dim, hidden_dim) for _ in range(num_layers)]
        self.residual_blocks = nn.Sequential(*residual_blocks)
        self.linear_layer = nn.Linear(data_dim, output_dim)
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.is_img = is_img

    def forward(self, x, return_features=False):
        if self.is_img:
            # Flatten image, i.e. (batch_size, channels, height, width) to
            # (batch_size, channels * height * width)
            features = self.residual_blocks(x.view(x.size(0), -1))
        else:
            features = self.residual_blocks(x)
        pred = self.linear_layer(features)
        if return_features:
            return features, pred
        return pred

    def hidden_dim(self):
        return self.residual_blocks.hidden_dim

class MLPNet(nn.Module):
    """
    """
    def __init__(self, data_dim, hidden_dim):
        super(MLPNet, self).__init__()
        self.data_dim = data_dim
        self.hidden_dim = hidden_dim

        self.mlp = nn.Sequential(
            nn.Linear(data_dim, hidden_dim),
            nn.ReLU(True),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(True),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        return self.mlp(x)

#### Previous of our model's dimension 
doubl-check on our architecture and develop a feeling of how many parameters we are going to train

In [9]:
for data, target in dataset2:
  pass
x = data.shape

In [10]:
print(x)

torch.Size([3, 32, 32])


In [11]:
from torchsummary import summary
# model = ResNet(data_dim = 32*32*3, hidden_dim = 128 , num_layers = 5, output_dim=10, is_img=True)
model = Net()
if torch.cuda.is_available():
    model.cuda()
summary(model,(3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                  [-1, 120]          48,120
            Linear-6                   [-1, 84]          10,164
            Linear-7                   [-1, 10]             850
Total params: 62,006
Trainable params: 62,006
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.06
Params size (MB): 0.24
Estimated Total Size (MB): 0.31
----------------------------------------------------------------


#### Define train & test methods


In [12]:
def test(trainer, model, device, test_loader, epoch):
    model.eval()

    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    # print('\nTest set: Epoch: {} Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    #     epoch, test_loss, correct, len(test_loader.dataset),
    #     (100. * round(correct / len(test_loader.dataset), 6)))
    
    trainer.accuracy_list.append(100. * round(correct / len(test_loader.dataset), 6))
    trainer.loss_list.append(test_loss)
    

In [13]:
import json
import torch.nn as nn
from numpy import mean


class Trainer():
    """Class used to train ODENets, ConvODENets and ResNets.
    Parameters
    ----------
    model : one of models.ODENet, conv_models.ConvODENet, discrete_models.ResNet
    optimizer : torch.optim.Optimizer instance
    device : torch.device
    classification : bool
        If True, trains a classification model with cross entropy loss,
        otherwise trains a regression model with Huber loss.
    print_freq : int
        Frequency with which to print information (loss, nfes etc).
    record_freq : int
        Frequency with which to record information (loss, nfes etc).
    verbose : bool
        If True prints information (loss, nfes etc) during training.
    save_dir : None or tuple of string and string
        If not None, saves losses and nfes (for ode models) to directory
        specified by the first string with id specified by the second string.
        This is useful for training models when underflow in the time step or
        excessively large NFEs may occur.
    """
    def __init__(self, model, optimizer, device, classification=False,
                 print_freq=2000, record_freq=2000, verbose=False, save_dir=None):
        self.model = model
        self.optimizer = optimizer
        self.classification = classification
        self.device = device
        if self.classification:
            self.loss_func = nn.CrossEntropyLoss()
        else:
            self.loss_func = nn.SmoothL1Loss()
        self.print_freq = print_freq
        self.record_freq = record_freq
        self.steps = 0
        self.save_dir = save_dir
        self.verbose = verbose

        self.histories = {'loss_history': [], 'nfe_history': [],
                          'bnfe_history': [], 'total_nfe_history': [],
                          'epoch_loss_history': [], 'epoch_nfe_history': [],
                          'epoch_bnfe_history': [], 'epoch_total_nfe_history': []}
        self.buffer = {'loss': [], 'nfe': [], 'bnfe': [], 'total_nfe': []}

        # Only resnets have a number of layers attribute
        self.is_resnet = hasattr(self.model, 'num_layers')

        # Lists for visualization of loss and accuracy 
        self.loss_list = []
        self.iteration_list = []
        self.accuracy_list = []
        self.avg_loss_list = []


        # Lists for knowing classwise accuracy
        self.predictions_list = []
        self.labels_list = []


    def train(self, data_loader, num_epochs, device, test_loader):
        """Trains model on data in data_loader for num_epochs.
        Parameters
        ----------
        data_loader : torch.utils.data.DataLoader
        num_epochs : int
        """
        for epoch in range(num_epochs):
            avg_loss = self._train_epoch(data_loader)

            self.iteration_list.append(epoch)
            self.avg_loss_list.append(round(avg_loss,6))

            if self.verbose:
                print("Epoch {}: {:.3f}".format(epoch + 1, avg_loss))
                
            test(self, self.model, self.device, test_loader, epoch)

            if (epoch+1 == num_epochs):
              print('\nTest set: Epoch: {} Average Test loss: {:.4f}, Accuracy: ({:.4f}%)\n'.format(
                epoch+1, self.loss_list[-1], self.accuracy_list[-1]))
        
        plt.plot(self.iteration_list, self.loss_list)
        plt.xlabel("No. of Iteration")
        plt.ylabel("Loss")
        plt.title("Iterations vs Test Loss")
        plt.show()

        plt.plot(self.iteration_list, self.accuracy_list)
        plt.xlabel("No. of Iteration")
        plt.ylabel("Accuracy")
        plt.title("Iterations vs Test Accuracy")
        plt.show()

        plt.plot(self.iteration_list, self.avg_loss_list)
        plt.xlabel("No. of Iteration")
        plt.ylabel("Accuracy")
        plt.title("Iterations vs Traning avg_loss")
        plt.show()

    def _train_epoch(self, data_loader):
        """Trains model for an epoch.
        Parameters
        ----------
        data_loader : torch.utils.data.DataLoader
        """
        epoch_loss = 0.
        epoch_nfes = 0
        epoch_backward_nfes = 0
        for i, (x_batch, y_batch) in enumerate(data_loader):
            self.optimizer.zero_grad()

            x_batch = x_batch.to(self.device)
            y_batch = y_batch.to(self.device)

            y_pred = self.model(x_batch)

            # ResNets do not have an NFE attribute
            if not self.is_resnet:
                iteration_nfes = self._get_and_reset_nfes()
                epoch_nfes += iteration_nfes

            loss = self.loss_func(y_pred, y_batch)
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.item()

            if not self.is_resnet:
                iteration_backward_nfes = self._get_and_reset_nfes()
                epoch_backward_nfes += iteration_backward_nfes

            self.steps += 1

        # Record epoch mean information
        return epoch_loss / len(data_loader)

    def _get_and_reset_nfes(self):
        """Returns and resets the number of function evaluations for model."""
        if hasattr(self.model, 'odeblock'):  # If we are using ODENet
            iteration_nfes = self.model.odeblock.odefunc.nfe
            # Set nfe count to 0 before backward pass, so we can
            # also measure backwards nfes
            self.model.odeblock.odefunc.nfe = 0
        else:  # If we are using ODEBlock
            iteration_nfes = self.model.odefunc.nfe
            self.model.odefunc.nfe = 0
        return iteration_nfes

#### previous version

### Declare simulated Args Class [contain learning rate and momentum coeff]

In [14]:
class SimulatedArgs():
    def __init__(self, epochs, batch_size, optimizer, dry_run, lr_AdaDelta=1, lr_SGD=0.1, momentum=0.9, lr_Adam = 0.001):
        # Training settings
        self.epochs = epochs

        self.batch_size = batch_size  # or 128 [later]
        self.test_batch_size = 1000
        self.no_cuda = False
        
        self.optimizer = optimizer
          # for AdaDelta optimizer
        if (self.optimizer == "AdaDelta"):
          self.lr = lr_AdaDelta
        elif (self.optimizer == "SGD"):
          # for SGD optimizer
          self.lr_SGD = lr_SGD
          self.momentum_SGD = 0.9
        elif (self.optimizer == "Adam"):
          self.lr_Adam = lr_Adam

        self.gamma = 0.7
        self.seed = 1

        self.log_interval = 10
        self.dry_run = dry_run
        self.save_model = False

        # # Lists for visualization of loss and accuracy 
        # self.loss_list = []
        # self.iteration_list = []
        # self.accuracy_list = []

        # # Lists for knowing classwise accuracy
        # self.predictions_list = []
        # self.labels_list = []

In [19]:
def main(args, dataset1, dataset2, hidden_dim=128):
  use_cuda = not args.no_cuda and torch.cuda.is_available()

  torch.manual_seed(args.seed)

  device = torch.device("cuda" if use_cuda else "cpu")

  train_kwargs = {'batch_size': args.batch_size}
  test_kwargs = {'batch_size': args.test_batch_size}
  train_kwargs = {'num_workers': 2}
  test_kwargs = {'num_workers': 2}
  if use_cuda:
      cuda_kwargs = {'num_workers': 1,
                      'pin_memory': True,
                      'shuffle': True}
      train_kwargs.update(cuda_kwargs)
      test_kwargs.update(cuda_kwargs)


  train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
  test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

  # model = ResNet(data_dim = 28*28*1, hidden_dim = hidden_dim , num_layers = 5, output_dim=10, is_img=True)
  model = Net()
  model = torch.nn.DataParallel(model(), device_ids=[0])
  # model.to(device)

  ## ---------------------------- IMPORTANT ----------------------------------
  # choose correct optimizer
  print('\nTraining SetUp: batch size: {}, optimizer: {} \n'.format(args.batch_size, args.optimizer))
  if (args.optimizer == "AdaDelta"):
    print('Inner Parameters: lr: {} \n'.format(args.lr))
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

  elif (args.optimizer == "SGD"):
    print('Inner Parameters: lr_SGD: {}, momentum_SGD: {} \n'.format(args.lr_SGD, args.momentum_SGD))
    optimizer = optim.SGD(model.parameters(), lr=args.lr_SGD, momentum=args.momentum_SGD)
  elif (args.optimizer == "Adam"):
    print('Inner Parameters: lr_Adam: {} \n'.format(args.lr_Adam))
    optimizer = optim.Adam(model.parameters(), lr=args.lr_Adam)

  # set linear rate updater
  # scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

  trainer = Trainer(model, optimizer, device, classification=True)

  ## ---------------------------- IMPORTANT ----------------------------------
  trainer.train(train_loader, args.epochs, device, test_loader)



### Start Testing!!!

#### Choose the inner hidden dimension as 128 [Standard Model]

##### Figure out the approximate size (num of parameter) of the Toy example from Pytorch offical github (some CNN)

Then adjust the hidden dim of our ResNet to have similar trainable parameter


In [20]:
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = nn.Conv2d(1, 32, 3, 1)
#         self.conv2 = nn.Conv2d(32, 64, 3, 1)
#         self.dropout1 = nn.Dropout(0.25)
#         self.dropout2 = nn.Dropout(0.5)
#         self.fc1 = nn.Linear(9216, 128)
#         self.fc2 = nn.Linear(128, 10)

#     def forward(self, x):
#         x = self.conv1(x)
#         x = F.relu(x)
#         x = self.conv2(x)
#         x = F.relu(x)
#         x = F.max_pool2d(x, 2)
#         x = self.dropout1(x)
#         x = torch.flatten(x, 1)
#         x = self.fc1(x)
#         x = F.relu(x)
#         x = self.dropout2(x)
#         x = self.fc2(x)
#         output = F.log_softmax(x, dim=1)
#         return output


# for data, target in dataset2:
#   pass
# # x = torch.randn(1,1,32,32)
# x = data.shape
# print(x)
# model = Net()
# # model = ResNet(data_dim = 28*28*1, hidden_dim = 128 , num_layers = 5, output_dim=10, is_img=True)
# if torch.cuda.is_available():
#     model.cuda()
# summary(model,(1,28,28))

# model = ResNet(data_dim = 28*28*1, hidden_dim = 128 , num_layers = 5, output_dim=10, is_img=True)
# if torch.cuda.is_available():
#     model.cuda()
# summary(model,(1,28,28))

### Start our testing with hidden_dim = 128

#### SGD -- (batch_size = 128)


In [21]:
args = SimulatedArgs(epochs=50, batch_size=128, optimizer = "Adam", dry_run = False)
main(args, dataset1, dataset2)

TypeError: ignored

In [None]:
# [1] Training SetUp: batch size: 128, optimizer: SGD 
args = SimulatedArgs(epochs=50, batch_size=128, optimizer = "Adam", dry_run = False)
main(args, trainset_1, dataset2)

In [None]:
args = SimulatedArgs(epochs=50, batch_size=128, optimizer = "Adam", dry_run = False)
main(args, trainset_2, dataset2)

In [None]:
args = SimulatedArgs(epochs=50, batch_size=128, optimizer = "Adam", dry_run = False)
main(args, trainset_quarter, dataset2)