<a href="https://colab.research.google.com/github/bipinKrishnan/torchkeras/blob/master/functional_api_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pkbar

In [91]:
import torch
from torch import nn
from torch import optim
from torch.autograd import Variable
from torchsummary import summary as summary_
import pkbar

import warnings
warnings.filterwarnings('ignore')

# TorchKeras

In [92]:
def Input(shape):
  Input.shape = shape
  return Input.shape

def get_conv_output(shape, inputs):
  bs = 1
  data = Variable(torch.rand(bs, *shape))
  output_feat = inputs(data)

  return output_feat.size(1)

def same_pad(h_in, kernal, stride, dilation):
  return (stride*(h_in-1)-h_in+(dilation*(kernal-1))+1) / 2.0

In [93]:
class Dense(nn.Module):
  def __init__(self, outputs, activation):
    super().__init__()
    self.outputs = outputs
    self.activation = activation

  def __call__(self, inputs):
    self.inputs_size = 1
    
    if type(inputs) == tuple:
      for i in range(len(inputs)):
        self.inputs_size *= inputs[i]
      
      self.layers = nn.Sequential(
        nn.Linear(self.inputs_size, self.outputs),
        self.activation
    )

      return self.layers

    elif isinstance(inputs[-2], nn.Linear):
      self.inputs = inputs
      self.layers = list(self.inputs)
      self.layers.extend([nn.Linear(self.layers[-2].out_features, self.outputs), self.activation])

      self.layers = nn.Sequential(*self.layers)

      return self.layers

    else:
      self.inputs = inputs
      self.layers = list(self.inputs)
      self.layers.extend([nn.Linear(get_conv_output(Input.shape, self.inputs), self.outputs), self.activation])

      self.layers = nn.Sequential(*self.layers)

      return self.layers


class FlattenedLayer(nn.Module):
  def __init__(self):
    super().__init__()
    pass

  def forward(self, input):
      self.inputs = input.view(input.size(0), -1)
      return self.inputs


class Flatten():
  def __init__(self):
    pass

  def __call__(self, inputs):
    self.inputs = inputs
    self.layers = list(self.inputs)
    self.layers.extend([FlattenedLayer()])
    self.layers = nn.Sequential(*self.layers)

    return self.layers

In [94]:
class Conv2d(nn.Module):
  def __init__(self, filters, kernel_size, strides, padding, dilation, activation):
    super().__init__()
    self.filters = filters
    self.kernel = kernel_size
    self.strides = strides
    self.padding = padding
    self.dilation = dilation
    self.activation = activation

  def __call__(self, inputs):

    if type(inputs) == tuple:
      self.inputs_size = inputs

      if self.padding == 'same':
        self.padding = int(same_pad(self.inputs_size[-2], self.kernel, self.strides, self.dilation))
      else:
        self.padding = self.padding

      self.layers = nn.Sequential(
        nn.Conv2d(self.inputs_size[-3],
                  self.filters, 
                  self.kernel, 
                  self.strides, 
                  self.padding,
                  self.dilation),
        self.activation
    )

      return self.layers

    else:
      if self.padding == 'same':
        self.padding = int(same_pad(get_conv_output(Input.shape, inputs), self.kernel, self.strides, self.dilation))
      else:
        self.padding = self.padding

      self.inputs = inputs
      self.layers = list(self.inputs)
      self.layers.extend(
             [nn.Conv2d(self.layers[-2].out_channels, 
                    self.filters, 
                    self.kernel, 
                    self.strides, 
                    self.padding,
                    self.dilation),
             self.activation]
          )
      self.layers = nn.Sequential(*self.layers)

      return self.layers


In [95]:
class Model():
  def __init__(self, inputs, outputs, device):
    self.input_size = inputs
    self.device = device
    self.model = outputs.to(self.device)

  def parameters(self):
    return self.model.parameters()

  def compile(self, optimizer, loss):
    self.opt = optimizer
    self.criterion = loss

  def summary(self):
    summary_(self.model, self.input_size, device=self.device)
    print("Device Type:", self.device)

  def fit(self, data_x, data_y, epochs):
    self.model.train()

    for epoch in range(epochs):
      print("Epoch {}/{}".format(epoch+1, epochs))
      progress = pkbar.Kbar(target=len(data_x), width=25)
      
      for i, (data, target) in enumerate(zip(data_x, data_y)):
        self.opt.zero_grad()

        train_out = self.model(data.to(self.device))
        loss = self.criterion(train_out, target.to(self.device))
        loss.backward()

        self.opt.step()

        progress.update(i, values=[("loss: ", loss.item())])

      progress.add(1)

  def evaluate(self, test_x, test_y):
    self.model.eval()
    correct, loss = 0.0, 0.0

    progress = pkbar.Kbar(target=len(test_x), width=25)

    for i, (data, target) in enumerate(zip(test_x, test_y)):
      out = self.model(data.to(self.device))
      loss += self.criterion(out, target.to(self.device))

      correct += ((torch.max(out, 1)[1]) == target.to(self.device)).sum()

      progress.update(i, values=[("loss", loss.item()/len(test_x)), ("acc", (correct/len(test_x)).item())])
    progress.add(1)


  def fit_generator(self, generator, epochs):
    self.model.train()

    for epoch in range(epochs):
      print("Epoch {}/{}".format(epoch+1, epochs))
      progress = pkbar.Kbar(target=len(generator), width=25)

      for i, (data, target) in enumerate(generator):
        self.opt.zero_grad()

        train_out = self.model(data.to(self.device))
        loss = self.criterion(train_out.squeeze(), target.to(self.device))
        loss.backward()

        self.opt.step()

        progress.update(i, values=[("loss: ", loss.item())])

      progress.add(1)
      

  def evaluate_generator(self, generator):
    self.model.eval()
    correct, loss = 0.0, 0.0

    progress = pkbar.Kbar(target=len(generator), width=25)

    for i, (data, target) in enumerate(generator):
      out = self.model(data.to(self.device))
      loss += self.criterion(out.squeeze(), target.to(self.device))

      correct += (torch.max(out.squeeze(), 1)[1] == target.to(self.device)).sum()

      progress.update(i, values=[("test_acc", (correct/len(generator)).item()), ("test_loss", loss.item()/len(generator))])

    progress.add(1)

  def predict_generator(self, generator):
    self.model.train()
    out = []
    for i, (data, labels) in enumerate(generator):
      out.append(self.model(data.to(self.device)))

    return out
      

# Test data

In [96]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

inputs = Input((1, 1024))
x = Dense(54, nn.ReLU())(inputs)
y = Dense(34, nn.ReLU())(x)
z = Dense(43, activation=nn.ReLU())(y)
a = Dense(10, activation=nn.ReLU())(z)
b = Dense(1, activation=nn.Sigmoid())(a)

In [97]:
model = Model(inputs, b, 'cpu')
model.compile(optim.Adam(model.parameters(), lr=0.001), nn.BCELoss())

In [98]:
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 54]          55,350
              ReLU-2                [-1, 1, 54]               0
            Linear-3                [-1, 1, 34]           1,870
              ReLU-4                [-1, 1, 34]               0
            Linear-5                [-1, 1, 43]           1,505
              ReLU-6                [-1, 1, 43]               0
            Linear-7                [-1, 1, 10]             440
              ReLU-8                [-1, 1, 10]               0
            Linear-9                 [-1, 1, 1]              11
          Sigmoid-10                 [-1, 1, 1]               0
Total params: 59,176
Trainable params: 59,176
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.23
Estimated Tot

In [99]:
torch.manual_seed(42)

#input should be of the dimension - (batch_size, 1, n_rows*n_columns)
x = torch.rand((10, 1, 1024), dtype=torch.float)
y = torch.tensor((torch.rand(10, 1) < 0.5), dtype=torch.float)
#y = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]], dtype=torch.int64).reshape(-1, 1)

In [100]:
model.fit(x, y, 4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [101]:
model.evaluate(x, y)



# CIFAR100 -Dense layer

In [102]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

import numpy as np
import matplotlib.pyplot as plt

In [None]:
bs = 128

transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                transforms.Resize((1, 28*28)),
                                transforms.ToTensor()
])

trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

In [104]:
#images for feeding fit_generator dense network 
#must be resized to (1, 1, height*width*channel)
trainset[0][0].shape

torch.Size([1, 1, 784])

In [105]:
trainloader = DataLoader(trainset, batch_size=bs, shuffle=True)
testloader = DataLoader(testset, batch_size=bs)

In [106]:
input = Input((1, 784))
x = Dense(1000, nn.ReLU())(input)
y = Dense(500, nn.ReLU())(x)
z = Dense(700, activation=nn.ReLU())(y)
a = Dense(200, activation=nn.ReLU())(z)
b = Dense(100, activation=nn.ReLU())(a)

model = Model(input, b, device)
model.compile(optim.Adam(model.parameters(), lr=0.001), nn.CrossEntropyLoss())
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1              [-1, 1, 1000]         785,000
              ReLU-2              [-1, 1, 1000]               0
            Linear-3               [-1, 1, 500]         500,500
              ReLU-4               [-1, 1, 500]               0
            Linear-5               [-1, 1, 700]         350,700
              ReLU-6               [-1, 1, 700]               0
            Linear-7               [-1, 1, 200]         140,200
              ReLU-8               [-1, 1, 200]               0
            Linear-9               [-1, 1, 100]          20,100
             ReLU-10               [-1, 1, 100]               0
Total params: 1,796,500
Trainable params: 1,796,500
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.04
Params size (MB): 6.85
Estimat

In [107]:
model.fit_generator(trainloader, 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [108]:
model.evaluate_generator(testloader)



In [109]:
out = model.predict_generator(testloader)

In [None]:
torch.max(out[0][0], 1)[1]

# CIFAR100 -Conv2d

In [None]:
bs = 128

transform = transforms.ToTensor()

trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

In [112]:
trainset[0][0].shape

torch.Size([3, 32, 32])

In [113]:
trainloader = DataLoader(trainset, batch_size=bs, shuffle=True)
testloader = DataLoader(testset, batch_size=bs)

In [114]:
input = Input((3, 32, 32))
x = Conv2d(3, 3, 1, 'same', 1, nn.ReLU())(input)
y = Conv2d(5, 3, 1, 0, 1, nn.ReLU())(x)
z = Conv2d(6, 3, 1, 'same', 1, nn.ReLU())(y)
a = Flatten()(z)
b = Dense(100, activation=nn.ReLU())(a)

model = Model(input, b, device)
model.compile(optim.Adam(model.parameters(), lr=0.001), nn.CrossEntropyLoss())
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 32, 32]              84
              ReLU-2            [-1, 3, 32, 32]               0
            Conv2d-3            [-1, 5, 30, 30]             140
              ReLU-4            [-1, 5, 30, 30]               0
            Conv2d-5            [-1, 6, 30, 30]             276
              ReLU-6            [-1, 6, 30, 30]               0
    FlattenedLayer-7                 [-1, 5400]               0
            Linear-8                  [-1, 100]         540,100
              ReLU-9                  [-1, 100]               0
Total params: 540,600
Trainable params: 540,600
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.24
Params size (MB): 2.06
Estimated Total Size (MB): 2.31
-------------------------------------------

In [115]:
model.fit_generator(trainloader, 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [116]:
model.evaluate_generator(testloader)



In [117]:
out = model.predict_generator(testloader)
torch.max(out[0], 1)[1]

tensor([61, 61,  0,  0,  0,  0,  0, 90, 61, 40,  0, 86, 90, 61, 40, 40,  0, 86,
        61,  0, 61, 61, 90, 90,  0,  0, 61, 61,  0, 86, 24, 61, 86,  0,  0,  0,
        86, 90, 86, 90,  0, 24, 61, 61, 86,  0,  0, 86, 90, 90, 40, 40, 86, 86,
         0,  0,  0, 90,  0, 61, 86,  0,  0,  0,  0,  0, 40, 24,  0,  0, 61, 90,
        61,  0,  0, 90,  0,  0,  0, 61, 90, 61, 40,  0,  0, 90, 61, 90, 61,  0,
        90,  9, 90,  0, 90,  0, 86, 86, 90, 86,  0, 90, 90, 86, 90, 61,  0, 90,
        24, 90, 61, 61,  0,  0,  9, 90, 61, 86,  0, 40, 90,  0,  0,  0,  0,  0,
        90, 90], device='cuda:0')

# CIFAR10- Dense layer

In [None]:
bs = 64

transform = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                transforms.Resize((1, 28*28)),
                                transforms.ToTensor()
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

In [119]:
trainset[0][0].shape

torch.Size([1, 1, 784])

In [120]:
trainloader = DataLoader(trainset, batch_size=bs, shuffle=True)
testloader = DataLoader(testset, batch_size=bs)

In [121]:
input = Input(shape=(1, 784))
x = Dense(1024, nn.ReLU())(input)
y = Dense(800, nn.ReLU())(x)
z = Dense(300, activation=nn.ReLU())(y)
a = Dense(200, activation=nn.ReLU())(z)
b = Dense(10, activation=nn.ReLU())(a)

model = Model(input, b, 'cuda')
model.compile(optim.Adam(model.parameters(), lr=0.001), nn.CrossEntropyLoss())
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1              [-1, 1, 1024]         803,840
              ReLU-2              [-1, 1, 1024]               0
            Linear-3               [-1, 1, 800]         820,000
              ReLU-4               [-1, 1, 800]               0
            Linear-5               [-1, 1, 300]         240,300
              ReLU-6               [-1, 1, 300]               0
            Linear-7               [-1, 1, 200]          60,200
              ReLU-8               [-1, 1, 200]               0
            Linear-9                [-1, 1, 10]           2,010
             ReLU-10                [-1, 1, 10]               0
Total params: 1,926,350
Trainable params: 1,926,350
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.04
Params size (MB): 7.35
Estimat

In [122]:
model.fit_generator(trainloader, 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [123]:
model.evaluate_generator(testloader)



In [None]:
out = model.predict_generator(testloader)
torch.max(out[0][0], 1)[1]

# CIFAR10 -Conv2d

In [None]:
bs = 64

transform = transforms.ToTensor()

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

In [126]:
trainset[0][0].shape

torch.Size([3, 32, 32])

In [127]:
trainloader = DataLoader(trainset, batch_size=bs, shuffle=True)
testloader = DataLoader(testset, batch_size=bs)

In [128]:
input = Input(shape=(3, 32, 32))
x = Conv2d(7, 3, 1, 'same', 1, nn.ReLU())(input)
y = Conv2d(10, 3, 1, 'same', 1, nn.ReLU())(x)
z = Conv2d(6, 3, 1, 'same', 1, nn.ReLU())(y)
a = Flatten()(z)
b = Dense(10, activation=nn.ReLU())(a)

model = Model(input, b, 'cuda')
model.compile(optim.Adam(model.parameters(), lr=0.001), nn.CrossEntropyLoss())
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 7, 32, 32]             196
              ReLU-2            [-1, 7, 32, 32]               0
            Conv2d-3           [-1, 10, 32, 32]             640
              ReLU-4           [-1, 10, 32, 32]               0
            Conv2d-5            [-1, 6, 32, 32]             546
              ReLU-6            [-1, 6, 32, 32]               0
    FlattenedLayer-7                 [-1, 6144]               0
            Linear-8                   [-1, 10]          61,450
              ReLU-9                   [-1, 10]               0
Total params: 62,832
Trainable params: 62,832
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.41
Params size (MB): 0.24
Estimated Total Size (MB): 0.66
---------------------------------------------

In [129]:
model.fit_generator(trainloader, 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [130]:
model.evaluate_generator(testloader)



In [131]:
out = model.predict_generator(testloader)
torch.max(out[0], 1)[1]

tensor([3, 0, 0, 0, 4, 0, 0, 3, 4, 0, 0, 0, 4, 0, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0,
        4, 4, 4, 0, 0, 4, 3, 4, 4, 3, 0, 0, 4, 0, 0, 3, 0, 3, 3, 4, 0, 0, 3, 0,
        4, 4, 0, 0, 3, 3, 0, 0, 0, 3, 3, 3, 4, 3, 3, 0], device='cuda:0')

# MNIST- Dense layer

In [132]:
bs = 32

transform = transforms.Compose([
                                transforms.Resize((1, 28*28)),
                                transforms.ToTensor()
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

In [133]:
trainset[0][0].shape

torch.Size([1, 1, 784])

In [134]:
trainloader = DataLoader(trainset, batch_size=bs, shuffle=True)
testloader = DataLoader(testset, batch_size=bs)

In [135]:
input = Input(shape=(1, 784))
x = Dense(700, nn.ReLU())(input)
y = Dense(200, nn.ReLU())(x)
z = Dense(300, activation=nn.ReLU())(y)
a = Dense(100, activation=nn.ReLU())(z)
b = Dense(10, activation=nn.ReLU())(a)

model = Model(input, b, 'cuda')
model.compile(optim.Adam(model.parameters(), lr=0.001), nn.CrossEntropyLoss())
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 700]         549,500
              ReLU-2               [-1, 1, 700]               0
            Linear-3               [-1, 1, 200]         140,200
              ReLU-4               [-1, 1, 200]               0
            Linear-5               [-1, 1, 300]          60,300
              ReLU-6               [-1, 1, 300]               0
            Linear-7               [-1, 1, 100]          30,100
              ReLU-8               [-1, 1, 100]               0
            Linear-9                [-1, 1, 10]           1,010
             ReLU-10                [-1, 1, 10]               0
Total params: 781,110
Trainable params: 781,110
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 2.98
Estimated T

In [136]:
model.fit_generator(trainloader, 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [137]:
model.evaluate_generator(testloader)



In [None]:
out = model.predict_generator(testloader)
torch.max(out[0][0], 1)[1]

# MNIST -Conv2d

In [152]:
bs = 32

transform = transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize(0.5, 1)
                                ])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

In [153]:
#images for feeding fit_generator dense network 
#must be of the form (1, 1, height*width*channel)
trainset[0][0].shape

torch.Size([1, 28, 28])

In [154]:
trainloader = DataLoader(trainset, batch_size=bs, shuffle=True)
testloader = DataLoader(testset, batch_size=bs)

In [178]:
input = Input(shape=(1, 28, 28))
x = Conv2d(25, 3, 1, 3, 1, nn.ReLU())(input)
y = Conv2d(50, 3, 1, 'same',1 , nn.ReLU())(x)
y = Conv2d(20, 3, 1, 'same', 1, nn.ReLU())(y)
z = Flatten()(y)
a = Dense(100, activation=nn.ReLU())(z)
b = Dense(10, activation=nn.ReLU())(a)

model = Model(input, b, 'cuda')
model.compile(optim.Adam(model.parameters(), lr=0.0001), nn.CrossEntropyLoss())
model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 25, 32, 32]             250
              ReLU-2           [-1, 25, 32, 32]               0
            Conv2d-3           [-1, 50, 32, 32]          11,300
              ReLU-4           [-1, 50, 32, 32]               0
            Conv2d-5           [-1, 20, 32, 32]           9,020
              ReLU-6           [-1, 20, 32, 32]               0
    FlattenedLayer-7                [-1, 20480]               0
            Linear-8                  [-1, 100]       2,048,100
              ReLU-9                  [-1, 100]               0
           Linear-10                   [-1, 10]           1,010
             ReLU-11                   [-1, 10]               0
Total params: 2,069,680
Trainable params: 2,069,680
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forw

In [179]:
model.fit_generator(trainloader, 3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [180]:
model.evaluate_generator(testloader)



In [181]:
x = model.predict_generator(testloader)

In [182]:
torch.max(x[0], 1)[1]

tensor([0, 2, 1, 0, 4, 1, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 4, 0, 0, 0, 0,
        4, 0, 0, 4, 0, 1, 3, 1], device='cuda:0')

In [184]:
for i in range(32):
  print(testloader.dataset[i][1])

7
2
1
0
4
1
4
9
5
9
0
6
9
0
1
5
9
7
3
4
9
6
6
5
4
0
7
4
0
1
3
1
