In [1]:
! pip install git+https://github.com/firekind/athena

Collecting git+https://github.com/firekind/athena
  Cloning https://github.com/firekind/athena to c:\users\shyam\appdata\local\temp\pip-req-build-rm7na0lh
Building wheels for collected packages: athena
  Building wheel for athena (setup.py): started
  Building wheel for athena (setup.py): finished with status 'done'
  Created wheel for athena: filename=athena-0.0.1-py3-none-any.whl size=17745 sha256=fa522e2228befd5a7091e40c47cc44c23b4c1c8d5d88b02acbeff7b19fb37af8
  Stored in directory: C:\Users\shyam\AppData\Local\Temp\pip-ephem-wheel-cache-wmxwqraz\wheels\c2\36\ea\fe5a118d0035f6f760fc49471824263f1b0e611bcba3555bde
Successfully built athena


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchsummary import summary
from torchvision import transforms
import torchvision as tv

from athena import ClassificationSolver, Experiment, datasets
from athena.layers import GhostBatchNorm

In [3]:
# Run-wide settings. A visible CUDA device selects both the "cuda" target and
# the larger batch size; otherwise fall back to the CPU with a smaller batch.
if torch.cuda.is_available():
    device, batch_size = "cuda", 128
else:
    device, batch_size = "cpu", 64
# batch_size = 4
epochs = 50  # number of training epochs handed to every experiment

In [4]:
class DepthwiseConv2d(nn.Module):
    """Depthwise convolution followed by a 1x1 (pointwise) convolution.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the pointwise convolution.
        kernel_size: kernel size of the depthwise convolution.
        padding: padding applied by the depthwise convolution (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=0):
        super().__init__()

        # groups == in_channels: the channel count is unchanged and the
        # convolution is split into one group per input channel.
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            padding=padding,
            groups=in_channels,
        )
        # 1x1 convolution that maps in_channels to out_channels.
        self.point = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise one."""
        return self.point(self.conv(x))

In [5]:
class SirNet(nn.Module):
    """Small baseline classifier: two conv/pool stages and three linear layers.

    Both convolutions are DepthwiseConv2d layers with 5x5 kernels. The flatten
    step assumes each sample reaches it as a 16x5x5 feature map (which is what
    a 3x32x32 input produces) and the network returns 10 raw scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = DepthwiseConv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = DepthwiseConv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape (N, 10)."""
        # Convolution -> ReLU -> 2x2 max-pool, twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Flatten to the width fc1 was built for (16 * 5 * 5 == 400).
        x = x.view(-1, self.fc1.in_features)

        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

In [6]:
class Model(nn.Module):
    """CNN made of three stacks of depthwise-separable convolution units.

    Layout: block1 -> transition1 -> block2 -> transition2 -> block3 -> out_block.
    Every block holds three units of (DepthwiseConv2d 3x3 with padding 1,
    BatchNorm2d, ReLU, Dropout). Each transition applies a 2x2 max-pool and a
    1x1 convolution that reduces the channel count. The head averages each
    channel over the whole remaining feature map, maps 256 -> 64 -> 10 channels
    with 1x1 convolutions, and returns log-probabilities.

    Args:
        in_channels: number of channels in the input images (default 3).
        dropout_value: dropout probability used in every unit (default 0.25).
    """

    def __init__(self, in_channels = 3, dropout_value = 0.25):
        super(Model, self).__init__()

        def block(*widths):
            # widths = (c0, c1, c2, ...) -> units c0->c1, c1->c2, ... laid out
            # flat, so the Sequential indices match a hand-written listing.
            layers = []
            for c_in, c_out in zip(widths, widths[1:]):
                layers.extend(self._conv_unit(c_in, c_out, dropout_value))
            return nn.Sequential(*layers)

        # The first unit honours `in_channels` rather than assuming RGB input.
        self.block1 = block(in_channels, 32, 64, 128)

        self.transition1 = nn.Sequential(
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 32, 1),
        )

        self.block2 = block(32, 64, 128, 256)

        self.transition2 = nn.Sequential(
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, 64, 1),
        )

        self.block3 = block(64, 128, 256, 256)

        self.out_block = nn.Sequential(
            # Global average pooling. A fixed 7x7 window would ignore the last
            # row/column of the 8x8 map produced by 32x32 inputs and would
            # leave a >1x1 map for larger inputs.
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(256, 64, 1),
            nn.Conv2d(64, 10, 1),
        )

    @staticmethod
    def _conv_unit(c_in, c_out, dropout_value):
        """Return the layers of one conv unit: separable conv, BN, ReLU, dropout."""
        return [
            DepthwiseConv2d(c_in, c_out, 3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
            nn.Dropout(dropout_value),
        ]

    def forward(self, x):
        """Return log-probabilities of shape (N, 10) for a batch of N images."""
        x = self.block1(x)
        x = self.transition1(x)
        x = self.block2(x)
        x = self.transition2(x)
        x = self.block3(x)
        x = self.out_block(x)

        # (N, 10, 1, 1) -> (N, 10); flattening from dim 1 keeps the batch
        # dimension fixed instead of inferring it from the element count.
        x = torch.flatten(x, 1)
        return F.log_softmax(x, dim=-1)

In [7]:
# Print the per-layer output shapes and parameter counts of a fresh Model
# for a single 3x32x32 input.
summary(Model().to(device), input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 32, 32]              30
            Conv2d-2           [-1, 32, 32, 32]             128
   DepthwiseConv2d-3           [-1, 32, 32, 32]               0
       BatchNorm2d-4           [-1, 32, 32, 32]              64
              ReLU-5           [-1, 32, 32, 32]               0
           Dropout-6           [-1, 32, 32, 32]               0
            Conv2d-7           [-1, 32, 32, 32]             320
            Conv2d-8           [-1, 64, 32, 32]           2,112
   DepthwiseConv2d-9           [-1, 64, 32, 32]               0
      BatchNorm2d-10           [-1, 64, 32, 32]             128
             ReLU-11           [-1, 64, 32, 32]               0
          Dropout-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]             640
           Conv2d-14          [-1, 128,

In [8]:
# NOTE(review): `train_transform` is not passed to either loader in this cell
# (both request use_default_transforms=True) - confirm whether it is still needed.
train_transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),  # convert to a tensor
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # normalize each channel
    ]
)

# CIFAR-10 loaders from athena; only the test loader passes train=False.
train_loader = datasets.cifar10(
    download=True,
    batch_size=batch_size,
    use_default_transforms=True,
)
test_loader = datasets.cifar10(
    train=False,
    download=True,
    batch_size=batch_size,
    use_default_transforms=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [9]:
# Optional sanity check: fetch one batch and print the image/label shapes.
# dataiter = iter(train_loader)
# img, labels = next(dataiter)
# print(img.shape)
# print(labels.shape)

In [10]:
# Collects every configured experiment so they can be run together later.
exps = []

# Experiment 1: the SirNet baseline, SGD with momentum, cross-entropy loss.
net = SirNet().to(device)
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
exp1 = Experiment(
    name="Sir's Model",
    model=net,
    solver_cls=ClassificationSolver,
    train_args={
        "epochs": epochs,
        "train_loader": train_loader,
        "test_loader": test_loader,
        "optimizer": optimizer,
        "device": device,
        "loss_fn": F.cross_entropy,
    },
)
exps.append(exp1)

In [11]:
# Experiment 2: the depthwise-separable Model, SGD with a higher learning rate
# and momentum than the baseline.
# NOTE(review): no loss_fn is passed, so the solver's default applies; Model
# returns log-probabilities - confirm the default loss expects that.
model = Model().to(device)
# model = tv.models.vgg13(pretrained=False).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.008, momentum=0.95)
exp2 = Experiment(
    name="My Model",
    model=model,
    solver_cls=ClassificationSolver,
    train_args={
        "epochs": epochs,
        "train_loader": train_loader,
        "test_loader": test_loader,
        "optimizer": optimizer,
        "device": device,
    },
)
exps.append(exp2)

In [12]:
# Only exp2 is run in this cell; the commented-out loop below would instead
# run every experiment collected in `exps`.
# for e in exps:
#     e.run()
exp2.run()

[1m[92m=> Running experiment: My Model[0m
Epoch: 1 / 50
Test set: Average loss: 2.7876, Accuracy: 1754/10000 (17.54%)

Epoch: 2 / 50
Test set: Average loss: 3.1927, Accuracy: 1913/10000 (19.13%)

Epoch: 3 / 50
Test set: Average loss: 2.3994, Accuracy: 2845/10000 (28.45%)

Epoch: 4 / 50
Test set: Average loss: 2.4022, Accuracy: 3191/10000 (31.91%)

Epoch: 5 / 50
Test set: Average loss: 1.9437, Accuracy: 3875/10000 (38.75%)

Epoch: 6 / 50
Test set: Average loss: 1.7407, Accuracy: 4469/10000 (44.69%)

Epoch: 7 / 50
Test set: Average loss: 1.2082, Accuracy: 5755/10000 (57.55%)

Epoch: 8 / 50
Test set: Average loss: 1.0124, Accuracy: 6389/10000 (63.89%)

Epoch: 9 / 50
Test set: Average loss: 1.0071, Accuracy: 6426/10000 (64.26%)

Epoch: 10 / 50
Test set: Average loss: 0.9588, Accuracy: 6616/10000 (66.16%)

Epoch: 11 / 50
Test set: Average loss: 0.8384, Accuracy: 7026/10000 (70.26%)

Epoch: 12 / 50
Test set: Average loss: 0.8456, Accuracy: 7098/10000 (70.98%)

Epoch: 13 / 50
Test set: Ave