# MNIST trained with MobileNet v2

### Links
- how to convert MNIST from 1 channel to 3 channels
  - https://discuss.pytorch.org/t/best-way-to-deal-with-1-channel-images/26699
  - Lambda transform, e.g. `transforms.Lambda(lambda image: image.convert('RGB'))`
- some person's code - https://github.com/marvis/pytorch-mobilenet/blob/master/main.py
- another person's code - https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/MobileNetV2.py


In [1]:
import sys
sys.path.append("./pytorch-mobilenet-v2/")
from MobileNetV2 import MobileNetV2

In [2]:
import argparse

# from kindling.nan_police import torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [3]:
# Show which PyTorch build is active: version string first, then the path of
# the imported package, one per line.
print(torch.__version__, torch.__file__, sep="\n")

1.0.1.post2
/Users/william/.local/share/virtualenvs/william-oBc2a6gD/lib/python3.7/site-packages/torch/__init__.py


In [4]:
# Run configuration expressed as command-line options. Help strings use
# argparse's ``%(default)s`` placeholder so the documented default always
# matches the real one and cannot drift when a default is edited.
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: %(default)s)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: %(default)s)')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: %(default)s)')
parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                    help='learning rate (default: %(default)s)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: %(default)s)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: %(default)s)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--save-model', action='store_true', default=False,
                    help='For Saving the current Model')


_StoreTrueAction(option_strings=['--save-model'], dest='save_model', nargs=0, const=True, default=False, type=None, choices=None, help='For Saving the current Model', metavar=None)

In [5]:
# Parse an explicitly empty argument list so every option takes its default
# instead of being read from sys.argv. A list is passed rather than a string
# because argparse iterates its input, which would split a non-empty string
# into single characters.
args = parser.parse_args([])
args

Namespace(batch_size=64, epochs=10, log_interval=10, lr=0.0001, momentum=0.5, no_cuda=False, save_model=False, seed=1, test_batch_size=1000)

In [6]:
# CUDA is used only when it has not been disabled with --no-cuda and PyTorch
# reports it as available; availability is not queried when it is disabled.
if args.no_cuda:
    use_cuda = False
else:
    use_cuda = torch.cuda.is_available()
use_cuda

False

In [7]:
# Seed PyTorch's random number generator with the configured --seed value.
# The call returns the generator object, which is what the cell displays.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x111e9f290>

In [8]:
# Pick the device that the model and every batch will be moved to.
device = torch.device("cuda") if use_cuda else torch.device("cpu")
device

device(type='cpu')

In [9]:
# Extra DataLoader options: one worker and pinned memory when training on
# CUDA, nothing extra otherwise.
kwargs = {}
if use_cuda:
    kwargs.update(num_workers=1, pin_memory=True)
kwargs

{}

In [19]:
# Training data pipeline: MNIST digits are resized to the 32x32 input size the
# model is built for and expanded to three channels, because the model's first
# convolution takes 3 input channels.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       # interpolation=2 is PIL's bilinear filter.
                       transforms.Resize((32, 32), interpolation=2),
                       # Replicates the single grey channel three times, giving
                       # the same pixels as image.convert('RGB'). Unlike a
                       # lambda, this transform can be pickled when the loader
                       # uses worker processes.
                       transforms.Grayscale(num_output_channels=3),
                       transforms.ToTensor(),
                       # One mean/std entry per channel, so all three channels
                       # are normalized regardless of how the installed
                       # torchvision handles a shorter tuple.
                       transforms.Normalize((0.1307,) * 3, (0.3081,) * 3)
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)


In [20]:
# Build MobileNetV2 with 10 output classes (one per digit), configured for
# 32x32 inputs with a width multiplier of 1, and move it to the selected
# device. The bare name below displays the layer structure.
model = MobileNetV2(n_class=10, input_size=32, width_mult=1.).to(device)
model

MobileNetV2(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace)
        (3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace)
        (3): Conv2d(96

In [21]:
# SGD over all model parameters, using the parsed learning rate and momentum.
# The bare name below displays the resulting optimizer settings.
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.0001
    momentum: 0.5
    nesterov: False
    weight_decay: 0
)

In [22]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``, updating ``model`` in place.

    Args:
        args: Settings object; only ``args.log_interval`` is read here.
        model: Network to train. It is called as ``model(data)`` and its
            output is treated as per-class scores for each sample.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that also
            supports ``len()`` and exposes a sized ``.dataset`` attribute
            (both are used for the progress message).
        optimizer: Optimizer that owns the parameters of ``model``.
        epoch: Epoch number, used only in the progress message.

    Returns:
        None. A progress line is printed every ``args.log_interval`` batches.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # The batch has to live on the same device as the model parameters.
        data, target = data.to(device), target.to(device)
        # Gradients accumulate across backward() calls, so clear whatever the
        # previous batch left behind before computing new ones.
        optimizer.zero_grad()
        output = model(data)
        # cross_entropy applies log_softmax itself, so it is correct for raw
        # class scores; nll_loss would require log-probabilities. For a model
        # that already ends in log_softmax the value is unchanged, because
        # log_softmax of log-probabilities returns them as they are.
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


In [None]:
# Train for the number of epochs given by --epochs so the parsed setting
# controls the length of the run. Epochs are numbered from 0.
for epoch in range(args.epochs):
    train(args, model, device, train_loader, optimizer, epoch=epoch)







In [None]:
# Scratch check: display the optimizer base class.
torch.optim.Optimizer

In [None]:
# Scratch check: display the concrete class of the optimizer created above.
type(optimizer)

In [None]:
# Scratch check: is the optimizer an instance of torch.optim.SGD?
isinstance(optimizer, torch.optim.SGD)

In [None]:
# Scratch check: does the optimizer's class derive from the Optimizer base class?
isinstance(optimizer, torch.optim.Optimizer)