<a href="https://colab.research.google.com/github/XinyiYS/FairAndPrivateFederatedLearning/blob/master/pytorch_MLP_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary
from torch.utils import data

import numpy as np

In [0]:
import os
root = './mnist'
if not os.path.exists(root):
    os.mkdir(root)

trans = transforms.Compose([transforms.Pad((2,2,2,2)), transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])
# if not exist, download mnist dataset
train_set = datasets.MNIST(root=root, train=True, transform=trans, download=True)
test_set = datasets.MNIST(root=root, train=False, transform=trans, download=True)
training_generator = data.DataLoader(train_set, **params)
validation_generator = data.DataLoader(test_set, **params)

In [0]:
def test(model, device, test_loader, verbose=True):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data.float())
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability 
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    if verbose:
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    test_acc = 1.* correct / len(test_loader.dataset)
    return test_acc

def train(model, device, data_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

In [0]:
# Parameters
params = {'batch_size': 10, 
          'shuffle': True,
          'num_workers': 6}
max_epochs = 5
lr = 1e-2

device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')

In [9]:
# MLP = nn.Sequential(
#     nn.Linear(1024, 128),
#     nn.ReLU(),
#     nn.Linear(128, 64),
#     nn.ReLU(),
#     nn.Linear(64, 10),
#     nn.LogSoftmax()).to(device)

class MLP_Net(nn.Module):
    def __init__(self):
        super(MLP_Net, self).__init__()        
        self.fc1 = nn.Linear(1024, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1,  1024)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)        
        return F.log_softmax(x, dim=1)

MLP = MLP_Net().to(device)
optimizer = optim.SGD(MLP.parameters(), lr=lr) # TODO momentum is not supported at the moment
summary(MLP, (1, 32, 32))

train(MLP, device, training_generator, optimizer, max_epochs)
test(MLP, device, validation_generator)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 128]         131,200
            Linear-2                   [-1, 64]           8,256
            Linear-3                   [-1, 10]             650
Total params: 140,106
Trainable params: 140,106
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.53
Estimated Total Size (MB): 0.54
----------------------------------------------------------------

Test set: Average loss: 0.1673, Accuracy: 9501/10000 (95%)



0.9501

In [10]:
# CNN = nn.Sequential(
#     nn.Conv2d()
#     nn.Tanh(),
#     nn.MaxPool2d(),
#     nn.Conv2d(),
#     nn.Tanh(),
#     nn.MaxPool2d(),
#     nn.Flatten(),
#     nn.Linear(256, 200),
#     nn.Tanh(),
#     nn.Linear(200, 10),
#     nn.LogSoftmax()).to(device)

class CNN_Net(nn.Module):
    def __init__(self):
        super(CNN_Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, 3, 1)
        self.conv2 = nn.Conv2d(64, 16, 7, 1)
        self.fc1 = nn.Linear(4*4*16, 200)
        self.fc2 = nn.Linear(200, 10)

    def forward(self, x):
        x = x.view(-1, 1, 32, 32)
        x = F.tanh(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.tanh(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*16)
        x = F.tanh(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

CNN = CNN_Net().to(device)

summary(CNN, (1, 32, 32))
optimizer = optim.SGD(CNN.parameters(), lr=lr)

train(CNN, device, training_generator, optimizer, max_epochs)
test(CNN, device, validation_generator)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 30, 30]             640
            Conv2d-2             [-1, 16, 9, 9]          50,192
            Linear-3                  [-1, 200]          51,400
            Linear-4                   [-1, 10]           2,010
Total params: 104,242
Trainable params: 104,242
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.45
Params size (MB): 0.40
Estimated Total Size (MB): 0.85
----------------------------------------------------------------





Test set: Average loss: 0.0794, Accuracy: 9776/10000 (98%)



0.9776