In [1]:
import datetime as dt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms

In [2]:
# Wall-clock start mark. The final cell subtracts this from a second
# timestamp to report how long the notebook took to run end to end.
start = dt.datetime.now()

In [3]:
# Per-image preprocessing: convert to a tensor, then normalise the single
# channel with the given mean and standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST training split, stored under ../down (fetched on demand).
trainset = datasets.MNIST(
    root='../down',
    train=True,
    download=True,
    transform=transform,
)

In [4]:
# Shuffled mini-batches of 64 samples, loaded by 8 worker processes.
train_loader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=64,
    shuffle=True,
    num_workers=8,
)

In [5]:
class Net(nn.Module):
    """Small convolutional classifier: two conv layers, then two linear layers.

    ``forward`` returns log-probabilities over 10 classes (``log_softmax``),
    so the output is meant to be fed to ``F.nll_loss``.

    ``fc1`` expects 320 features per sample. With single-channel 28x28 inputs
    the second pooling stage yields a 20 x 4 x 4 map, i.e. 20 * 4 * 4 = 320.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: image tensor with one channel; each sample must reduce to
                exactly ``fc1.in_features`` (320) values after the two
                conv/pool stages.

        Returns:
            Tensor of shape ``(rows, 10)`` holding log-probabilities.

        Raises:
            RuntimeError: if a sample does not flatten to ``fc1.in_features``
                values (i.e. the input has an unsupported spatial size).
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))

        # A bare view(-1, 320) would happily regroup elements across samples
        # whenever the total element count is a multiple of 320, returning a
        # tensor with the wrong number of rows. Check the per-sample size
        # (channels * height * width) first so that case fails loudly.
        expected = self.fc1.in_features
        per_sample = x.size(-3) * x.size(-2) * x.size(-1)
        if per_sample != expected:
            raise RuntimeError(
                f"Net expects {expected} features per sample after the "
                f"convolutional stages, got {per_sample} "
                f"(feature map shape {tuple(x.shape)})."
            )
        x = x.view(-1, expected)

        x = F.relu(self.fc1(x))
        # Functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [6]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Training on device {device}.")

Training on device cuda.


In [7]:
# Fresh network on the selected device, optimised with SGD + momentum.
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Three passes over the training data. After each pass, print the loss of
# the last mini-batch processed in that pass.
for epoch in range(3):
    for images, labels in train_loader:
        images = images.to(device=device)
        labels = labels.to(device=device)

        log_probs = model(images)
        loss = F.nll_loss(log_probs, labels)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Current loss', loss.item())

Current loss 0.8304274082183838
Current loss 0.3340878188610077
Current loss 0.2962528467178345


In [8]:
# Persist the trained weights, then reload them into a fresh Net to confirm
# that the checkpoint round-trips.
checkpoint_path = '../down/MNIST/mnist.pth'
torch.save(model.state_dict(), checkpoint_path)

pretrained_model = Net()
# `model` may live on the GPU, in which case the saved tensors are CUDA
# tensors. map_location='cpu' deserialises them straight onto the CPU, where
# `pretrained_model` lives, so loading also works on a machine without CUDA.
pretrained_model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))

<All keys matched successfully>

In [9]:
# Report the wall-clock time elapsed since `start` was recorded.
# Timings noted from earlier runs:
#   gcp cpu: 0:00:25.551305
#   gcp gpu: 0:00:16.436214
end = dt.datetime.now()
elapsed = end - start
print(elapsed)

0:00:16.436214
