In [0]:
# Mount Google Drive inside the Colab VM at /content/gdrive. On first run this
# prompts for an OAuth authorization code.
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········


In [0]:
cd /content/gdrive/My Drive/weighted-opt/mnist/

In [0]:
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
%matplotlib inline

import torch
import numpy as np
# Seed PyTorch's and NumPy's global random number generators with the same
# fixed value to make runs more repeatable.
torch.manual_seed(1023)
np.random.seed(1023)

import json

In [0]:
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [0]:
# MNIST dataset (images converted with ToTensor() only, no normalization).
# NOTE(review): no visible cell reads train_dataset / test_dataset -- the data
# loaders construct their own MNIST instances. Confirm before removing.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [0]:
# Data loader parameters
train_batch_size = 100
test_batch_size = 1000
use_cuda = torch.cuda.is_available()
# Extra DataLoader keyword arguments: one worker process and pinned host
# memory are requested only when CUDA is available.
if use_cuda:
    kwargs = dict(num_workers=1, pin_memory=True)
else:
    kwargs = dict()

In [0]:
# Data loaders: both splits are converted to tensors and then normalized with
# mean 0.1307 and std 0.3081.
from torchvision import datasets, transforms
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=train_batch_size,
    shuffle=True,
    **kwargs
)

# NOTE(review): shuffle=True is kept on the test split as in the original;
# batch order does not affect an accuracy computed over the whole split.
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
        ]),
    ),
    batch_size=test_batch_size,
    shuffle=True,
    **kwargs
)

In [0]:
class GNet(nn.Module):
    """Two-layer MLP whose hidden activations are rescaled before the output layer.

    Each hidden unit's ReLU activation is multiplied by the inverse L2 norm of
    that unit's row in ``fc1.weight`` before being passed to ``fc2``.

    Args:
        model_name: Label stored on the instance as ``__name__``.
        input_size: Number of input features of ``fc1``.
        hidden_size: Number of hidden units (output of ``fc1``, input of ``fc2``).
        num_classes: Number of outputs of ``fc2``.

    Attributes:
        opt: Placeholder for an optimizer; initialised to ``None``.
        H: Diagonal matrix of the inverse row norms from the most recent
            forward pass (``None`` until ``forward`` has been called).
    """

    def __init__(self, model_name, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.__name__ = model_name
        self.__net__ = "GNet"
        self.opt = None
        self.H = None

    def forward(self, x):
        """Return the ``fc2`` outputs for ``x`` and refresh ``self.H``."""
        hidden = self.relu(self.fc1(x))
        # One scale factor per hidden unit: 1 / ||row of fc1.weight||_2.
        inv_row_norm = 1 / torch.norm(self.fc1.weight, p=2, dim=1)
        # Keep the diagonal-matrix form on the instance for code that reads
        # model.H, but do not use it for the computation itself.
        self.H = torch.diag(inv_row_norm)
        # Scaling the columns by broadcasting is the same product as
        # `hidden @ diag(inv_row_norm)` without the dense
        # (hidden_size x hidden_size) matrix multiplication.
        return self.fc2(hidden * inv_row_norm)

In [0]:
class NeuralNet(nn.Module):
    """Plain fully connected classifier: Linear -> ReLU -> Linear.

    ``model_name`` is stored as ``__name__``; ``opt`` is a placeholder for an
    optimizer and starts out as ``None``.
    """

    def __init__(self, model_name, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers (created in the same order: fc1, relu, fc2).
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)
        # Bookkeeping attributes read by the training code.
        self.__name__ = model_name
        self.__net__ = "NeuralNet"
        self.opt = None

    def forward(self, x):
        """Apply fc1, ReLU and fc2 in sequence and return the result."""
        return self.fc2(self.relu(self.fc1(x)))

In [0]:
# Set model parameters
# Layer sizes: flattened 28x28 input, 500 hidden units, 10 output classes.
input_size, hidden_size, num_classes = 28 * 28, 500, 10
# Optimisation settings shared by every model below.
num_epochs, learning_rate = 5, 0.001
criterion = nn.CrossEntropyLoss()

In [0]:
def build_model(name, Net, optimizer, input_size, hidden_size, num_classes, learning_rate):
    """Instantiate ``Net`` on the global ``device`` and attach its optimizer.

    ``Net`` is called as ``Net(name, input_size, hidden_size, num_classes)``.
    ``optimizer`` is called as ``optimizer(parameters, lr=learning_rate)`` and
    the result is stored on the returned model as ``model.opt``.
    """
    net = Net(name, input_size, hidden_size, num_classes)
    net = net.to(device)
    net.opt = optimizer(net.parameters(), lr=learning_rate)
    return net

In [0]:
def train(model, num_epochs, learning_rate):
    """Train ``model`` on the global ``train_loader`` and save a training log.

    The optimizer is taken from ``model.opt`` and the loss from the global
    ``criterion``. Every 100 steps the current batch loss is printed and
    recorded, and the model is evaluated with ``test(model)``.

    Args:
        model: Network exposing ``opt``, ``__name__`` and ``__net__``
            (and ``H`` when ``__net__ == "GNet"``).
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Used only to build the output path; the optimizer's
            own learning rate is whatever ``model.opt`` was created with.

    Returns:
        dict with ``"train_loss"`` and ``"test_acc"`` lists (one entry per
        100 steps) and, for a GNet that has run a forward pass, ``"H"``: the
        diagonal of ``model.H`` as a list of floats. The same dict is written
        to ``./record/lr<learning_rate>/<model name>_lr<learning_rate>.json``.
    """
    import os

    total_step = len(train_loader)
    record = {"train_loss": [], "test_acc": []}
    optimizer = model.opt

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to the configured device
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                loss_value = loss.item()
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss_value))
                record["train_loss"].append(loss_value)

                # Evaluate on the test split at the same checkpoints
                accuracy = test(model)
                record["test_acc"].append(accuracy)

    # save record and parameters
    if model.__net__ == "GNet" and getattr(model, "H", None) is not None:
        # detach() first: H may still be attached to the autograd graph when
        # the most recent forward pass was a training step.
        record["H"] = model.H.detach().diagonal().cpu().tolist()

    record_path = './record/lr{}/{}_lr{}.json'.format(learning_rate, model.__name__, learning_rate)
    # Create the per-learning-rate folder on demand instead of failing in open().
    os.makedirs(os.path.dirname(record_path), exist_ok=True)
    with open(record_path, 'w') as fp:
        json.dump(record, fp)

    return record

In [0]:
def test(model):
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            accuracy = 100 * correct / total
    
    print('Accuracy of the network on the 10000 test images: {} %'.format(accuracy))
    return accuracy

In [0]:
# GNet paired with a plain SGD optimizer.
gmodel_sgd = build_model(
    name="gmodel_sgd",
    Net=GNet,
    optimizer=torch.optim.SGD,
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    learning_rate=learning_rate,
)

In [0]:
# GNet paired with an Adam optimizer.
gmodel_adam = build_model(
    name="gmodel_adam",
    Net=GNet,
    optimizer=torch.optim.Adam,
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    learning_rate=learning_rate,
)

In [0]:
# GNet paired with an Adagrad optimizer.
gmodel_adagrad = build_model(
    name="gmodel_adagrad",
    Net=GNet,
    optimizer=torch.optim.Adagrad,
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    learning_rate=learning_rate,
)

In [0]:
# Train the SGD GNet and keep its loss/accuracy log.
record_gmodel_sgd = train(
    model=gmodel_sgd,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)

In [0]:
# Train the Adagrad GNet and keep its loss/accuracy log.
record_gmodel_adagrad = train(
    model=gmodel_adagrad,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)

In [0]:
# Train the Adam GNet and keep its loss/accuracy log.
record_gmodel_adam = train(
    model=gmodel_adam,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)

In [0]:
# Baseline NeuralNet paired with a plain SGD optimizer.
nmodel_sgd = build_model(
    name="nmodel_sgd",
    Net=NeuralNet,
    optimizer=torch.optim.SGD,
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    learning_rate=learning_rate,
)

In [0]:
# Baseline NeuralNet paired with an Adam optimizer.
nmodel_adam = build_model(
    name="nmodel_adam",
    Net=NeuralNet,
    optimizer=torch.optim.Adam,
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    learning_rate=learning_rate,
)

In [0]:
# Baseline NeuralNet paired with an Adagrad optimizer.
nmodel_adagrad = build_model(
    name="nmodel_adagrad",
    Net=NeuralNet,
    optimizer=torch.optim.Adagrad,
    input_size=input_size,
    hidden_size=hidden_size,
    num_classes=num_classes,
    learning_rate=learning_rate,
)

In [0]:
# Train the SGD baseline and keep its loss/accuracy log.
record_nmodel_sgd = train(
    model=nmodel_sgd,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)

In [0]:
# Train the Adam baseline and keep its loss/accuracy log.
record_nmodel_adam = train(
    model=nmodel_adam,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)

In [0]:
# Train the Adagrad baseline and keep its loss/accuracy log.
record_nmodel_adagrad = train(
    model=nmodel_adagrad,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
)