In [4]:
import torch
import numpy as np
from torchvision import datasets
import torchvision
import torchvision.transforms as transforms

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20

# convert data to torch.FloatTensor

use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu") 

PATH = "save/trained_cnn_model.pt"

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307), (0.3081))])


# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True, download=True,  transform=transform)
test_data  = datasets.MNIST(root='data', train=False, download=True, transform=transform)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
    num_workers=num_workers)


import torch.nn as nn
import torch.nn.functional as F

## Define the NN architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

model = Net().to(device)
print(model)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [2]:
# number of epochs to train the model
n_epochs = 10  
# per epoch, all the training data set is used once
model.train() # prep model for training


for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    train_loss1 = 0.0
    train_loss2 = 0.0
    
    ###################
    # train the model #
    ###################
    for data, target in train_loader:
        data, target = data.to(device), target.to(device) # loading to GPU
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        
        loss1 = criterion(output, target)
        loss2 = model.conv1.weight.abs().sum() + model.conv2.weight.abs().sum()
        loss = loss1
        
        loss.backward()
        optimizer.step()
        train_loss1 += loss1.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        train_loss2 += loss2.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        train_loss  += loss.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        
    # print training statistics 
    # calculate average loss over an epoch
    train_loss1 = train_loss1/len(train_loader.dataset)
    train_loss2 = train_loss2/len(train_loader.dataset)
    train_loss = train_loss/len(train_loader.dataset)

    print('Epoch: {} \tTraining Loss1: {:.6f}'.format(epoch+1, train_loss1))
    print('Epoch: {} \tTraining Loss2: {:.6f}'.format(epoch+1, train_loss2))
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))
    
# see following link for details of state_dict   
# https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_loss,
            }, PATH)

Epoch: 1 	Training Loss1: 0.362894
Epoch: 1 	Training Loss2: 611.533218
Epoch: 1 	Training Loss: 0.362894
Epoch: 2 	Training Loss1: 0.136289
Epoch: 2 	Training Loss2: 635.245645
Epoch: 2 	Training Loss: 0.136289
Epoch: 3 	Training Loss1: 0.096651
Epoch: 3 	Training Loss2: 649.392149
Epoch: 3 	Training Loss: 0.096651
Epoch: 4 	Training Loss1: 0.078177
Epoch: 4 	Training Loss2: 659.570448
Epoch: 4 	Training Loss: 0.078177
Epoch: 5 	Training Loss1: 0.066309
Epoch: 5 	Training Loss2: 668.196860
Epoch: 5 	Training Loss: 0.066309
Epoch: 6 	Training Loss1: 0.057741
Epoch: 6 	Training Loss2: 675.906452
Epoch: 6 	Training Loss: 0.057741
Epoch: 7 	Training Loss1: 0.050595
Epoch: 7 	Training Loss2: 682.439263
Epoch: 7 	Training Loss: 0.050595
Epoch: 8 	Training Loss1: 0.045543
Epoch: 8 	Training Loss2: 688.437604
Epoch: 8 	Training Loss: 0.045543
Epoch: 9 	Training Loss1: 0.042923
Epoch: 9 	Training Loss2: 693.733981
Epoch: 9 	Training Loss: 0.042923
Epoch: 10 	Training Loss1: 0.038801
Epoch: 10 

In [3]:
PATH = "save/trained_cnn_model.pt"
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()

test_loss = 0
correct = 0

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device) # loading to GPU
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

print("Weight sum:",model.conv1.weight.abs().sum().item())


Test set: Accuracy: 9887/10000 (99%)

Weight sum: 65.00849151611328


In [4]:
# Define model again, to start over with a different loss function
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
model.train() # prep model for training

PATH = "save/trained_cnn_model_customLoss1.pt"

# number of epochs to train the model
n_epochs = 10

# Train the model again
for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    train_loss1 = 0.0
    train_loss2 = 0.0
    
    ###################
    # train the model #
    ###################
    for data, target in train_loader:
        data, target = data.to(device), target.to(device) # loading to GPU
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        
        loss1 = criterion(output, target)
        loss2 = model.conv1.weight.abs().sum() + model.conv2.weight.abs().sum()
        loss = loss1 + loss2
        
        loss.backward()
        optimizer.step()
        train_loss1 += loss1.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        train_loss2 += loss2.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        train_loss  += loss.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        
    # print training statistics 
    # calculate average loss over an epoch
    train_loss1 = train_loss1/len(train_loader.dataset)
    train_loss2 = train_loss2/len(train_loader.dataset)
    train_loss = train_loss/len(train_loader.dataset)

    print('Epoch: {} \tTraining Loss1: {:.6f}'.format(epoch+1, train_loss1))
    print('Epoch: {} \tTraining Loss2: {:.6f}'.format(epoch+1, train_loss2))
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))
    
# see following link for details of state_dict   
# https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_loss,
            }, PATH)

Epoch: 1 	Training Loss1: 2.301933
Epoch: 1 	Training Loss2: 94.130678
Epoch: 1 	Training Loss: 96.432611
Epoch: 2 	Training Loss1: 2.301504
Epoch: 2 	Training Loss2: 93.599866
Epoch: 2 	Training Loss: 95.901371
Epoch: 3 	Training Loss1: 2.301335
Epoch: 3 	Training Loss2: 93.599953
Epoch: 3 	Training Loss: 95.901289
Epoch: 4 	Training Loss1: 2.301357
Epoch: 4 	Training Loss2: 93.599845
Epoch: 4 	Training Loss: 95.901202
Epoch: 5 	Training Loss1: 2.301296
Epoch: 5 	Training Loss2: 93.599913
Epoch: 5 	Training Loss: 95.901209
Epoch: 6 	Training Loss1: 2.301278
Epoch: 6 	Training Loss2: 93.599846
Epoch: 6 	Training Loss: 95.901124
Epoch: 7 	Training Loss1: 2.301130
Epoch: 7 	Training Loss2: 93.599693
Epoch: 7 	Training Loss: 95.900823
Epoch: 8 	Training Loss1: 2.301088
Epoch: 8 	Training Loss2: 93.599614
Epoch: 8 	Training Loss: 95.900702
Epoch: 9 	Training Loss1: 2.301148
Epoch: 9 	Training Loss2: 93.599795
Epoch: 9 	Training Loss: 95.900943
Epoch: 10 	Training Loss1: 2.301245
Epoch: 10 

In [5]:
PATH = "save/trained_cnn_model_customLoss1.pt"
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()

test_loss = 0
correct = 0

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device) # loading to GPU
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

print("Weight sum:",model.conv1.weight.abs().sum().item())


Test set: Accuracy: 1135/10000 (11%)

Weight sum: 1.5222227573394775


In [6]:
# Define model again, to start over with a different loss function
model = Net().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
model.train() # prep model for training

gamma = 0.05

PATH = "save/trained_cnn_model_customLoss2.pt"
# number of epochs to train the model
n_epochs = 10

# Train the model again
for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    train_loss1 = 0.0
    train_loss2 = 0.0
    
    ###################
    # train the model #
    ###################
    for data, target in train_loader:
        data, target = data.to(device), target.to(device) # loading to GPU
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        
        loss1 = criterion(output, target)
        loss2 = model.conv1.weight.abs().sum() + model.conv2.weight.abs().sum()
        loss = loss1 + gamma*loss2
        
        loss.backward()
        optimizer.step()
        train_loss1 += loss1.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        train_loss2 += loss2.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        train_loss  += loss.item()*data.size(0) # as loss is tensor, .item() needed to get the value
        
    # print training statistics 
    # calculate average loss over an epoch
    train_loss1 = train_loss1/len(train_loader.dataset)
    train_loss2 = train_loss2/len(train_loader.dataset)
    train_loss = train_loss/len(train_loader.dataset)

    print('Epoch: {} \tTraining Loss1: {:.6f}'.format(epoch+1, train_loss1))
    print('Epoch: {} \tTraining Loss2: {:.6f}'.format(epoch+1, train_loss2))
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch+1, train_loss))
    
# see following link for details of state_dict   
# https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_loss,
            }, PATH)

Epoch: 1 	Training Loss1: 0.544160
Epoch: 1 	Training Loss2: 20.787271
Epoch: 1 	Training Loss: 1.583524
Epoch: 2 	Training Loss1: 0.341637
Epoch: 2 	Training Loss2: 8.520830
Epoch: 2 	Training Loss: 0.767678
Epoch: 3 	Training Loss1: 0.303184
Epoch: 3 	Training Loss2: 8.168155
Epoch: 3 	Training Loss: 0.711592
Epoch: 4 	Training Loss1: 0.277441
Epoch: 4 	Training Loss2: 7.945639
Epoch: 4 	Training Loss: 0.674723
Epoch: 5 	Training Loss1: 0.261553
Epoch: 5 	Training Loss2: 7.821687
Epoch: 5 	Training Loss: 0.652638
Epoch: 6 	Training Loss1: 0.254132
Epoch: 6 	Training Loss2: 7.695064
Epoch: 6 	Training Loss: 0.638885
Epoch: 7 	Training Loss1: 0.240790
Epoch: 7 	Training Loss2: 7.613241
Epoch: 7 	Training Loss: 0.621452
Epoch: 8 	Training Loss1: 0.235585
Epoch: 8 	Training Loss2: 7.537886
Epoch: 8 	Training Loss: 0.612479
Epoch: 9 	Training Loss1: 0.228761
Epoch: 9 	Training Loss2: 7.471515
Epoch: 9 	Training Loss: 0.602337
Epoch: 10 	Training Loss1: 0.223151
Epoch: 10 	Training Loss2: 

In [7]:
PATH = "save/trained_cnn_model_customLoss2.pt"
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()

test_loss = 0
correct = 0

with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device) # loading to GPU
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)  
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

print("Weight sum:",model.conv1.weight.abs().sum().item())


Test set: Accuracy: 9598/10000 (96%)

Weight sum: 1.3434120416641235
