## CNN Model:

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets
from bokeh.plotting import figure
from bokeh.io import show
from bokeh.models import LinearAxis, Range1d
import numpy as np
import torch.nn.functional as F

In [14]:
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by dropout and two linear layers.

    Args:
        num_classes: Width of the final linear layer, i.e. the number of logits
            returned per sample. Defaults to 10, which reproduces the original
            fixed-size network.

    Shape notes:
        Both 5x5 convolutions use stride 1 and padding 2, so they keep the
        spatial size; each 2x2 max-pool with stride 2 halves it. ``fc1`` expects
        7 * 7 * 64 flattened features, which corresponds to 28x28 single-channel
        inputs (28 -> 14 -> 7).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Stage 1: 1 -> 32 channels, then halve the spatial resolution.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        # Stage 2: 32 -> 64 channels, then halve the spatial resolution again.
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) with one row per input sample."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        # NOTE(review): fc1 feeds fc2 with no activation in between, so the two
        # layers compose into a single affine map. Left unchanged to preserve the
        # behaviour of already-trained weights -- confirm whether that is intended.
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [15]:
from torch import optim

# Fresh, randomly initialised instance of the network defined above.
model = ConvNet()

# Cross-entropy loss object.
# NOTE(review): no visible cell reads `loss_function`; the training cell uses a
# separately created `criterion` -- confirm before relying on or removing this name.
loss_function = nn.CrossEntropyLoss()

In [16]:
# Print the module tree to check the layer configuration at a glance.
print(model)

ConvNet(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (drop_out): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=3136, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=10, bias=True)
)


In [5]:
# Number of full passes over the training data made by the training loop.
num_epochs = 5

# MNIST has 10 output classes.
# NOTE(review): not referenced by any cell visible in this notebook -- confirm it is still needed.
num_classes = 10

# Samples per mini-batch; passed to both DataLoaders.
batch_size = 100

# Step size handed to the optimizer.
learning_rate = 0.001

In [6]:
# Loss and optimizer.
# `criterion` is the loss the training loop calls on (outputs, labels).
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, using the learning rate from the hyperparameter cell.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Alternative optimizer for the SGD side of the comparison.
# NOTE(review): presumably this line was swapped in by hand to produce the SGD loss
# history that the plotting cells load -- confirm how that file was generated.
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [7]:
import os

# Locations are relative to the working directory so the notebook runs on any machine.
# The previous literals hard-coded one user's Windows profile and contained invalid
# escape sequences (backslash followed by A, P, M and p).
DATA_PATH = os.path.join('data', 'MNISTData')
# Kept as a directory prefix ending in a path separator, like the original value,
# so a file name can still be appended directly.
MODEL_STORE_PATH = os.path.join('pytorch_models', '')

# Transforms applied to every image: convert to a tensor, then normalise the single channel.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST training-set mean and std -- confirm.
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# MNIST dataset. Only the training split requests a download; the test split reads
# from the same root directory.
train_dataset = torchvision.datasets.MNIST(root=DATA_PATH, train=True, transform=trans, download=True)
test_dataset = torchvision.datasets.MNIST(root=DATA_PATH, train=False, transform=trans)

# Data loaders: shuffle the training data each epoch, keep the test order fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
# Train the model.
# Put the network in training mode explicitly: the evaluation cell calls model.eval(),
# and without this a re-run of training would silently keep dropout disabled.
model.train()
total_step = len(train_loader)
loss_list = []  # per-batch training loss, one entry per optimisation step
acc_list = []   # kept so the name still exists; accuracy is not tracked during training
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Run the forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backprop and perform one optimisation step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the history and the log line.
        batch_loss = loss.item()
        loss_list.append(batch_loss)

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, batch_loss))

Epoch [1/5], Step [100/600], Loss: 0.1062
Epoch [1/5], Step [200/600], Loss: 0.0411
Epoch [1/5], Step [300/600], Loss: 0.1447
Epoch [1/5], Step [400/600], Loss: 0.0742
Epoch [1/5], Step [500/600], Loss: 0.0653
Epoch [1/5], Step [600/600], Loss: 0.1429
Epoch [2/5], Step [100/600], Loss: 0.0567
Epoch [2/5], Step [200/600], Loss: 0.0448
Epoch [2/5], Step [300/600], Loss: 0.1813
Epoch [2/5], Step [400/600], Loss: 0.0316
Epoch [2/5], Step [500/600], Loss: 0.2769
Epoch [2/5], Step [600/600], Loss: 0.0293
Epoch [3/5], Step [100/600], Loss: 0.0216
Epoch [3/5], Step [200/600], Loss: 0.0243
Epoch [3/5], Step [300/600], Loss: 0.1190
Epoch [3/5], Step [400/600], Loss: 0.0735
Epoch [3/5], Step [500/600], Loss: 0.0247
Epoch [3/5], Step [600/600], Loss: 0.1519
Epoch [4/5], Step [100/600], Loss: 0.0617
Epoch [4/5], Step [200/600], Loss: 0.0281
Epoch [4/5], Step [300/600], Loss: 0.1103
Epoch [4/5], Step [400/600], Loss: 0.1351
Epoch [4/5], Step [500/600], Loss: 0.0057
Epoch [4/5], Step [600/600], Loss:

In [9]:
import pickle

# Persist the per-batch loss history so the comparison plot can reload it later.
# The file holds binary pickle data despite the .txt extension.
with open("loss_adam_1.txt", "wb") as out_file:
    pickle.dump(loss_list, out_file)

In [19]:
# Evaluate the trained model on the test split.
model.eval()  # inference mode for the dropout layer
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        predicted = outputs.data.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: {} %'.format((correct / total) * 100))

Test Accuracy of the model on the 10000 test images: 99.03999999999999 %


In [11]:
import pickle

# Reload both loss histories for the comparison plot.
# NOTE(review): no visible cell writes "loss_sgd_1.txt" -- confirm how it is produced.
# pickle.load executes arbitrary code from the file; only load files you created yourself.
# This rebinds `loss_list` to the SGD history; the Adam history goes to `loss_list_1`.
with open("loss_sgd_1.txt", "rb") as sgd_file:
    loss_list = pickle.load(sgd_file)
with open("loss_adam_1.txt", "rb") as adam_file:
    loss_list_1 = pickle.load(adam_file)

In [12]:
def pro_list(mylist, number):
    """Average ``mylist`` over consecutive, non-overlapping windows of ``number`` items.

    Args:
        mylist: Sequence of numeric values (for example per-batch losses).
        number: Window length; expected to be a positive integer.

    Returns:
        A list with one mean per window, in order. When ``len(mylist)`` is not a
        multiple of ``number``, the final shorter window is averaged over the
        values it actually contains. An empty input yields an empty list.
    """
    ave_list = []
    # Step through the data in strides of `number` (previously the slice bounds
    # were hard-coded to 50 regardless of the argument).
    for start in range(0, len(mylist), number):
        window = mylist[start:start + number]
        # Divide by the real window length so a trailing partial window is not skewed.
        ave_list.append(sum(window) / len(window))
    return ave_list

In [13]:
# Smooth both loss histories with pro_list, passing 50 as the window size.
# `loss_list` holds the SGD history and `loss_list_1` the Adam history (see the loading cell).
loss_sgd = pro_list(loss_list,50)
loss_adam = pro_list(loss_list_1,50)

In [15]:
from bokeh.plotting import figure, output_file, save
from bokeh.io import show
from bokeh.models import LinearAxis, Range1d
import numpy as np

# Each plotted point is one averaged window from pro_list, so the x axis counts
# windows rather than individual batches; label it so the figure stands on its own.
# y_range=(0, 1) fixes the visible loss range; values above 1 fall outside the view.
p = figure(x_axis_label='Averaged window index (50 batches per point)',
           y_axis_label='Loss', width=850, y_range=(0, 1),
           title='CNN-2 Hidden Layers SGD ADAM Comparison')
p.line(np.arange(len(loss_sgd)), loss_sgd, legend_label="SGD")
p.line(np.arange(len(loss_adam)), loss_adam, color='red', legend_label="Adam")

p.legend.location = "top_right"
p.legend.click_policy = "hide"  # clicking a legend entry toggles that curve
# Write a standalone HTML file; save() returns the path it wrote, which the cell displays.
output_file("compare_2.html")
save(p)

'/Users/xuechunwang/Desktop/561project/compare_2.html'

## DNN 3 Layers Models:

### Optimizer = Adam

In [1]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalise the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [3]:
# Training and held-out MNIST splits. Each split is stored under its own root
# directory and downloaded on first use.
trainset = datasets.MNIST(
    root='PATH_TO_STORE_TRAINSET',
    train=True,
    download=True,
    transform=transform,
)
valset = datasets.MNIST(
    root='PATH_TO_STORE_TESTSET',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batches of 64 samples; both loaders reshuffle on every pass.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(dataset=valset, batch_size=64, shuffle=True)

In [4]:
# Pull one batch to inspect tensor shapes.
dataiter = iter(trainloader)
# Use the built-in next(): recent PyTorch DataLoader iterators do not provide the
# legacy `.next()` method, so calling it raises AttributeError there.
images, labels = next(dataiter)

print(images.shape)
print(labels.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [5]:
input_size = 784
hidden_sizes = [128, 64, 32]
output_size = 10

# Build the stack programmatically: a Linear + ReLU pair for each hidden width,
# followed by the output Linear layer and a LogSoftmax over the class dimension.
layer_widths = [input_size] + hidden_sizes
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.ReLU())
layers.append(nn.Linear(layer_widths[-1], output_size))
layers.append(nn.LogSoftmax(dim=1))

model = nn.Sequential(*layers)
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): ReLU()
  (6): Linear(in_features=32, out_features=10, bias=True)
  (7): LogSoftmax()
)


In [6]:
# Negative log-likelihood loss, applied to the log-probabilities the model returns.
criterion = nn.NLLLoss()

# Smoke test on a single batch: flatten the images, run the model, compute the loss.
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

logps = model(images)            # log probabilities
loss = criterion(logps, labels)  # NLL loss for this one batch

In [7]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
time0 = time()
epochs = 15
loss_list = []  # per-batch training loss, one entry per optimisation step
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)

        # Backpropagate, then let the optimizer update the weights.
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the history and the epoch total.
        batch_loss = loss.item()
        loss_list.append(batch_loss)
        running_loss += batch_loss

    # Mean per-batch loss for the epoch. A plain statement replaces the former
    # for/else: the loop has no break, so the else branch always ran anyway.
    print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

Epoch 0 - Training loss: 0.3549092433242592
Epoch 1 - Training loss: 0.17272985234721575
Epoch 2 - Training loss: 0.1407171411435805
Epoch 3 - Training loss: 0.12238512624865339
Epoch 4 - Training loss: 0.11346999786174627
Epoch 5 - Training loss: 0.1029857755345759
Epoch 6 - Training loss: 0.09322818013966512
Epoch 7 - Training loss: 0.08965650243438415
Epoch 8 - Training loss: 0.08532233750879534
Epoch 9 - Training loss: 0.07993781194339064
Epoch 10 - Training loss: 0.07904388345158679
Epoch 11 - Training loss: 0.07410539369814213
Epoch 12 - Training loss: 0.06992016610590049
Epoch 13 - Training loss: 0.06709007232306974
Epoch 14 - Training loss: 0.06624660948946723

Training Time (in minutes) = 2.953571935494741


In [12]:
# Accuracy on the held-out split, evaluated one batch at a time instead of one
# image at a time (a single forward pass per batch, no numpy/list round-trips).
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in valloader:
        # Flatten each image to a vector, as in training.
        logps = model(images.view(images.shape[0], -1))
        # exp() is monotonic, so the arg-max of the log-probabilities is also the
        # arg-max of the probabilities; ties resolve to the first maximal index.
        pred_labels = logps.argmax(dim=1)
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.9666


In [13]:
import pickle

# Persist the Adam per-batch loss history for the comparison plot.
# The file holds binary pickle data despite the .txt extension.
with open("loss_adam_2.txt", "wb") as out_file:
    pickle.dump(loss_list, out_file)

### Optimizer = SGD

In [1]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalise the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [3]:
# Training and held-out MNIST splits. Each split is stored under its own root
# directory and downloaded on first use.
trainset = datasets.MNIST(
    root='PATH_TO_STORE_TRAINSET',
    train=True,
    download=True,
    transform=transform,
)
valset = datasets.MNIST(
    root='PATH_TO_STORE_TESTSET',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batches of 64 samples; both loaders reshuffle on every pass.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(dataset=valset, batch_size=64, shuffle=True)

In [4]:
# Pull one batch to inspect tensor shapes.
dataiter = iter(trainloader)
# Use the built-in next(): recent PyTorch DataLoader iterators do not provide the
# legacy `.next()` method, so calling it raises AttributeError there.
images, labels = next(dataiter)

print(images.shape)
print(labels.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [5]:
input_size = 784
hidden_sizes = [128, 64, 32]
output_size = 10

# Build the stack programmatically: a Linear + ReLU pair for each hidden width,
# followed by the output Linear layer and a LogSoftmax over the class dimension.
layer_widths = [input_size] + hidden_sizes
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.ReLU())
layers.append(nn.Linear(layer_widths[-1], output_size))
layers.append(nn.LogSoftmax(dim=1))

model = nn.Sequential(*layers)
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): ReLU()
  (6): Linear(in_features=32, out_features=10, bias=True)
  (7): LogSoftmax()
)


In [6]:
# Negative log-likelihood loss, applied to the log-probabilities the model returns.
criterion = nn.NLLLoss()

# Smoke test on a single batch: flatten the images, run the model, compute the loss.
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

logps = model(images)            # log probabilities
loss = criterion(logps, labels)  # NLL loss for this one batch

In [7]:
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()
epochs = 15
loss_list_1 = []  # per-batch training loss, one entry per optimisation step
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)

        # Backpropagate, then let the optimizer update the weights.
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the history and the epoch total.
        batch_loss = loss.item()
        loss_list_1.append(batch_loss)
        running_loss += batch_loss

    # Mean per-batch loss for the epoch. A plain statement replaces the former
    # for/else: the loop has no break, so the else branch always ran anyway.
    print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

Epoch 0 - Training loss: 0.8686249366701285
Epoch 1 - Training loss: 0.29724624632263996
Epoch 2 - Training loss: 0.20979229197986343
Epoch 3 - Training loss: 0.16358319557766351
Epoch 4 - Training loss: 0.13634100860790976
Epoch 5 - Training loss: 0.11833655414606399
Epoch 6 - Training loss: 0.10434200936384293
Epoch 7 - Training loss: 0.09105341799104455
Epoch 8 - Training loss: 0.08400252356138359
Epoch 9 - Training loss: 0.07561184792642368
Epoch 10 - Training loss: 0.06797966321132012
Epoch 11 - Training loss: 0.0619077869002888
Epoch 12 - Training loss: 0.055171796460516814
Epoch 13 - Training loss: 0.05166147939507355
Epoch 14 - Training loss: 0.0482620599377813

Training Time (in minutes) = 2.6331329504648844


In [13]:
# Accuracy on the held-out split, evaluated one batch at a time instead of one
# image at a time (a single forward pass per batch, no numpy/list round-trips).
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in valloader:
        # Flatten each image to a vector, as in training.
        logps = model(images.view(images.shape[0], -1))
        # exp() is monotonic, so the arg-max of the log-probabilities is also the
        # arg-max of the probabilities; ties resolve to the first maximal index.
        pred_labels = logps.argmax(dim=1)
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.9713


In [8]:
import pickle

# Persist the SGD per-batch loss history for the comparison plot.
# The file holds binary pickle data despite the .txt extension.
with open("loss_sgd_2.txt", "wb") as out_file:
    pickle.dump(loss_list_1, out_file)

### Plot:

In [9]:
import pickle

# Reload both loss histories for the comparison plot.
# pickle.load executes arbitrary code from the file; only load files you created yourself.
# `loss_list` receives the SGD history and `loss_list_1` the Adam history.
with open("loss_sgd_2.txt", "rb") as sgd_file:
    loss_list = pickle.load(sgd_file)
with open("loss_adam_2.txt", "rb") as adam_file:
    loss_list_1 = pickle.load(adam_file)

In [10]:
def pro_list(mylist, number):
    """Average ``mylist`` over consecutive, non-overlapping windows of ``number`` items.

    Args:
        mylist: Sequence of numeric values (for example per-batch losses).
        number: Window length; expected to be a positive integer.

    Returns:
        A list with one mean per window, in order. When ``len(mylist)`` is not a
        multiple of ``number``, the final shorter window is averaged over the
        values it actually contains. An empty input yields an empty list.
    """
    ave_list = []
    # Step through the data in strides of `number` (previously the slice bounds
    # were hard-coded to 50 regardless of the argument).
    for start in range(0, len(mylist), number):
        window = mylist[start:start + number]
        # Divide by the real window length so a trailing partial window is not skewed.
        ave_list.append(sum(window) / len(window))
    return ave_list

In [11]:
# Smooth both loss histories with pro_list, passing 50 as the window size.
# `loss_list` holds the SGD history and `loss_list_1` the Adam history (see the loading cell).
loss_sgd = pro_list(loss_list,50)
loss_adam = pro_list(loss_list_1,50)

In [12]:
from bokeh.plotting import figure, output_file, save
from bokeh.io import show
from bokeh.models import LinearAxis, Range1d
import numpy as np

# Each plotted point is one averaged window from pro_list, so the x axis counts
# windows rather than individual batches; label it so the figure stands on its own.
# y_range=(0, 1) fixes the visible loss range; values above 1 fall outside the view.
p = figure(x_axis_label='Averaged window index (50 batches per point)',
           y_axis_label='Loss', width=850, y_range=(0, 1),
           title='DNN-3 Hidden Layers SGD ADAM Comparison')
p.line(np.arange(len(loss_sgd)), loss_sgd, legend_label="SGD")
p.line(np.arange(len(loss_adam)), loss_adam, color='red', legend_label="Adam")

p.legend.location = "top_right"
p.legend.click_policy = "hide"  # clicking a legend entry toggles that curve
# Write a standalone HTML file; save() returns the path it wrote, which the cell displays.
output_file("compare_DNN_3.html")
save(p)

'/Users/xuechunwang/Desktop/561project/compare_DNN_3.html'

## DNN 2 Layers Models:

### Optimizer = Adam

In [1]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalise the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [3]:
# Training and held-out MNIST splits. Each split is stored under its own root
# directory and downloaded on first use.
trainset = datasets.MNIST(
    root='PATH_TO_STORE_TRAINSET',
    train=True,
    download=True,
    transform=transform,
)
valset = datasets.MNIST(
    root='PATH_TO_STORE_TESTSET',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batches of 64 samples; both loaders reshuffle on every pass.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(dataset=valset, batch_size=64, shuffle=True)

In [4]:
# Pull one batch to inspect tensor shapes.
dataiter = iter(trainloader)
# Use the built-in next(): recent PyTorch DataLoader iterators do not provide the
# legacy `.next()` method, so calling it raises AttributeError there.
images, labels = next(dataiter)

print(images.shape)
print(labels.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [5]:
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Build the stack programmatically: a Linear + ReLU pair for each hidden width,
# followed by the output Linear layer and a LogSoftmax over the class dimension.
layer_widths = [input_size] + hidden_sizes
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.ReLU())
layers.append(nn.Linear(layer_widths[-1], output_size))
layers.append(nn.LogSoftmax(dim=1))

model = nn.Sequential(*layers)
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax()
)


In [6]:
# Negative log-likelihood loss, applied to the log-probabilities the model returns.
criterion = nn.NLLLoss()

# Smoke test on a single batch: flatten the images, run the model, compute the loss.
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

logps = model(images)            # log probabilities
loss = criterion(logps, labels)  # NLL loss for this one batch

In [7]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)
time0 = time()
epochs = 15
loss_list_1 = []  # per-batch training loss, one entry per optimisation step
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)

        # Backpropagate, then let the optimizer update the weights.
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the history and the epoch total.
        batch_loss = loss.item()
        loss_list_1.append(batch_loss)
        running_loss += batch_loss

    # Mean per-batch loss for the epoch. A plain statement replaces the former
    # for/else: the loop has no break, so the else branch always ran anyway.
    print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

Epoch 0 - Training loss: 0.32977868087176704
Epoch 1 - Training loss: 0.1662145694707836
Epoch 2 - Training loss: 0.13557993259423895
Epoch 3 - Training loss: 0.11646597242421672
Epoch 4 - Training loss: 0.10652980232474281
Epoch 5 - Training loss: 0.10062839295010148
Epoch 6 - Training loss: 0.09342596931670949
Epoch 7 - Training loss: 0.08711332426061913
Epoch 8 - Training loss: 0.08239909224318905
Epoch 9 - Training loss: 0.07780793777442098
Epoch 10 - Training loss: 0.08009229239237968
Epoch 11 - Training loss: 0.0744552697258086
Epoch 12 - Training loss: 0.07132959999414637
Epoch 13 - Training loss: 0.06558629799578865
Epoch 14 - Training loss: 0.07000656467765622

Training Time (in minutes) = 3.4183710336685182


In [9]:
import pickle

# Persist the Adam per-batch loss history for the comparison plot.
# The file holds binary pickle data despite the .txt extension.
with open("loss_adam_3.txt", "wb") as out_file:
    pickle.dump(loss_list_1, out_file)

In [10]:
# Accuracy on the held-out split, evaluated one batch at a time instead of one
# image at a time (a single forward pass per batch, no numpy/list round-trips).
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in valloader:
        # Flatten each image to a vector, as in training.
        logps = model(images.view(images.shape[0], -1))
        # exp() is monotonic, so the arg-max of the log-probabilities is also the
        # arg-max of the probabilities; ties resolve to the first maximal index.
        pred_labels = logps.argmax(dim=1)
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.9676


### Optimizer = SGD

In [1]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalise the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [3]:
# Training and held-out MNIST splits. Each split is stored under its own root
# directory and downloaded on first use.
trainset = datasets.MNIST(
    root='PATH_TO_STORE_TRAINSET',
    train=True,
    download=True,
    transform=transform,
)
valset = datasets.MNIST(
    root='PATH_TO_STORE_TESTSET',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batches of 64 samples; both loaders reshuffle on every pass.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(dataset=valset, batch_size=64, shuffle=True)

In [4]:
# Pull one batch to inspect tensor shapes.
dataiter = iter(trainloader)
# Use the built-in next(): recent PyTorch DataLoader iterators do not provide the
# legacy `.next()` method, so calling it raises AttributeError there.
images, labels = next(dataiter)

print(images.shape)
print(labels.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [5]:
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Build the stack programmatically: a Linear + ReLU pair for each hidden width,
# followed by the output Linear layer and a LogSoftmax over the class dimension.
layer_widths = [input_size] + hidden_sizes
layers = []
for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
    layers.append(nn.Linear(fan_in, fan_out))
    layers.append(nn.ReLU())
layers.append(nn.Linear(layer_widths[-1], output_size))
layers.append(nn.LogSoftmax(dim=1))

model = nn.Sequential(*layers)
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax()
)


In [6]:
# Negative log-likelihood loss, applied to the log-probabilities the model returns.
criterion = nn.NLLLoss()

# Smoke test on a single batch: flatten the images, run the model, compute the loss.
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

logps = model(images)            # log probabilities
loss = criterion(logps, labels)  # NLL loss for this one batch

In [7]:
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()
epochs = 15
loss_list_1 = []  # per-batch training loss, one entry per optimisation step
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        output = model(images)
        loss = criterion(output, labels)

        # Backpropagate, then let the optimizer update the weights.
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the history and the epoch total.
        batch_loss = loss.item()
        loss_list_1.append(batch_loss)
        running_loss += batch_loss

    # Mean per-batch loss for the epoch. A plain statement replaces the former
    # for/else: the loop has no break, so the else branch always ran anyway.
    print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

Epoch 0 - Training loss: 0.6404558545284307
Epoch 1 - Training loss: 0.2877736112662851
Epoch 2 - Training loss: 0.2281369806201791
Epoch 3 - Training loss: 0.18184279408385315
Epoch 4 - Training loss: 0.15013712806416663
Epoch 5 - Training loss: 0.12848343703744888
Epoch 6 - Training loss: 0.11247424812201084
Epoch 7 - Training loss: 0.09937010133273598
Epoch 8 - Training loss: 0.08913002645811324
Epoch 9 - Training loss: 0.08031543883493047
Epoch 10 - Training loss: 0.0737801215086164
Epoch 11 - Training loss: 0.06699806764555066
Epoch 12 - Training loss: 0.06170931521322586
Epoch 13 - Training loss: 0.05682032936459173
Epoch 14 - Training loss: 0.052728156619735046

Training Time (in minutes) = 2.6537465612093607


In [8]:
import pickle

# Persist the SGD per-batch loss history for the comparison plot.
# The file holds binary pickle data despite the .txt extension.
with open("loss_sgd_3.txt", "wb") as out_file:
    pickle.dump(loss_list_1, out_file)

In [9]:
# Accuracy on the held-out split, evaluated one batch at a time instead of one
# image at a time (a single forward pass per batch, no numpy/list round-trips).
correct_count, all_count = 0, 0
with torch.no_grad():
    for images, labels in valloader:
        # Flatten each image to a vector, as in training.
        logps = model(images.view(images.shape[0], -1))
        # exp() is monotonic, so the arg-max of the log-probabilities is also the
        # arg-max of the probabilities; ties resolve to the first maximal index.
        pred_labels = logps.argmax(dim=1)
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.shape[0]

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.9748


### Plot:

In [10]:
import pickle

# Reload both loss histories for the comparison plot.
# pickle.load executes arbitrary code from the file; only load files you created yourself.
# `loss_list` receives the SGD history and `loss_list_1` the Adam history.
with open("loss_sgd_3.txt", "rb") as sgd_file:
    loss_list = pickle.load(sgd_file)
with open("loss_adam_3.txt", "rb") as adam_file:
    loss_list_1 = pickle.load(adam_file)

In [11]:
def pro_list(mylist, number):
    """Average ``mylist`` over consecutive, non-overlapping windows of ``number`` items.

    Args:
        mylist: Sequence of numeric values (for example per-batch losses).
        number: Window length; expected to be a positive integer.

    Returns:
        A list with one mean per window, in order. When ``len(mylist)`` is not a
        multiple of ``number``, the final shorter window is averaged over the
        values it actually contains. An empty input yields an empty list.
    """
    ave_list = []
    # Step through the data in strides of `number` (previously the slice bounds
    # were hard-coded to 50 regardless of the argument).
    for start in range(0, len(mylist), number):
        window = mylist[start:start + number]
        # Divide by the real window length so a trailing partial window is not skewed.
        ave_list.append(sum(window) / len(window))
    return ave_list

In [12]:
# Smooth both loss histories with pro_list, passing 50 as the window size.
# `loss_list` holds the SGD history and `loss_list_1` the Adam history (see the loading cell).
loss_sgd = pro_list(loss_list,50)
loss_adam = pro_list(loss_list_1,50)

In [17]:
from bokeh.plotting import figure, output_file, save
from bokeh.io import show
from bokeh.models import LinearAxis, Range1d
import numpy as np

# Each plotted point is one averaged window from pro_list, so the x axis counts
# windows rather than individual batches; label it so the figure stands on its own.
# y_range=(0, 1) fixes the visible loss range; values above 1 fall outside the view.
p = figure(x_axis_label='Averaged window index (50 batches per point)',
           y_axis_label='Loss', width=850, y_range=(0, 1),
           title='DNN-2 Hidden Layers SGD ADAM Comparison')
p.line(np.arange(len(loss_sgd)), loss_sgd, legend_label="SGD")
p.line(np.arange(len(loss_adam)), loss_adam, color='red', legend_label="Adam")

p.legend.location = "top_right"
p.legend.click_policy = "hide"  # clicking a legend entry toggles that curve
# Write a standalone HTML file; save() returns the path it wrote, which the cell displays.
output_file("compare_DNN_2.html")
save(p)

'/Users/xuechunwang/Desktop/561project/compare_DNN_2.html'