In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score,classification_report,f1_score
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer 
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt 

import math
import torch
from torch import nn
from torch import optim
from torchvision.datasets import MNIST
from torch.utils.data import TensorDataset, DataLoader

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.autograd import Variable
from torchvision import datasets, transforms
import torch.utils.data as utils
from torch.optim import Adam 

In [4]:
transform = transforms.Compose([
transforms.ToTensor(), transforms.Normalize([0.5], [0.5])])

In [8]:
trainset = datasets.MNIST('MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

In [21]:
trainloader = torch.utils.data.DataLoader(
        datasets.MNIST('MNIST_data/', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=16, shuffle=True)
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('MNIST_data/', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=16, shuffle=True)

In [11]:
len(trainloader)

7500

In [12]:
len(train_loader)

7500

In [13]:
len(test_loader)

1250

In [14]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
device = torch.device('cpu')

cuda


In [15]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1' , nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2' , nn.ReLU())
model.add_module('l3', nn.Linear(200, 10))

model.to(device)

Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=10, bias=True)
)

In [86]:
a = list(model.l1.weight)

In [87]:
len(a)

200

In [89]:
len(a[0])

784

In [10]:
a[0][0]

tensor(0.0431, grad_fn=<SelectBackward>)

In [51]:
p = torch.eye(200)

In [52]:
p

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]])

In [53]:
p = p*10

In [56]:
 p

tensor([[10.,  0.,  0.,  ...,  0.,  0.,  0.],
        [ 0., 10.,  0.,  ...,  0.,  0.,  0.],
        [ 0.,  0., 10.,  ...,  0.,  0.,  0.],
        ...,
        [ 0.,  0.,  0.,  ..., 10.,  0.,  0.],
        [ 0.,  0.,  0.,  ...,  0., 10.,  0.],
        [ 0.,  0.,  0.,  ...,  0.,  0., 10.]])

In [57]:
b = 10*torch.eye(784)

In [59]:
m_200 = torch.distributions.multivariate_normal.MultivariateNormal(torch.zeros(200), p)
m_784 = torch.distributions.multivariate_normal.MultivariateNormal(torch.zeros(784), b)

In [61]:
def call_pr_loss(a , m ):
    s = 0 
    c = 0
    for k in a:

        s -= m.log_prob(k)
        c += 1
    return s/c

In [62]:
call_pr_loss(list(model.l2.weight) , m_200 )

tensor(414.0628, grad_fn=<DivBackward0>)

In [63]:
call_pr_loss(list(model.l1.weight) , m_784 )

tensor(1623.0780, grad_fn=<DivBackward0>)

In [18]:
m_200 = 1
m_784 = 1

In [69]:
def call_pr_loss(a , variance):
    var = variance**2
    log = math.log()
    return torch.sum(a**2) / variance**2 / 2

In [16]:
def call_pr_loss(a , variance):
    k = len(a[0])
    l = torch.sum(a**2)/ variance**2 /2/k
    return k/2*math.log(2*math.pi) + 0.5*math.log(variance**k) + l

In [67]:
call_pr_loss(model.l1.weight , 10)

tensor(1623.0615, grad_fn=<AddBackward0>)

In [22]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 3):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        
        
        
        loss = loss_0 + loss_1+ loss_2+ loss_3
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 1000  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()



In [23]:
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        output = model(data)
        test_loss += criterion(output, target).item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0051, Accuracy: 9757/10000 (98%)



In [24]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)


Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [25]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 3):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        
        loss = loss_0 + loss_1+ loss_2+ loss_3 + loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 100  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()



In [12]:
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        output = model(data)
        test_loss += criterion(output, target).item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0333, Accuracy: 57630/60000 (96%)



In [13]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)



Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 3):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = loss_0 + loss_1+ loss_2+ loss_3 + loss_4 + loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 100  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()



In [15]:
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        output = model(data)
        test_loss += criterion(output, target).item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0360, Accuracy: 57473/60000 (96%)



In [16]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)



Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [17]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 3):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = loss_0 + loss_1+ loss_2+ loss_3 + loss_4 + loss_f + loss_5
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 100  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()



In [26]:
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        output = model(data)
        test_loss += criterion(output, target).item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0055, Accuracy: 9732/10000 (97%)



In [27]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)



Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [28]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 3):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = loss_0 + loss_1+ loss_2+ loss_3 + loss_4 + loss_f + loss_5 + loss_6
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 100  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()



In [29]:
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        output = model(data)
        test_loss += criterion(output, target).item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0083, Accuracy: 9612/10000 (96%)



In [30]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('l7', nn.Linear(200, 200))
model.add_module('r7', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)



Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=200, out_features=200, bias=True)
  (r7): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [31]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 3):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_7 = call_pr_loss(model.l7.weight , m_200)
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = loss_0 + loss_1+ loss_2+ loss_3 + loss_4 + loss_f + loss_5 + loss_6 + loss_7
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 100  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()



In [32]:
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        output = model(data)
        test_loss += criterion(output, target).item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))


Test set: Average loss: 0.0094, Accuracy: 9585/10000 (96%)



# лосс на final не учитываем

In [98]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('l7', nn.Linear(200, 200))
model.add_module('r7', nn.ReLU())
model.add_module('l8', nn.Linear(200, 200))
model.add_module('r8', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)



Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=200, out_features=200, bias=True)
  (r7): ReLU()
  (l8): Linear(in_features=200, out_features=200, bias=True)
  (r8): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [99]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 10):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_7 = call_pr_loss(model.l7.weight , m_200)
        loss_8 = call_pr_loss(model.l8.weight , m_200)
        
        #loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = loss_0   + loss_1+ loss_2+ loss_3 + loss_4  + loss_5 + loss_6 + loss_7 + loss_8 #+loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 1000  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()
    eva(model , trainloader)


Test set: Average loss: 0.5754, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)



In [44]:

model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
    for data, target in trainloader:
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        output = model(data)
        test_loss += criterion(output, target).item()  # sum up batch loss
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= len(trainloader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(trainloader.dataset),
    100. * correct / len(trainloader.dataset)))


Test set: Average loss: 0.5753, Accuracy: 6742/60000 (11%)



# умножили на 1000

In [46]:
def eva(model , trainloader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in trainloader:
            data, target = data.to(device), target.to(device)
            data = data.view(data.size(0), -1)
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(trainloader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(trainloader.dataset),
        100. * correct / len(trainloader.dataset)))

In [100]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('l7', nn.Linear(200, 200))
model.add_module('r7', nn.ReLU())
model.add_module('l8', nn.Linear(200, 200))
model.add_module('r8', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)


Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=200, out_features=200, bias=True)
  (r7): ReLU()
  (l8): Linear(in_features=200, out_features=200, bias=True)
  (r8): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [101]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 10):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_7 = call_pr_loss(model.l7.weight , m_200)
        loss_8 = call_pr_loss(model.l8.weight , m_200)
        
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = 1000*loss_0   + loss_1+ loss_2+ loss_3 + loss_4  + loss_5 + loss_6 + loss_7 + loss_8 +loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 1000  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()
    eva(model , trainloader)


Test set: Average loss: 0.0815, Accuracy: 54882/60000 (91%)


Test set: Average loss: 0.0543, Accuracy: 56382/60000 (94%)


Test set: Average loss: 0.0507, Accuracy: 56606/60000 (94%)


Test set: Average loss: 0.0504, Accuracy: 56628/60000 (94%)


Test set: Average loss: 0.0504, Accuracy: 56625/60000 (94%)


Test set: Average loss: 0.0504, Accuracy: 56625/60000 (94%)


Test set: Average loss: 0.0504, Accuracy: 56625/60000 (94%)


Test set: Average loss: 0.0504, Accuracy: 56625/60000 (94%)


Test set: Average loss: 0.0504, Accuracy: 56625/60000 (94%)



In [50]:
eva(model , trainloader)


Test set: Average loss: 0.0764, Accuracy: 54479/60000 (91%)



# увеличили дисперсию до 10

In [71]:
m_200 = 10
m_784 = 10

In [91]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('l7', nn.Linear(200, 200))
model.add_module('r7', nn.ReLU())
model.add_module('l8', nn.Linear(200, 200))
model.add_module('r8', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)

Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=200, out_features=200, bias=True)
  (r7): ReLU()
  (l8): Linear(in_features=200, out_features=200, bias=True)
  (r8): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [92]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 10):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_7 = call_pr_loss(model.l7.weight , m_200)
        loss_8 = call_pr_loss(model.l8.weight , m_200)
        
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = loss_0   + loss_1+ loss_2+ loss_3 + loss_4  + loss_5 + loss_6 + loss_7 + loss_8 +loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 1000  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()
    eva(model , trainloader)


Test set: Average loss: 0.0965, Accuracy: 53898/60000 (90%)


Test set: Average loss: 0.0562, Accuracy: 56148/60000 (94%)


Test set: Average loss: 0.0529, Accuracy: 56350/60000 (94%)


Test set: Average loss: 0.0527, Accuracy: 56374/60000 (94%)


Test set: Average loss: 0.0527, Accuracy: 56377/60000 (94%)


Test set: Average loss: 0.0527, Accuracy: 56378/60000 (94%)


Test set: Average loss: 0.0527, Accuracy: 56378/60000 (94%)


Test set: Average loss: 0.0527, Accuracy: 56378/60000 (94%)


Test set: Average loss: 0.0527, Accuracy: 56378/60000 (94%)



In [74]:
eva(model , trainloader)


Test set: Average loss: 0.0631, Accuracy: 55614/60000 (93%)



# умножили на 1000 и дисперсия 10

In [93]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('l7', nn.Linear(200, 200))
model.add_module('r7', nn.ReLU())
model.add_module('l8', nn.Linear(200, 200))
model.add_module('r8', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)

Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=200, out_features=200, bias=True)
  (r7): ReLU()
  (l8): Linear(in_features=200, out_features=200, bias=True)
  (r8): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [94]:
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 10):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_7 = call_pr_loss(model.l7.weight , m_200)
        loss_8 = call_pr_loss(model.l8.weight , m_200)
        
        loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = 1000*loss_0   + loss_1+ loss_2+ loss_3 + loss_4  + loss_5 + loss_6 + loss_7 + loss_8 +loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 1000  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()
    eva(model , trainloader)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)


Test set: Average loss: 0.5758, Accuracy: 5923/60000 (10%)



In [77]:
eva(model , trainloader)


Test set: Average loss: 0.5760, Accuracy: 5949/60000 (10%)



# все тоже самое но без последнего лосса

In [84]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('l7', nn.Linear(200, 200))
model.add_module('r7', nn.ReLU())
model.add_module('l8', nn.Linear(200, 200))
model.add_module('r8', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)

Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=200, out_features=200, bias=True)
  (r7): ReLU()
  (l8): Linear(in_features=200, out_features=200, bias=True)
  (r8): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [86]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 10):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_7 = call_pr_loss(model.l7.weight , m_200)
        loss_8 = call_pr_loss(model.l8.weight , m_200)
        
        #loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = loss_0   + loss_1+ loss_2+ loss_3 + loss_4  + loss_5 + loss_6 + loss_7 + loss_8 #+loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 1000  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()
    eva(model , trainloader)


Test set: Average loss: 0.0621, Accuracy: 56138/60000 (94%)


Test set: Average loss: 0.0424, Accuracy: 57140/60000 (95%)


Test set: Average loss: 0.0398, Accuracy: 57307/60000 (96%)


Test set: Average loss: 0.0397, Accuracy: 57303/60000 (96%)


Test set: Average loss: 0.0397, Accuracy: 57302/60000 (96%)


Test set: Average loss: 0.0397, Accuracy: 57302/60000 (96%)


Test set: Average loss: 0.0397, Accuracy: 57302/60000 (96%)


Test set: Average loss: 0.0397, Accuracy: 57302/60000 (96%)


Test set: Average loss: 0.0397, Accuracy: 57302/60000 (96%)



In [None]:
eva(model , trainloader)

In [95]:
model = nn.Sequential()
model.add_module('l1', nn.Linear(784, 200))
model.add_module('r1', nn.ReLU())
model.add_module('l2', nn.Linear(200, 200))
model.add_module('r2', nn.ReLU())
model.add_module('l3', nn.Linear(200, 200))
model.add_module('r3', nn.ReLU())
model.add_module('l4', nn.Linear(200, 200))
model.add_module('r4', nn.ReLU())
model.add_module('l5', nn.Linear(200, 200))
model.add_module('r5', nn.ReLU())
model.add_module('l6', nn.Linear(200, 200))
model.add_module('r6', nn.ReLU())
model.add_module('l7', nn.Linear(200, 200))
model.add_module('r7', nn.ReLU())
model.add_module('l8', nn.Linear(200, 200))
model.add_module('r8', nn.ReLU())
model.add_module('lf', nn.Linear(200, 10))

model.to(device)

Sequential(
  (l1): Linear(in_features=784, out_features=200, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=200, out_features=200, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=200, out_features=200, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=200, out_features=200, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=200, out_features=200, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=200, out_features=200, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=200, out_features=200, bias=True)
  (r7): ReLU()
  (l8): Linear(in_features=200, out_features=200, bias=True)
  (r8): ReLU()
  (lf): Linear(in_features=200, out_features=10, bias=True)
)

In [96]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

model.train()
step = 0 
loss_history = []
for epoch in range(1, 10):
    for batch_idx, (data, target) in enumerate(trainloader):
        
        data, target = data.to(device), target.to(device)
        data = data.view(data.size(0), -1)
        optimizer.zero_grad()
        output = model(data)
        loss_0 = criterion(output, target)
        
        loss_1 = call_pr_loss(model.l1.weight , m_784)
        
        loss_2 = call_pr_loss(model.l2.weight , m_200)
        loss_3 = call_pr_loss(model.l3.weight , m_200)
        loss_4 = call_pr_loss(model.l4.weight , m_200)
        loss_5 = call_pr_loss(model.l5.weight , m_200)
        loss_6 = call_pr_loss(model.l6.weight , m_200)
        loss_7 = call_pr_loss(model.l7.weight , m_200)
        loss_8 = call_pr_loss(model.l8.weight , m_200)
        
        #loss_f = call_pr_loss(model.lf.weight , m_200)
        
        
        loss = 1000*loss_0   + loss_1+ loss_2+ loss_3 + loss_4  + loss_5 + loss_6 + loss_7 + loss_8 #+loss_f
        loss.backward()
        optimizer.step()
        step += 1
        loss_history.append(loss.item())
        if step % 1000  == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                epoch, batch_idx * len(data), len(trainloader.dataset),
                100. * batch_idx / len(trainloader), loss.item()))
    scheduler.step()
    eva(model , trainloader)


Test set: Average loss: 0.0817, Accuracy: 54989/60000 (92%)


Test set: Average loss: 0.0509, Accuracy: 56513/60000 (94%)


Test set: Average loss: 0.0478, Accuracy: 56669/60000 (94%)


Test set: Average loss: 0.0476, Accuracy: 56680/60000 (94%)


Test set: Average loss: 0.0475, Accuracy: 56683/60000 (94%)


Test set: Average loss: 0.0475, Accuracy: 56683/60000 (94%)


Test set: Average loss: 0.0475, Accuracy: 56683/60000 (94%)


Test set: Average loss: 0.0475, Accuracy: 56683/60000 (94%)


Test set: Average loss: 0.0475, Accuracy: 56683/60000 (94%)



In [83]:
eva(model , trainloader)


Test set: Average loss: 0.0656, Accuracy: 55585/60000 (93%)

