In [1]:
Обучить полносвязную модель на MNIST

In [1]:
import torch
import numpy
import argparse
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn as nn

In [2]:
# Load both MNIST splits, downloading them into the current directory when
# they are not there yet. No transform is passed here; the collate functions
# below convert every sample image with numpy.array.
train_dataset = datasets.MNIST(root='.', train=True, download=True)
test_dataset = datasets.MNIST(root='.', train=False, download=True)

In [3]:
# Number of samples per training mini-batch (used by both training loaders).
BATCH_SIZE = 128

In [4]:
class LinearModel(nn.Module):
    """Fully connected classifier: two hidden sigmoid layers and a linear head.

    Layout: ``linear1 -> sigmoid -> dropout -> linear2 -> sigmoid -> dropout
    -> linear3``. The output of ``linear3`` is returned as-is (raw scores, no
    softmax).

    Args:
        input_dim: Number of features of every input row.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output scores per row.
        dropout_p: Dropout probability used after each hidden activation.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_p=0.1):
        super().__init__()

        # First hidden stage. Only this layer's weight is re-initialised with
        # Xavier-normal; linear2 and linear3 keep nn.Linear's default init.
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        nn.init.xavier_normal_(self.linear1.weight)
        self.activation1 = nn.Sigmoid()
        self.do1 = nn.Dropout(dropout_p)

        # Second hidden stage.
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.activation2 = nn.Sigmoid()
        self.do2 = nn.Dropout(dropout_p)

        # Output head.
        self.linear3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, output_dim)`` scores."""
        hidden = self.do1(self.activation1(self.linear1(x)))
        hidden = self.do2(self.activation2(self.linear2(hidden)))
        return self.linear3(hidden)

In [5]:
def collate_fn(data: list):
    """Collate ``(image, label)`` samples into a batch of flattened images.

    Args:
        data: Samples indexed as ``sample[0]`` (image, converted with
            ``numpy.array``) and ``sample[1]`` (label).

    Returns:
        A dict with two entries:
        ``'data'`` - float tensor with one row per sample; pixel values are
        divided by 255 and every image is flattened;
        ``'target'`` - the labels as a ``long`` tensor.
    """
    images = [numpy.array(sample[0]) for sample in data]
    labels = [sample[1] for sample in data]

    # Stack into one array first so a single tensor is created per field.
    image_batch = torch.from_numpy(numpy.array(images)).float() / 255
    label_batch = torch.from_numpy(numpy.array(labels))

    flat_images = image_batch.view(image_batch.size(0), -1)
    return {'data': flat_images, 'target': label_batch.long()}


In [6]:
# Hyper-parameters for the fully connected model.
input_dim = 28 * 28  # length of one flattened 28x28 image
hidden_dim = 128     # width of both hidden layers
output_dim = 10      # number of scores produced per sample
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_epochs = 15
     

In [7]:
# Build the fully connected network on the selected device, then create the
# Adam optimizer (default settings) over its parameters and the
# cross-entropy loss used for both training and evaluation.
model = LinearModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
).to(device)
optim = torch.optim.Adam(params=model.parameters())
loss_func = nn.CrossEntropyLoss()

In [8]:
%%time
# The loaders are built once: a DataLoader with shuffle=True draws a new
# permutation every time it is iterated, so re-creating it per epoch only
# repeats work.
trainloader = DataLoader(train_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         collate_fn=collate_fn,
                         drop_last=True)  # skip the incomplete final batch

# The whole test split is scored as a single batch; sample order does not
# matter for the mean loss, so shuffling is switched off.
testloader = DataLoader(test_dataset,
                        batch_size=len(test_dataset.data),
                        shuffle=False,
                        collate_fn=collate_fn,
                        drop_last=False)

for epoch in range(n_epochs):

    # ---- training pass (dropout active) ----
    model.train()
    for i, batch in enumerate(trainloader):
        optim.zero_grad()
        predict = model(batch['data'].to(device))
        loss = loss_func(predict, batch['target'].to(device))
        loss.backward()
        optim.step()
        if i % 200 == 0:
            print(f'epoch: {epoch}, step: {i}, loss_train: {loss.item()}')

    # ---- evaluation pass (dropout disabled) ----
    model.eval()
    # No gradients are needed for evaluation; without no_grad() autograd
    # would keep the activations of the full test batch alive.
    with torch.no_grad():
        for batch in testloader:
            predict = model(batch['data'].to(device))
            loss = loss_func(predict, batch['target'].to(device))
            print(f'epoch: {epoch}, loss_test: {loss.item()}')



epoch: 0, step: 0, loss_train: 2.3620717525482178
epoch: 0, step: 200, loss_train: 0.6787263751029968
epoch: 0, step: 400, loss_train: 0.33667346835136414
epoch: 0, loss_test: 0.3199388086795807
epoch: 1, step: 0, loss_train: 0.28920456767082214
epoch: 1, step: 200, loss_train: 0.2860707938671112
epoch: 1, step: 400, loss_train: 0.27441006898880005
epoch: 1, loss_test: 0.22577236592769623
epoch: 2, step: 0, loss_train: 0.21219022572040558
epoch: 2, step: 200, loss_train: 0.1912621110677719
epoch: 2, step: 400, loss_train: 0.13444916903972626
epoch: 2, loss_test: 0.1782744824886322
epoch: 3, step: 0, loss_train: 0.19931180775165558
epoch: 3, step: 200, loss_train: 0.09741687774658203
epoch: 3, step: 400, loss_train: 0.07288753986358643
epoch: 3, loss_test: 0.15003250539302826
epoch: 4, step: 0, loss_train: 0.2588886022567749
epoch: 4, step: 200, loss_train: 0.18691866099834442
epoch: 4, step: 400, loss_train: 0.0895182341337204
epoch: 4, loss_test: 0.1321171075105667
epoch: 5, step: 0, 

In [None]:
Обучить глубокую сверточную сеть на MNIST

In [9]:
class ConvModel(nn.Module):
  """Small convolutional classifier for square single- or multi-channel images.

  Layout: ``conv1 (5x5, stride 2) -> BN -> sigmoid -> dropout ->
  conv2 (3x3) -> BN -> sigmoid -> dropout -> conv3 (3x3) -> flatten ->
  linear``. The linear layer's output is returned as raw scores.

  Args:
    input_ch: Number of channels of the input images.
    hidden_ch: Number of channels produced by ``conv1`` and ``conv2``.
    output_dim: Number of output scores per image.
    dropout_p: Dropout probability used after each hidden activation.
    final_ch: Number of channels produced by ``conv3`` (default 15, the
      value that used to be hard-coded).
    input_size: Side length of the square input images (default 28). It is
      only used to size the classifier; inputs of another size will not
      match the classifier's expected feature count.
  """

  def __init__(self, input_ch, hidden_ch, output_dim, dropout_p=0.2,
               final_ch=15, input_size=28):
    super().__init__()
    self.conv1 = nn.Conv2d(input_ch, hidden_ch, kernel_size=5, padding=2, stride=2)
    self.bn1 = nn.BatchNorm2d(hidden_ch)
    self.activation1 = nn.Sigmoid()
    self.do1 = nn.Dropout(dropout_p)
    self.conv2 = nn.Conv2d(hidden_ch, hidden_ch, kernel_size=3, padding=1, stride=1)
    self.bn2 = nn.BatchNorm2d(hidden_ch)
    self.activation2 = nn.Sigmoid()
    self.do2 = nn.Dropout(dropout_p)
    self.conv3 = nn.Conv2d(hidden_ch, final_ch, kernel_size=3, padding=1, stride=1)

    # conv1 (kernel 5, padding 2, stride 2) maps a side of length s to
    # (s - 1) // 2 + 1; conv2 and conv3 (kernel 3, padding 1, stride 1)
    # leave the spatial size unchanged. For s = 28 this gives 14.
    feature_side = (input_size - 1) // 2 + 1
    self.classifier = nn.Linear(final_ch * feature_side * feature_side, output_dim)

  def forward(self, x):
    """Map a ``(batch, input_ch, H, W)`` tensor to ``(batch, output_dim)`` scores."""
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.activation1(x)
    x = self.do1(x)
    x = self.conv2(x)
    x = self.bn2(x)
    x = self.activation2(x)
    x = self.do2(x)
    x = self.conv3(x)
    # Flatten every sample's feature maps into one row for the linear head.
    x = self.classifier(x.view(x.size(0), -1))

    return x


In [10]:
def collate_fn_cv(data: list):
  """Collate ``(image, label)`` samples into a batch with a channel axis.

  Args:
    data: Samples indexed as ``sample[0]`` (image, converted with
      ``numpy.array``) and ``sample[1]`` (label).

  Returns:
    A dict with two entries:
    ``'data'`` - float tensor of the stacked images divided by 255, with a
    new axis of size 1 inserted at position 1;
    ``'target'`` - the labels as a ``long`` tensor.
  """
  image_arrays = [numpy.array(sample[0]) for sample in data]
  label_values = [sample[1] for sample in data]

  scaled = torch.from_numpy(numpy.array(image_arrays)).float() / 255
  labels = torch.from_numpy(numpy.array(label_values))

  # unsqueeze(1) adds the channel axis that nn.Conv2d expects.
  with_channel = scaled.unsqueeze(1)
  return {'data': with_channel, 'target': labels.long()}

In [11]:
# Hyper-parameters for the convolutional model.
input_ch = 1     # channels of the input images
hidden_ch = 128  # channels of the two hidden convolutions
out_dim = 10     # number of scores produced per sample
# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_epochs = 15

In [12]:
# Build the convolutional network on the selected device, then create the
# Adam optimizer (default settings) over its parameters and the
# cross-entropy loss. Note that `optim` and `loss_func` are re-bound here,
# replacing the objects created for the fully connected model.
model_cv = ConvModel(
    input_ch=input_ch,
    hidden_ch=hidden_ch,
    output_dim=out_dim,
).to(device)
optim = torch.optim.Adam(params=model_cv.parameters())
loss_func = nn.CrossEntropyLoss()

In [13]:
%%time
# The loaders are built once: a DataLoader with shuffle=True draws a new
# permutation every time it is iterated, so re-creating it per epoch only
# repeats work.
trainloader = DataLoader(train_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         collate_fn=collate_fn_cv,
                         drop_last=True)  # skip the incomplete final batch

# The whole test split is scored as a single batch; sample order does not
# matter for the mean loss, so shuffling is switched off.
testloader = DataLoader(test_dataset,
                        batch_size=len(test_dataset.data),
                        shuffle=False,
                        collate_fn=collate_fn_cv,
                        drop_last=False)

for epoch in range(n_epochs):

    # ---- training pass (dropout active, batch-norm uses batch statistics) ----
    model_cv.train()
    for i, batch in enumerate(trainloader):
        optim.zero_grad()
        predict = model_cv(batch['data'].to(device))
        loss = loss_func(predict, batch['target'].to(device))
        loss.backward()
        optim.step()
        if i % 200 == 0:
            print(f'epoch: {epoch}, step: {i}, train_loss: {loss.item()}')

    # ---- evaluation pass (dropout disabled, batch-norm uses running stats) ----
    model_cv.eval()
    # No gradients are needed for evaluation; without no_grad() autograd
    # would keep every intermediate feature map of the full test batch alive.
    with torch.no_grad():
        for batch in testloader:
            predict = model_cv(batch['data'].to(device))
            loss = loss_func(predict, batch['target'].to(device))
            print(f'epoch: {epoch}, test_loss: {loss.item()}')

epoch: 0, step: 0, train_loss: 2.327293872833252
epoch: 0, step: 200, train_loss: 0.3287416696548462
epoch: 0, step: 400, train_loss: 0.1748921424150467
epoch: 0, test_loss: 0.18799753487110138
epoch: 1, step: 0, train_loss: 0.33881354331970215
epoch: 1, step: 200, train_loss: 0.13034258782863617
epoch: 1, step: 400, train_loss: 0.1458151936531067
epoch: 1, test_loss: 0.09573335200548172
epoch: 2, step: 0, train_loss: 0.10515721887350082
epoch: 2, step: 200, train_loss: 0.050386495888233185
epoch: 2, step: 400, train_loss: 0.13618828356266022
epoch: 2, test_loss: 0.06599275767803192
epoch: 3, step: 0, train_loss: 0.021876882761716843
epoch: 3, step: 200, train_loss: 0.09318149834871292
epoch: 3, step: 400, train_loss: 0.1667996346950531
epoch: 3, test_loss: 0.060153521597385406
epoch: 4, step: 0, train_loss: 0.11107579618692398
epoch: 4, step: 200, train_loss: 0.12923818826675415
epoch: 4, step: 400, train_loss: 0.09675615280866623
epoch: 4, test_loss: 0.057879138737916946
epoch: 5, st