In [None]:
# Task 8 
def count_parameters_conv(in_channels: int, out_channels: int, kernel_size: int, bias: bool):
    """Count the learnable parameters of a 2D convolution with a square kernel.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels (filters).
        kernel_size: Side length of the square kernel.
        bias: Whether each output channel has an additive bias term.

    Returns:
        ``in_channels * kernel_size**2 * out_channels`` weights, plus
        ``out_channels`` bias terms when ``bias`` is truthy.
    """
    # Every filter spans all input channels over a kernel_size x kernel_size window.
    weights_per_filter = in_channels * kernel_size**2
    bias_terms = out_channels if bias else 0
    return weights_per_filter * out_channels + bias_terms


In [9]:
# Sanity check: 64 -> 128 channels with a 3x3 kernel and bias,
# i.e. 64 * 3 * 3 * 128 weights + 128 biases = 73856.
result = count_parameters_conv(in_channels=64, out_channels=128, kernel_size=3, bias=True)
result

73856

In [16]:
import torch
import torch.nn as nn
import torchvision.transforms as T
from IPython.display import clear_output
from time import perf_counter
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from tqdm import tqdm
from torch.optim import Optimizer

In [17]:
# Training split of MNIST rooted at ../datasets/mnist (download requested);
# every image is passed through T.ToTensor().
mnist_train = MNIST(root="../datasets/mnist", train=True, transform=T.ToTensor(), download=True)

In [18]:
# Held-out (train=False) split of MNIST from the same root directory,
# with the same T.ToTensor() transform; used here for validation.
mnist_valid = MNIST(root="../datasets/mnist", train=False, transform=T.ToTensor(), download=True)

In [19]:
# Mini-batches of 64 samples. Only the training loader reshuffles every epoch;
# the validation loader iterates in a fixed order so that batch composition
# (and therefore the per-batch averaged validation loss) is the same on every run.
train_loader = DataLoader(mnist_train, batch_size=64, shuffle=True)
valid_loader = DataLoader(mnist_valid, batch_size=64, shuffle=False)

In [20]:
# Task 9
def train(model: nn.Module, data_loader: DataLoader, optimizer: Optimizer, loss_fn) -> float:
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Network to optimise; it is switched to training mode.
        data_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates the parameters of ``model``.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar
            tensor. The caller-supplied criterion is used as given.

    Returns:
        Sum of the batch losses divided by ``len(data_loader)``.

    Raises:
        ZeroDivisionError: If ``len(data_loader)`` is 0.
    """
    model.train()

    total_loss = 0.0
    for x, y in tqdm(data_loader, desc='Train'):
        # Gradients accumulate across backward() calls, so reset them per batch.
        optimizer.zero_grad()

        output = model(x)
        loss = loss_fn(output, y)

        total_loss += loss.item()
        loss.backward()

        optimizer.step()

    # Average over batches (not samples), matching how evaluate() reports loss.
    return total_loss / len(data_loader)


In [None]:
@torch.inference_mode()
def evaluate(model: nn.Module, data_loader: DataLoader, loss_fn) -> tuple[float, float]:
    """Compute mean per-batch loss and accuracy of ``model`` over ``data_loader``.

    The whole function runs under ``torch.inference_mode()`` and the model is
    switched to evaluation mode.

    Args:
        model: Network to evaluate.
        data_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar tensor.

    Returns:
        ``(valid_loss, valid_accuracy)``: the summed batch losses divided by the
        number of batches, and the fraction of samples whose highest-scoring
        index along dim 1 equals the target.
    """
    model.eval()

    loss_sum = 0
    n_seen = 0
    n_correct = 0
    for inputs, targets in tqdm(data_loader, desc='Evaluate'):
        logits = model(inputs)
        loss_sum += loss_fn(logits, targets).item()

        # Predicted class = index of the largest score along dim 1.
        predicted = torch.max(logits, 1).indices
        n_seen += targets.size(0)
        n_correct += (predicted == targets).sum().item()

    return loss_sum / len(data_loader), n_correct / n_seen

In [32]:
def create_mlp_model():
    """Build a perceptron with one hidden layer for 28x28 inputs and 10 outputs.

    Returns:
        ``nn.Sequential`` of Flatten -> Linear(784, 256) -> ReLU -> Linear(256, 10).
    """
    layers = [
        nn.Flatten(),             # flatten everything after the batch dimension
        nn.Linear(28 * 28, 256),
        nn.ReLU(),
        nn.Linear(256, 10),
    ]
    return nn.Sequential(*layers)

In [12]:
num_epochs = 15

model = create_mlp_model()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # Adam over all model parameters
loss_fn = nn.CrossEntropyLoss()

# Best validation result seen so far and the weights that produced it.
best_accuracy = 0.0
best_model_weights = None
best_epoch = 0

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    valid_loss, valid_accuracy = evaluate(model, valid_loader, loss_fn)

    # Check whether the current model is the best one so far.
    if valid_accuracy > best_accuracy:
        best_accuracy = valid_accuracy
        best_epoch = epoch + 1  # report epochs 1-based
        # state_dict() tensors share storage with the live parameters, so
        # clone each one; otherwise later epochs would overwrite the snapshot.
        best_model_weights = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

print(f'Эпоха: {best_epoch}')
# best_accuracy is a fraction in [0, 1]; the '%' format type scales it by 100.
print(f'Точность на валидации: {best_accuracy:.3%}')


NameError: name 'create_mlp_model' is not defined

In [34]:
# Serialize the current parameters of `model` to 'model_weights.pt'.
# NOTE(review): this writes whatever weights `model` holds when the cell runs
# (i.e. after the last executed training step), not necessarily those of the
# best validation epoch — confirm this is intended.
torch.save(model.state_dict(), 'model_weights.pt')

In [22]:
from torch import nn

def create_conv_model():
    """Build a small CNN: two conv/ReLU/max-pool stages and a two-layer classifier.

    The first Linear layer expects 4 * 4 * 64 features, which is what the two
    5x5-conv + 2x2-pool stages produce for a 1x28x28 input (28 -> 24 -> 12 -> 8 -> 4).

    NOTE(review): a later cell defines `create_conv_model` again; once that
    cell has run, this definition is shadowed.

    Returns:
        ``nn.Sequential`` mapping ``(N, 1, 28, 28)`` inputs to ``(N, 10)`` scores.
    """
    feature_layers = []
    for channels_in, channels_out in ((1, 32), (32, 64)):
        feature_layers += [
            nn.Conv2d(channels_in, channels_out, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]

    classifier_layers = [
        nn.Flatten(),
        nn.Linear(4 * 4 * 64, 256),
        nn.ReLU(),
        nn.Linear(256, 10),
    ]
    return nn.Sequential(*feature_layers, *classifier_layers)

In [None]:
def create_conv_model():
    """Build the wider and deeper CNN: four conv layers and a three-layer classifier.

    For a 1x28x28 input the spatial size goes 28 -> 24 -> 12 -> 8 -> 4 through
    the two 5x5 stages, stays 4 through the padded 3x3 convolutions, and is
    halved to 2 by the last pooling, hence the 2 * 2 * 512 classifier input.

    NOTE(review): this reuses the name of the smaller model defined in an
    earlier cell and replaces it once this cell has run.

    Returns:
        ``nn.Sequential`` mapping ``(N, 1, 28, 28)`` inputs to ``(N, 10)`` scores.
    """
    features = [
        # Stage 1: 64 channels.
        nn.Conv2d(1, 64, kernel_size=5),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
        # Stage 2: 128 channels.
        nn.Conv2d(64, 128, kernel_size=5),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
        # Stage 3: padded 3x3 convolution, no pooling.
        nn.Conv2d(128, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        # Stage 4: padded 3x3 convolution followed by pooling.
        nn.Conv2d(256, 512, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
    ]

    classifier = [
        nn.Flatten(),
        # Input size matches the 512 x 2 x 2 feature map produced above.
        nn.Linear(2 * 2 * 512, 512),
        nn.ReLU(),
        nn.Linear(512, 256),  # extra hidden layer
        nn.ReLU(),
        nn.Linear(256, 10),
    ]
    return nn.Sequential(*features, *classifier)

In [25]:
num_epochs = 20

model = create_conv_model()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # Adam over all model parameters
loss_fn = nn.CrossEntropyLoss()

# Best validation result seen so far and the weights that produced it.
best_accuracy = 0.0
best_model_weights = None
best_epoch = 0
start = perf_counter()

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, loss_fn)
    valid_loss, valid_accuracy = evaluate(model, valid_loader, loss_fn)

    # Check whether the current model is the best one so far.
    if valid_accuracy > best_accuracy:
        best_accuracy = valid_accuracy
        best_epoch = epoch + 1  # report epochs 1-based
        # A shallow dict copy would still reference the live parameter
        # tensors, which the optimizer keeps updating in place. Clone each
        # tensor so the snapshot really holds this epoch's weights.
        best_model_weights = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

print(f'Эпоха: {best_epoch}')
# best_accuracy is a fraction in [0, 1]; the '%' format type scales it by 100.
print(f'Точность на валидации: {best_accuracy:.2%}')
print(f'Время обученния {perf_counter() - start:.5f}')

Train: 100%|██████████████████████████████████| 938/938 [00:48<00:00, 19.54it/s]
Evaluate: 100%|███████████████████████████████| 157/157 [00:02<00:00, 55.36it/s]
Train: 100%|██████████████████████████████████| 938/938 [00:48<00:00, 19.48it/s]
Evaluate: 100%|███████████████████████████████| 157/157 [00:02<00:00, 55.35it/s]
Train: 100%|██████████████████████████████████| 938/938 [00:48<00:00, 19.49it/s]
Evaluate: 100%|███████████████████████████████| 157/157 [00:02<00:00, 56.02it/s]
Train: 100%|██████████████████████████████████| 938/938 [00:47<00:00, 19.62it/s]
Evaluate: 100%|███████████████████████████████| 157/157 [00:02<00:00, 54.69it/s]
Train: 100%|██████████████████████████████████| 938/938 [00:48<00:00, 19.20it/s]
Evaluate: 100%|███████████████████████████████| 157/157 [00:02<00:00, 54.28it/s]
Train: 100%|██████████████████████████████████| 938/938 [00:47<00:00, 19.58it/s]
Evaluate: 100%|███████████████████████████████| 157/157 [00:02<00:00, 54.12it/s]
Train: 100%|████████████████

Эпоха: 14
Точность на валидации: 0.9937%
Время обученния 1073.99442





In [26]:
# Write the object held in `best_model_weights` to 'model_weights_conv10.pth'.
# The training loop assigns it whenever validation accuracy improves; it is
# still None if that never happened.
torch.save(best_model_weights, 'model_weights_conv10.pth')