# Семинарские задания

1. Написать функцию, переводящую изображение в матрицу столбцов - im2col(). На вход функция принимает изображение и размер свёртки, возвращает столбцы.

In [1]:
import numpy as np

def im2col(image, kernel_size):
    kH, kW = kernel_size

    if len(image.shape) == 3:
        H, W, C = image.shape
        image_padded = image
        is_color = True
    elif len(image.shape) == 2:
        H, W = image.shape
        image_padded = image
        C = 1
        is_color = False
    else:
        raise ValueError("Image must be either 2D (grayscale) or 3D (color).")

    out_height = H - kH + 1
    out_width = W - kW + 1

    cols = []

    for y in range(out_height):
        for x in range(out_width):
            if is_color:
                patch = image_padded[y:y+kH, x:x+kW, :]
                patch_flattened = patch.reshape(-1)
            else:
                patch = image_padded[y:y+kH, x:x+kW]
                patch_flattened = patch.flatten()
            cols.append(patch_flattened)

    cols = np.array(cols).T
    return cols

2. Написать функцию свёртки, которая работает без циклов. Вместо циклов, она использует im2col(), для перевода изображения в набор столбцов.

In [6]:
def convolve_with_im2col(image, kernel, stride=1, padding=0):
    kH, kW = kernel.shape

    # Flatten the kernel
    kernel_flattened = kernel.flatten()

    cols = im2col(image, (kH, kW))

    result = np.dot(kernel_flattened, cols)

    # Reshape the result to output dimensions
    out_height = (image.shape[0] + 2 * padding - kH) // stride + 1
    out_width = (image.shape[1] + 2 * padding - kW) // stride + 1

    return result.reshape(out_height, out_width)

3. Сравнить результаты с torch.nn.Conv2d

In [7]:
import torch
import torch.nn as nn
from torch.nn.functional import conv2d

image_np = np.random.rand(8, 8).astype(np.float32)  # Random 8x8 image
kernel_np = np.array([[1, 0, -1], [1, 0, -1], [1, 0, -1]], dtype=np.float32)  # 3x3 kernel

custom_output = convolve_with_im2col(image_np, kernel_np, stride=1, padding=0)

image_torch = torch.tensor(image_np).unsqueeze(0).unsqueeze(0)  # Add batch and channel dimensions
kernel_torch = torch.tensor(kernel_np).unsqueeze(0).unsqueeze(0)  # Add out_channels and in_channels dimensions

conv2d_layer = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0, bias=False)
conv2d_layer.weight.data = kernel_torch  # Set custom kernel weights

torch_output = conv2d_layer(image_torch).squeeze().detach().numpy()

print("Custom im2col-based Convolution Output:\n", custom_output)
print("Torch Conv2d Output:\n", torch_output)

if np.allclose(custom_output, torch_output, atol=1e-5):
    print("The outputs match!")
else:
    print("The outputs do not match.")

Custom im2col-based Convolution Output:
 [[-0.26550096 -0.03610522 -0.1310561   0.1083349   0.49020314  0.864507  ]
 [-0.36179203  0.15752113  0.5345055   0.70685256  0.17434216 -0.37750995]
 [-0.41467357 -0.43938458  0.60901916  0.52416074  0.31242156  0.38971603]
 [ 0.24081478 -0.62062234 -0.80593014  0.6867244   0.42825848  0.30668533]
 [-0.0899545  -1.0970418  -0.14868686  0.6273838   0.06175262  0.8289969 ]
 [ 0.2581129  -1.3067826   0.44732565  1.4011619  -0.39400145 -0.639328  ]]
Torch Conv2d Output:
 [[-0.26550096 -0.03610516 -0.13105613  0.1083349   0.4902032   0.864507  ]
 [-0.36179197  0.15752119  0.5345055   0.70685256  0.17434216 -0.37750995]
 [-0.41467357 -0.43938458  0.6090193   0.5241606   0.3124215   0.38971606]
 [ 0.24081483 -0.62062234 -0.80593014  0.6867244   0.42825848  0.30668533]
 [-0.08995456 -1.0970418  -0.14868686  0.6273838   0.06175262  0.8289969 ]
 [ 0.2581129  -1.3067826   0.44732565  1.4011619  -0.39400145 -0.639328  ]]
The outputs match!


# Лабораторная работа

1. Задача классификации изображений (https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html?highlight=mnist). Повторить тренировку модели (train) и запустить классификацию изображений (inference).
2. Получить максимальную точность классификации (минимальный loss) путём изменения модели, например, добавлением скрытых слоёв.
3. По возможности обучить на GPU.

In [11]:
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize images
])

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Using device: cuda:0
Files already downloaded and verified
Files already downloaded and verified


In [15]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)  # 3 input channels, 6 output, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)   # 2x2 max pooling
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)  # Flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [14]:
class ImprovedModel(nn.Module):
    def __init__(self):
        super(ImprovedModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)  # Dropout regularization

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [24]:
def train(model, trainloader, criterion, optimizer, device, epochs=10):
    model.to(device)
    model.train()

    print("Starting Training...")
    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # Get inputs and labels, and move to the correct device
            inputs, labels = data[0].to(device), data[1].to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % 2000 == 0:
                print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 2000:.3f}")
                running_loss = 0.0

    print("Finished Training")

In [30]:
model = CNNModel()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train(model, trainloader, criterion, optimizer, device, epochs=5)

torch.save(model.state_dict(), "cifar_net.pth")

Starting Training...
[Epoch 1, Batch 2000] loss: 1.881
[Epoch 1, Batch 4000] loss: 1.621
[Epoch 1, Batch 6000] loss: 1.536
[Epoch 1, Batch 8000] loss: 1.480
[Epoch 1, Batch 10000] loss: 1.426
[Epoch 1, Batch 12000] loss: 1.403
[Epoch 2, Batch 2000] loss: 1.307
[Epoch 2, Batch 4000] loss: 1.317
[Epoch 2, Batch 6000] loss: 1.293
[Epoch 2, Batch 8000] loss: 1.289
[Epoch 2, Batch 10000] loss: 1.282
[Epoch 2, Batch 12000] loss: 1.231
[Epoch 3, Batch 2000] loss: 1.176
[Epoch 3, Batch 4000] loss: 1.210
[Epoch 3, Batch 6000] loss: 1.191
[Epoch 3, Batch 8000] loss: 1.196
[Epoch 3, Batch 10000] loss: 1.174
[Epoch 3, Batch 12000] loss: 1.177
[Epoch 4, Batch 2000] loss: 1.111
[Epoch 4, Batch 4000] loss: 1.126
[Epoch 4, Batch 6000] loss: 1.118
[Epoch 4, Batch 8000] loss: 1.131
[Epoch 4, Batch 10000] loss: 1.136
[Epoch 4, Batch 12000] loss: 1.093
[Epoch 5, Batch 2000] loss: 1.066
[Epoch 5, Batch 4000] loss: 1.068
[Epoch 5, Batch 6000] loss: 1.078
[Epoch 5, Batch 8000] loss: 1.071
[Epoch 5, Batch 100

In [34]:
model = CNNModel().to(device)
model.load_state_dict(torch.load("cifar_net.pth", weights_only=False))

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on 10,000 test images: {100 * correct / total:.2f}%")

Accuracy on 10,000 test images: 59.46%


In [32]:
model = ImprovedModel()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

train(model, trainloader, criterion, optimizer, device, epochs=5)

torch.save(model.state_dict(), "improved_cifar_net.pth")

Starting Training...
[Epoch 1, Batch 2000] loss: 1.866
[Epoch 1, Batch 4000] loss: 1.574
[Epoch 1, Batch 6000] loss: 1.454
[Epoch 1, Batch 8000] loss: 1.400
[Epoch 1, Batch 10000] loss: 1.341
[Epoch 1, Batch 12000] loss: 1.318
[Epoch 2, Batch 2000] loss: 1.226
[Epoch 2, Batch 4000] loss: 1.200
[Epoch 2, Batch 6000] loss: 1.200
[Epoch 2, Batch 8000] loss: 1.173
[Epoch 2, Batch 10000] loss: 1.184
[Epoch 2, Batch 12000] loss: 1.154
[Epoch 3, Batch 2000] loss: 1.069
[Epoch 3, Batch 4000] loss: 1.071
[Epoch 3, Batch 6000] loss: 1.072
[Epoch 3, Batch 8000] loss: 1.094
[Epoch 3, Batch 10000] loss: 1.090
[Epoch 3, Batch 12000] loss: 1.070
[Epoch 4, Batch 2000] loss: 0.991
[Epoch 4, Batch 4000] loss: 0.994
[Epoch 4, Batch 6000] loss: 0.982
[Epoch 4, Batch 8000] loss: 1.018
[Epoch 4, Batch 10000] loss: 1.030
[Epoch 4, Batch 12000] loss: 1.033
[Epoch 5, Batch 2000] loss: 0.922
[Epoch 5, Batch 4000] loss: 0.960
[Epoch 5, Batch 6000] loss: 0.962
[Epoch 5, Batch 8000] loss: 0.970
[Epoch 5, Batch 100

In [35]:
model = ImprovedModel().to(device)

model.load_state_dict(torch.load("improved_cifar_net.pth", weights_only=False))

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on 10,000 test images: {100 * correct / total:.2f}%")

Accuracy on 10,000 test images: 63.87%
