In [2]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor

## Импорт Датасетов

In [3]:
training_data = datasets.FashionMNIST(
    root = "data", 
    train = True,
    download = True,
    transform = ToTensor(),
)

testing_data = datasets.FashionMNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor(),
)

In [4]:
batch_size = 64

train_dataloader = torch.utils.data.DataLoader(training_data, batch_size = batch_size)
test_dataloader = torch.utils.data.DataLoader(testing_data, batch_size = batch_size)

for X, y in test_dataloader:
    print(f"X_test shape: {X.shape} | y_test shape: {y.shape}")
    break

X_test shape: torch.Size([64, 1, 28, 28]) | y_test shape: torch.Size([64])


## Классическая нейронная сеть (Полносвязные слои с функцией активации ReLU)

Для обучения используется SGD

In [5]:
device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else "cpu"

class NeuralNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = torch.nn.Flatten()
        self.linear_relu_stack = torch.nn.Sequential(
            torch.nn.Linear(28*28, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 10)
        )

    def forward(self, X):
        X = self.flatten(X)
        logits = self.linear_relu_stack(X)

        return logits

nn_model = NeuralNet().to(device)

print(nn_model)        

NeuralNet(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(nn_model.parameters(), lr = 1e-3)

def train(dataloader, model, loss_func, optimizer):
    size = len(dataloader.dataset)
    model.train()

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        preds = model(X)
        preds_proba = torch.nn.Softmax(dim = 1)(preds)
        loss = loss_func(preds, y)

        #backprop
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"Loss: {loss} | {current} / {size}")

def test(dataloader, model, loss_func):
    size = len(dataloader.dataset)
    batches_amount = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            preds = model(X)
            test_loss += loss_func(preds, y).item()
            correct += (preds.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= batches_amount
    correct /= size

    print(f"Test || Loss: {test_loss:>5f} || Accuracy: {correct*100:>.2f}%")


epochs = 8
for e in range(epochs):
    print(f"Epoch {e+1} \n ===============================================")
    train(train_dataloader, nn_model, loss_func, optimizer)
    test(test_dataloader, nn_model, loss_func)

Epoch 1 
Loss: 2.304490804672241 | 64 / 60000
Loss: 2.292654037475586 | 6464 / 60000
Loss: 2.2741780281066895 | 12864 / 60000
Loss: 2.2638673782348633 | 19264 / 60000
Loss: 2.2513375282287598 | 25664 / 60000
Loss: 2.212019443511963 | 32064 / 60000
Loss: 2.2316904067993164 | 38464 / 60000
Loss: 2.189394235610962 | 44864 / 60000
Loss: 2.1826353073120117 | 51264 / 60000
Loss: 2.148253917694092 | 57664 / 60000
Test || Loss: 2.148624 || Accuracy: 30.02%
Epoch 2 
Loss: 2.1606557369232178 | 64 / 60000
Loss: 2.153538227081299 | 6464 / 60000
Loss: 2.098604440689087 | 12864 / 60000
Loss: 2.105241060256958 | 19264 / 60000
Loss: 2.0628411769866943 | 25664 / 60000
Loss: 1.9925140142440796 | 32064 / 60000
Loss: 2.0258476734161377 | 38464 / 60000
Loss: 1.9448362588882446 | 44864 / 60000
Loss: 1.9432240724563599 | 51264 / 60000
Loss: 1.8591110706329346 | 57664 / 60000
Test || Loss: 1.876250 || Accuracy: 55.48%
Epoch 3 
Loss: 1.9106634855270386 | 64 / 60000
Loss: 1.8799083232879639 | 6464 / 60000
Loss:

## state_dict ()

In [7]:
for param_tensor in nn_model.state_dict():
    print(param_tensor, "\t", nn_model.state_dict()[param_tensor].size())

linear_relu_stack.0.weight 	 torch.Size([512, 784])
linear_relu_stack.0.bias 	 torch.Size([512])
linear_relu_stack.2.weight 	 torch.Size([512, 512])
linear_relu_stack.2.bias 	 torch.Size([512])
linear_relu_stack.4.weight 	 torch.Size([10, 512])
linear_relu_stack.4.bias 	 torch.Size([10])


## Свёрточная нейронная сеть (+ использование нормирования)

Для обучения использовалась Adam

In [8]:
import torchvision

transform = torchvision.transforms.Compose(
    [ToTensor(),
    torchvision.transforms.Normalize((.5), (.5))]
)

train_set = torchvision.datasets.FashionMNIST(
    root = "data_cnn",
    train = True,
    download = True,
    transform = transform,
)

test_set = torchvision.datasets.FashionMNIST(
    root = "data_cnn",
    train = False,
    download = True,
    transform = transform,
)

In [9]:
batch_size = 64

train_dataloader = torch.utils.data.DataLoader(train_set, batch_size = batch_size)
test_dataloader = torch.utils.data.DataLoader(test_set, batch_size = batch_size)

for X, y in test_dataloader:
    print(f"X_test.shape: {X.shape} | y_test.shape: {y.shape}")
    break

X_test.shape: torch.Size([64, 1, 28, 28]) | y_test.shape: torch.Size([64])


In [None]:
class ConvolutionalNet(torch.nn.Module):
    def __init__(self):
        super(ConvolutionalNet, self).__init__()
        
        self.first_layer = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 3, padding = 1),
            torch.nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size = 2, stride = 2),
        )

        self.second_layer = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, padding = 0),
            torch.nn.BatchNorm2d(64),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size = 2),
        )

        self.fc1 = torch.nn.Linear(in_features = 64*6*6, out_features = 600)
        self.drop = torch.nn.Dropout(.25) #drop neurons with probability .25
        self.fc2 = torch.nn.Linear(in_features = 600, out_features = 120)
        self.fc3 = torch.nn.Linear(in_features = 120, out_features = 10)


    def forward(self, X):
        out_sequential1 = self.first_layer(X)
        out_sequential2 = self.second_layer(out_sequential1)
        out_sequential2 = out_sequential2.view(out_sequential2.size(0), -1)
        out_fc1 = self.fc1(out_sequential2)
        out_drop = self.drop(out_fc1)
        out_fc2 = self.fc2(out_drop)
        out_fc3 = self.fc3(out_fc2)

        return out_fc3

## Переопределим некоторые уже известные переменные для удобства

In [None]:
cnn_model = ConvolutionalNet().to(device)
print(cnn_model)

ConvolutionalNet(
  (first_layer): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (second_layer): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2304, out_features=600, bias=True)
  (drop): Dropout(p=0.25, inplace=False)
  (fc2): Linear(in_features=600, out_features=120, bias=True)
  (fc3): Linear(in_features=120, out_features=10, bias=True)
)


In [None]:
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr = 1e-3)

epochs = 5
for e in range(epochs):
    print(f"Epoch {e+1} \n ===================================")
    train(train_dataloader, cnn_model, loss_func, optimizer)
    test(test_dataloader, cnn_model, loss_func)

Epoch 1 
Loss: 2.339294910430908 | 64 / 60000
Loss: 0.452540785074234 | 6464 / 60000
Loss: 0.30578184127807617 | 12864 / 60000
Loss: 0.5669863224029541 | 19264 / 60000
Loss: 0.44785287976264954 | 25664 / 60000
Loss: 0.5179901719093323 | 32064 / 60000
Loss: 0.2588045299053192 | 38464 / 60000
Loss: 0.543664276599884 | 44864 / 60000
Loss: 0.39096739888191223 | 51264 / 60000
Loss: 0.22987543046474457 | 57664 / 60000
Test || Loss: 0.376832 || Accuracy: 86.73%
Epoch 2 
Loss: 0.23029367625713348 | 64 / 60000
Loss: 0.299391508102417 | 6464 / 60000
Loss: 0.1706864833831787 | 12864 / 60000
Loss: 0.43205392360687256 | 19264 / 60000
Loss: 0.5364901423454285 | 25664 / 60000
Loss: 0.4061213433742523 | 32064 / 60000
Loss: 0.20886099338531494 | 38464 / 60000
Loss: 0.4511266052722931 | 44864 / 60000
Loss: 0.29469287395477295 | 51264 / 60000
Loss: 0.23984776437282562 | 57664 / 60000
Test || Loss: 0.289221 || Accuracy: 89.89%
Epoch 3 
Loss: 0.15684136748313904 | 64 / 60000
Loss: 0.3420168459415436 | 6464

In [None]:
for param_tensor in cnn_model.state_dict():
    print(param_tensor, "\t", cnn_model.state_dict()[param_tensor].size())

first_layer.0.weight 	 torch.Size([32, 1, 3, 3])
first_layer.0.bias 	 torch.Size([32])
first_layer.1.weight 	 torch.Size([32])
first_layer.1.bias 	 torch.Size([32])
first_layer.1.running_mean 	 torch.Size([32])
first_layer.1.running_var 	 torch.Size([32])
first_layer.1.num_batches_tracked 	 torch.Size([])
second_layer.0.weight 	 torch.Size([64, 32, 3, 3])
second_layer.0.bias 	 torch.Size([64])
second_layer.1.weight 	 torch.Size([64])
second_layer.1.bias 	 torch.Size([64])
second_layer.1.running_mean 	 torch.Size([64])
second_layer.1.running_var 	 torch.Size([64])
second_layer.1.num_batches_tracked 	 torch.Size([])
fc1.weight 	 torch.Size([600, 2304])
fc1.bias 	 torch.Size([600])
fc2.weight 	 torch.Size([120, 600])
fc2.bias 	 torch.Size([120])
fc3.weight 	 torch.Size([10, 120])
fc3.bias 	 torch.Size([10])
