In [80]:
import os
import torch 
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor

In [6]:
# Prefer the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs end-to-end on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [103]:
# Make tensors and modules created from here on default to `device`
# (the factory calls further down that pass no device end up on it).
torch.set_default_device(device)

In [182]:
class MyNeuralNetwork(nn.Module):
    """Fully connected classifier producing 10 raw class scores (logits).

    Every input sample is flattened to 28*28 = 784 values and passed through
    three hidden ``Linear`` + ``ReLU`` stages (512, 512 and 256 units) followed
    by a final ``Linear`` layer with 10 outputs. No softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Collapses all non-batch dimensions into one before the linear layers.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
        )

    def forward(self, x):
        """Return the logits for a batch ``x``.

        Args:
            x: Tensor whose non-batch dimensions flatten to 784 values.

        Returns:
            Tensor with one row of 10 logits per sample.
        """
        return self.linear_relu_stack(self.flatten(x))
        
    
        

In [183]:
# Instantiate the network; the bare `model` expression displays its layer structure.
model = MyNeuralNetwork()
model

Flatten(start_dim=1, end_dim=-1)


MyNeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): ReLU()
    (6): Linear(in_features=256, out_features=10, bias=True)
  )
)

In [184]:
# Run one random 28x28 input through the network and report the class with
# the highest softmax probability.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([7], device='cuda:0')


In [185]:
# Turn the logits from the previous cell into class probabilities and display
# the index of the most likely class. (A bare `pred_probab` expression in the
# middle of the cell was dropped: only a cell's last expression is displayed.)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
y_pred

tensor([7], device='cuda:0')

In [186]:
# A mini-batch of three random 28x28 inputs for the layer-by-layer walkthrough.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [187]:
# Collapse everything after the batch dimension: (3, 28, 28) -> (3, 784).
flatten_image = nn.Flatten(start_dim=1, end_dim=-1)(input_image)
flatten_image.shape

torch.Size([3, 784])

In [112]:
# Linear map from the 784 flattened values down to 5 features.
layer = nn.Linear(in_features=28 * 28, out_features=5)

In [113]:
# Apply the linear layer to the flattened batch and show the resulting shape.
hidden = layer(flatten_image)
hidden.size()

torch.Size([3, 5])

In [114]:
# Show the activations before and after ReLU: negative entries become zero,
# positive entries pass through unchanged.
hidden_after_relu = nn.ReLU()(hidden)
print('before relu', hidden)
print('after relu', hidden_after_relu)

before relu tensor([[-0.2456,  0.1949, -0.0324, -0.2991, -0.5780],
        [-0.2370,  0.7251, -0.0850, -0.3289, -0.2571],
        [ 0.0226,  0.2963, -0.0331, -0.4357, -0.3695]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
after relu tensor([[0.0000, 0.1949, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.7251, 0.0000, 0.0000, 0.0000],
        [0.0226, 0.2963, 0.0000, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<ReluBackward0>)


In [115]:
# Minimal one-layer setup for the autograd walkthrough: z = input @ w + b,
# scored against an all-zero target with binary cross-entropy on the logits.
# NOTE(review): `input` shadows the Python builtin; kept because a later cell reads it.
input = torch.ones(5)
y = torch.zeros(3)

# Parameters are created with requires_grad=True so autograd tracks them.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

z = input @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)



In [116]:
# Display the input, target, weight and bias tensors defined in the previous cell.
input, y, w, b

(tensor([1., 1., 1., 1., 1.], device='cuda:0'),
 tensor([0., 0., 0.], device='cuda:0'),
 tensor([[-1.0013,  1.0017,  1.1053],
         [ 2.9527,  0.5332,  1.4619],
         [-1.2628, -0.1155,  1.2776],
         [ 0.6971,  0.8375,  1.6511],
         [ 2.1808, -0.9795,  0.4869]], device='cuda:0', requires_grad=True),
 tensor([ 1.3432,  0.5157, -1.3781], device='cuda:0', requires_grad=True))

In [117]:
# grad_fn is the backward node autograd recorded for the operation that produced z.
z.grad_fn

<AddBackward0 at 0x7f4d07eb3c10>

In [118]:
# Same inspection for the loss: the backward node of the operation that produced it.
loss.grad_fn

<BinaryCrossEntropyWithLogitsBackward0 at 0x7f4d07eb0100>

In [119]:
# Backpropagate from the scalar loss; this populates .grad on the leaf tensors w and b.
loss.backward()

In [120]:
# Gradients of the loss with respect to the weights and the bias, one per line.
print(w.grad, b.grad, sep='\n')

tensor([[0.3309, 0.2858, 0.3300],
        [0.3309, 0.2858, 0.3300],
        [0.3309, 0.2858, 0.3300],
        [0.3309, 0.2858, 0.3300],
        [0.3309, 0.2858, 0.3300]], device='cuda:0')
tensor([0.3309, 0.2858, 0.3300], device='cuda:0')


In [121]:
# z was computed from tensors created with requires_grad=True, so it is tracked too.
z.requires_grad

True

In [188]:
# Hyperparameters used by the data loaders and training cells below.
learning_rate = 0.001  # step size passed to SGD
batch_size = 512       # samples per DataLoader batch
epochs = 10            # NOTE(review): the training cells rebind this to 100

In [189]:
# FashionMNIST train and test splits, downloaded into ./data when missing;
# every image is converted to a tensor by ToTensor().
train_datasets = datasets.FashionMNIST(
    root='data',
    download=True,
    train=True,
    transform=ToTensor()
)
# The previous `train_datasets.data.to(device)` call was removed: Tensor.to()
# returns a new tensor and the result was discarded, so it only made a wasted
# device copy. Batches are moved to the device inside the train/test loops.
test_datasets = datasets.FashionMNIST(
    root='data',
    download=True,
    train=False,
    transform=ToTensor()
)

# NOTE(review): the training loader iterates in a fixed order (no shuffle=True);
# confirm this is intended. Enabling shuffling together with a non-CPU default
# device may need an explicit `generator=` argument — verify before changing.
train_loader = DataLoader(train_datasets, batch_size=batch_size)
test_loader = DataLoader(test_datasets, batch_size=batch_size)

In [190]:
# Multi-class classification criterion operating on raw logits.
# NOTE(review): this rebinds `loss`, which held the BCE loss tensor from the autograd demo.
loss = torch.nn.CrossEntropyLoss()

In [191]:
# Plain stochastic gradient descent over all parameters of `model`.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [192]:
def train_loop(train_loader, model, loss_fn, optimizer):
    """Train ``model`` for one full pass over ``train_loader``.

    Args:
        train_loader: Iterable of ``(inputs, targets)`` batches exposing a
            ``dataset`` attribute with a length (used for progress output).
        model: Module to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Prints the batch loss and the number of processed samples every 100 batches.
    """
    size = len(train_loader.dataset)
    # Send batches to wherever the model's parameters live rather than relying
    # on a notebook-level `device` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    model.train()

    for batch, (X, y) in enumerate(train_loader):
        if target_device is not None:
            X = X.to(target_device)
            y = y.to(target_device)

        # Clear gradients *before* the backward pass so values left over from
        # any earlier backward() call cannot leak into this update.
        optimizer.zero_grad()
        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")
        
        
    

In [193]:
def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
     # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [194]:
# Display the dataset wrapped by the training loader (split, size, transform).
train_loader.dataset

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [197]:
# Train `model` with plain SGD, evaluating on the test split after every epoch.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# NOTE(review): rebinds `epochs`, which the hyperparameter cell set to 10.
epochs = 100
for epoch_idx in range(epochs):
    print(f"Epoch {epoch_idx+1}\n-------------------------------")
    train_loop(train_loader, model, loss_fn, optimizer)
    test_loop(test_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.782308  [  512/60000]
loss: 0.791410  [51712/60000]
Test Error: 
 Accuracy: 70.2%, Avg loss: 0.793590 

Epoch 2
-------------------------------
loss: 0.779005  [  512/60000]
loss: 0.788441  [51712/60000]
Test Error: 
 Accuracy: 70.3%, Avg loss: 0.790651 

Epoch 3
-------------------------------
loss: 0.775759  [  512/60000]
loss: 0.785532  [51712/60000]
Test Error: 
 Accuracy: 70.3%, Avg loss: 0.787764 

Epoch 4
-------------------------------
loss: 0.772573  [  512/60000]
loss: 0.782673  [51712/60000]
Test Error: 
 Accuracy: 70.5%, Avg loss: 0.784924 

Epoch 5
-------------------------------
loss: 0.769437  [  512/60000]
loss: 0.779875  [51712/60000]
Test Error: 
 Accuracy: 70.6%, Avg loss: 0.782132 

Epoch 6
-------------------------------
loss: 0.766349  [  512/60000]
loss: 0.777129  [51712/60000]
Test Error: 
 Accuracy: 70.7%, Avg loss: 0.779386 

Epoch 7
-------------------------------
loss: 0.763308  [  512/60000]
loss: 0.774428  [5

In [198]:
# Persist only the learned parameters (the state dict) of `model` to 'my.pth'.
torch.save(model.state_dict(), 'my.pth')

In [200]:
# Rebuild the architecture and restore the parameters saved in 'my.pth'.
# - map_location remaps the stored tensors onto `device`, so the checkpoint
#   also loads when it was written on a device that is not available here.
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which is all a state dict needs, instead of arbitrary pickled objects.
model2 = MyNeuralNetwork()
model2.load_state_dict(torch.load('my.pth', map_location=device, weights_only=True))

Flatten(start_dim=1, end_dim=-1)


<All keys matched successfully>

In [201]:
# Switch the restored model to evaluation mode; eval() returns the module, which is displayed.
model2.eval()

MyNeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): ReLU()
    (6): Linear(in_features=256, out_features=10, bias=True)
  )
)

In [202]:
# Continue training the restored `model2` with a fresh SGD optimizer,
# evaluating on the test split after every epoch.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model2.parameters(), lr=learning_rate)

# NOTE(review): rebinds `epochs`, which the hyperparameter cell set to 10.
epochs = 100
for epoch_idx in range(epochs):
    print(f"Epoch {epoch_idx+1}\n-------------------------------")
    train_loop(train_loader, model2, loss_fn, optimizer)
    test_loop(test_loader, model2, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.580942  [  512/60000]
loss: 0.610337  [51712/60000]
Test Error: 
 Accuracy: 78.0%, Avg loss: 0.616486 

Epoch 2
-------------------------------
loss: 0.579650  [  512/60000]
loss: 0.609138  [51712/60000]
Test Error: 
 Accuracy: 78.1%, Avg loss: 0.615452 

Epoch 3
-------------------------------
loss: 0.578368  [  512/60000]
loss: 0.607948  [51712/60000]
Test Error: 
 Accuracy: 78.1%, Avg loss: 0.614429 

Epoch 4
-------------------------------
loss: 0.577097  [  512/60000]
loss: 0.606770  [51712/60000]
Test Error: 
 Accuracy: 78.2%, Avg loss: 0.613417 

Epoch 5
-------------------------------
loss: 0.575834  [  512/60000]
loss: 0.605596  [51712/60000]
Test Error: 
 Accuracy: 78.2%, Avg loss: 0.612415 

Epoch 6
-------------------------------
loss: 0.574580  [  512/60000]
loss: 0.604429  [51712/60000]
Test Error: 
 Accuracy: 78.2%, Avg loss: 0.611423 

Epoch 7
-------------------------------
loss: 0.573338  [  512/60000]
loss: 0.603274  [5