In [68]:
#%pip install torch torchvision


In [69]:
import torch

# A tensor is an n-dimensional array; this one is 1-D and built from three integers.
x = torch.tensor((1, 2, 3))
x

tensor([1, 2, 3])

In [70]:
# 2 rows x 3 columns, each entry sampled uniformly from [0, 1)
random_tensor = torch.rand((2, 3))
random_tensor

tensor([[0.4853, 0.1795, 0.6111],
        [0.7453, 0.1173, 0.8784]])

In [71]:
# 2 x 3 tensor filled with zeros (default float dtype)
zeros_tensor = torch.zeros((2, 3))
zeros_tensor

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [72]:
# 2 x 3 tensor filled with ones (default float dtype)
ones_tensor = torch.ones((2, 3))
ones_tensor

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [73]:
# Broadcasting: the 1-D x is added to every row of the 2 x 3 tensor.
torch.add(x, ones_tensor)

tensor([[2., 3., 4.],
        [2., 3., 4.]])

In [74]:
# Two 2 x 2 float matrices (one float literal is enough to make the whole tensor float).
a = torch.tensor([[1., 2.], [3., 4.]])
b = torch.tensor([[5., 6.], [7., 8.]])
torch.matmul(a, b)  # matrix product, equivalent to a @ b

tensor([[19., 22.],
        [43., 50.]])

In [75]:
# element-wise product: entries are multiplied position by position (not a matrix product)
torch.mul(a, b)

tensor([[ 5., 12.],
        [21., 32.]])

In [76]:
# Sum over all entries, returned as a 0-dimensional tensor
torch.sum(a)

tensor(10.)

In [77]:
# Mean over all entries, returned as a 0-dimensional tensor
torch.mean(a)

tensor(2.5000)

In [78]:
# Device selection: cpu, cuda, mps (other backends exist as well).
# Pick the best backend that is actually available on this machine instead of
# overwriting `device` unconditionally.
if torch.cuda.is_available():
    device = torch.device("cuda")  # NVIDIA GPU via CUDA
elif torch.backends.mps.is_available():
    device = torch.device("mps")  # Apple Silicon GPU
else:
    device = torch.device("cpu")  # ordinary CPU fallback

In [79]:
# Gradients

# requires_grad=True tells autograd to record the operations applied to x
x = torch.tensor(2.0, requires_grad=True)
y = x.pow(2) + 3 * x + 5
# analytically: dy/dx = 2*x + 3, i.e. 2*2 + 3 = 7 at x = 2
y.backward()  # backpropagation stores dy/dx in x.grad
x.grad

tensor(7.)

- A minimal neural network: one gradient-descent step

In [80]:
import torch.nn as nn # Neural Networks
import torch.optim as optim # optimizer framework for gradient methods

# One gradient-descent step on a single fully connected layer.
# 10 inputs, 1 output -> computes W*x + b (TensorFlow calls this layer Dense).
# In literature/publications such networks appear as FC, FFN, FFNN, MLP (Multi-Layer Perceptron).
# The instance is deliberately not named `NN`: a class called `NN` is defined further
# down in this notebook, and sharing the name would let a re-run of this cell clobber it.
linear_model = nn.Linear(10, 1)

loss = nn.MSELoss() # mean squared error; also called cost or criterion
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)
# lr = learning rate (eta, alpha)
# linear_model.parameters() ... weights and biases

input_data = torch.rand(10) # random input, X
output = linear_model(input_data) # y_pred, predictions, y_hat
y = torch.ones(1) # ground truth, target

# Initial value of the loss function. nn.MSELoss takes (input, target) in that order;
# the value is symmetric for MSE, but keeping the order consistent avoids surprises
# with losses that are not symmetric.
loss_output = loss(output, y)
print(f"Loss: {loss_output.item():.2} (before update)")

# The update itself
optimizer.zero_grad() # clear any stale gradients before computing new ones
loss_output.backward() # compute gradients
optimizer.step() # update the weights and biases

output_new = linear_model(input_data) # forward pass with the updated weights and biases
loss_new = loss(output_new, y) # value of the loss function after the update
print(f"Loss: {loss_new.item():.2} (after update)")


Loss: 0.092 (before update)
Loss: 0.077 (after update)


In [81]:
y  # ground-truth target used for both loss values

tensor([1.])

In [82]:
output  # prediction computed before the optimizer step

tensor([0.6965], grad_fn=<ViewBackward0>)

In [83]:
output_new  # prediction recomputed after the optimizer step

tensor([0.7227], grad_fn=<ViewBackward0>)

# FashionMNIST dataset 
- the modern "Hello World" dataset for neural networks (published by Zalando)

In [84]:
from torchvision import datasets, transforms

# Preprocessing pipeline: currently a single step that converts each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# The training and test splits differ only in the `train` flag.
train_dataset, test_dataset = (
    datasets.FashionMNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 64 samples; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [85]:
# NOTE(review): scratch arithmetic with no stated purpose — possibly the number of
# batches of size 64 in 1024 samples; confirm or remove.
1024/64

16.0

In [86]:
# Model 

class NN(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 128 -> 64 -> 10 logits.

    Each hidden layer is combined with batch normalization, a ReLU activation
    and dropout. The output layer returns raw logits (no softmax).

    Args:
        preactivation: Where batch normalization is applied in the hidden layers.
            ``False`` (default): linear -> ReLU -> BatchNorm (post-activation).
            ``True``: linear -> BatchNorm -> ReLU (pre-activation).
            The default matches the forward pass that was effectively in use
            when this class defined ``forward`` twice (the later definition won).
    """

    def __init__(self, preactivation=False):
        super().__init__()  # initialize the nn.Module machinery
        self.layer1 = nn.Linear(28*28, 128)  # input layer
        self.bn1 = nn.BatchNorm1d(128)
        self.layer2 = nn.Linear(128, 64)  # hidden layer
        self.bn2 = nn.BatchNorm1d(64)
        self.layer3 = nn.Linear(64, 10)  # output layer, 10 product categories
        self.drop = nn.Dropout(0.3)  # in train mode, 30 % of activations are set to zero
        # typical dropout rates to try: 0.1 -> 0.2 -> ... -> 0.5
        self.preactivation = preactivation

    def _hidden_block(self, x, linear, bn):
        """Apply one hidden layer with BatchNorm, ReLU and dropout in the configured order."""
        if self.preactivation:
            x = torch.relu(bn(linear(x)))  # BatchNorm before the activation
        else:
            x = bn(torch.relu(linear(x)))  # BatchNorm after the activation
        return self.drop(x)  # dropout is placed after each hidden block

    def forward(self, x):
        """Propagate a batch through the network and return logits.

        Args:
            x: Input tensor that is reshaped to ``(-1, 28*28)``.

        Returns:
            Tensor with 10 logits per sample.
        """
        x = x.view(-1, 28*28)  # flatten each image to a 1-D vector
        x = self._hidden_block(x, self.layer1, self.bn1)
        x = self._hidden_block(x, self.layer2, self.bn2)
        # identity activation -> logits; the softmax is left to the loss function
        return self.layer3(x)


In [None]:
# Now training

model = NN()
lr = 1e-2 # learning rate
loss = nn.CrossEntropyLoss() # cross entropy because this is a multi-class problem
optimizer = optim.SGD(model.parameters(), lr=lr)

n_epochs = 20

for epoch in range(n_epochs):
    model.train() # train mode
    running_loss = 0.0 # sum of the batch losses within this epoch
    for images, labels in train_loader:
        optimizer.zero_grad() # reset the gradients
        # forward
        outputs = model(images) # calculate outputs
        curr_loss = loss(outputs, labels)
        # backward
        curr_loss.backward() # gradients
        optimizer.step()     # update weights and biases
        # .item() extracts a plain Python float; adding the tensor itself would keep
        # autograd history alive and let it grow across the whole epoch
        running_loss += curr_loss.item()
    print(f"Epoch[{epoch + 1}/{n_epochs}], Loss: {running_loss}")


Epoch[1/40], Loss: 556.85595703125
Epoch[2/40], Loss: 448.8724365234375
Epoch[3/40], Loss: 418.54931640625
Epoch[4/40], Loss: 404.9556579589844
Epoch[5/40], Loss: 393.7926025390625
Epoch[6/40], Loss: 378.41943359375
Epoch[7/40], Loss: 372.0762634277344
Epoch[8/40], Loss: 368.5741271972656
Epoch[9/40], Loss: 370.3645935058594
Epoch[10/40], Loss: 365.1058044433594
Epoch[11/40], Loss: 355.17913818359375
Epoch[12/40], Loss: 358.1354064941406
Epoch[13/40], Loss: 351.5055236816406
Epoch[14/40], Loss: 351.2488708496094
Epoch[15/40], Loss: 347.54376220703125
Epoch[16/40], Loss: 343.14398193359375
Epoch[17/40], Loss: 339.050537109375
Epoch[18/40], Loss: 337.1782531738281
Epoch[19/40], Loss: 335.6484069824219
Epoch[20/40], Loss: 331.34918212890625
Epoch[21/40], Loss: 329.5379943847656
Epoch[22/40], Loss: 323.7035217285156
Epoch[23/40], Loss: 324.279296875
Epoch[24/40], Loss: 320.00604248046875
Epoch[25/40], Loss: 319.33050537109375
Epoch[26/40], Loss: 318.7980041503906
Epoch[27/40], Loss: 320.01

In [88]:
len(train_dataset)  # number of samples in the training split

60000

In [89]:
len(test_dataset)  # number of samples in the test split

10000

In [93]:
# Evaluation

# Evaluation mode changes behavior, not just speed: dropout is disabled and
# batch normalization uses its running statistics instead of batch statistics.
model.eval()
correct = 0
total = 0

with torch.no_grad(): # no gradients are needed for evaluation
    for images, labels in test_loader:
        outputs = model(images)
        predicted = outputs.argmax(dim=1) # index of the largest logit = predicted class
        total += labels.size(0) # number of samples in this batch
        correct += (predicted == labels).sum().item()

accuracy = correct/total
print(f"Accuracy: {accuracy*100:.2f}%")



Accuracy: 87.67%
