In [1]:
# 1) Design model (input size, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#    - forward pass: compute prediction and loss
#    - backward pass: gradients
#    - update weights

In [1]:
import torch
import torchvision # some builtin datasets
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import numpy as np
import math

In [3]:
def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the mathematical result unchanged (the factor cancels in the ratio) but
    keeps ``np.exp`` from overflowing to ``inf`` for large inputs, which would
    otherwise produce ``nan`` outputs.

    Args:
        x: array-like of scores; normalisation is performed over axis 0.

    Returns:
        numpy array with the same shape as ``x`` whose entries along axis 0
        are non-negative and sum to 1.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    # Exponentiate once and reuse for both numerator and denominator.
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

# NumPy version: run the hand-written softmax on a small score vector.
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print(outputs)

# PyTorch version: same scores through the built-in softmax, for comparison.
x = torch.tensor([2.0, 1.0, 0.1])
outputs = x.softmax(dim=0)
print(outputs)

[0.65900114 0.24243297 0.09856589]
tensor([0.6590, 0.2424, 0.0986])


In [5]:
def cross_entropy(y, y_pred, eps=1e-15):
    """Cross-entropy between a target distribution and predicted probabilities.

    Computes ``-sum(y * log(y_pred))`` over all elements.

    Predicted probabilities are clamped from below at ``eps`` before the
    logarithm is taken. Without this, a predicted probability of exactly 0
    gives ``log(0) = -inf`` and ``0 * -inf = nan``, so even a perfectly
    confident correct prediction would return ``nan``.

    Args:
        y: array-like of target probabilities (e.g. a one-hot vector).
        y_pred: array-like of predicted probabilities, same shape as ``y``.
        eps: smallest probability passed to ``log``; defaults to 1e-15.

    Returns:
        The summed cross-entropy as a numpy float (not averaged).
    """
    y = np.asarray(y, dtype=float)
    # Clamp only the lower end so values in (eps, 1] are left untouched.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, None)
    loss = -np.sum(y * np.log(y_pred))
    return loss

# One-hot target: the true class is index 0.
Y = np.array([1, 0, 0])
# Two candidate probability vectors: the first puts most mass on the true
# class, the second puts most mass on a wrong class.
Y_pred_1 = np.array([0.7, 0.2, 0.1])
Y_pred_2 = np.array([0.1, 0.3, 0.6])
l1, l2 = cross_entropy(Y, Y_pred_1), cross_entropy(Y, Y_pred_2)
print(f'l1:{l1:.4f}', f'l2:{l2:.4f}', sep='\n')

l1:0.3567
l2:2.3026


In [9]:
# nn.CrossEntropyLoss
# Softmax is already included in the loss, so it is fed raw scores (logits);
# do not apply softmax to the predictions beforehand.

loss = nn.CrossEntropyLoss()

# Predictions: n_samples x n_classes = 1 x 3
# Targets: class indices (not one-hot encoded), one per sample
Y = torch.tensor([0])
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred_good, Y)
l2 = loss(Y_pred_bad, Y)

print(f'l1:{l1:.4f}')
print(f'l2:{l2:.4f}')

# Predicted class = index of the largest score in each row (dim 1).
# argmax returns the indices directly, so nothing is bound to `_`, which in a
# notebook would shadow IPython's last-output variable.
prediction1 = torch.argmax(Y_pred_good, dim=1)
prediction2 = torch.argmax(Y_pred_bad, dim=1)
print(prediction1, prediction2)

l1:0.4170
l2:1.8406
tensor([0]) tensor([1])


In [10]:
# Multiple samples: one target class index per row of scores (not one-hot).
Y = torch.tensor([2, 0, 1])
Y_pred_good = torch.tensor([
    [0.1, 1.0, 2.1],
    [2.0, 1.0, 0.1],
    [0.1, 3.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [0.5, 2.0, 0.3],
    [2.0, 1.0, 0.1],
    [2.0, 1.0, 0.1],
])

# `loss` is the nn.CrossEntropyLoss instance created in an earlier cell.
l1, l2 = loss(Y_pred_good, Y), loss(Y_pred_bad, Y)
print(f'l1:{l1:.4f}', f'l2:{l2:.4f}', sep='\n')

# Row-wise maximum: keep both the largest score and its index (the predicted class).
value1, prediction1 = Y_pred_good.max(dim=1)
value2, prediction2 = Y_pred_bad.max(dim=1)
print(prediction1, prediction2)

l1:0.3018
l2:1.2916
tensor([2, 0, 1]) tensor([1, 0, 0])


In [13]:
# Multiclass
class MultiCLSNeuralNet(nn.Module):
    """Two-layer fully connected network for multi-class classification.

    ``forward`` returns the raw output of the last linear layer; no softmax is
    applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Build linear -> ReLU -> linear with the given layer widths."""
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one score per class for each input row."""
        hidden = self.relu(self.linear1(x))
        return self.linear2(hidden)

# 28*28 input features, 5 hidden units, 3 output classes.
model = MultiCLSNeuralNet(
    input_size=28 * 28,
    hidden_size=5,
    num_classes=3,
)
# The loss applies softmax itself, so the model's raw outputs are passed to it.
criterion = nn.CrossEntropyLoss()

In [17]:
# Binary classification: sigmoid output + nn.BCELoss()
class BinaryCLSNeuralNet(nn.Module):
    """Two-layer fully connected network for binary classification.

    The final linear layer has a single output unit, and ``forward`` passes it
    through a sigmoid, so the model returns values in (0, 1).
    """

    def __init__(self, input_size, hidden_size):
        """Build linear -> ReLU -> linear(1) with the given layer widths."""
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the sigmoid of the single output unit for each input row."""
        score = self.linear2(self.relu(self.linear1(x)))
        # Sigmoid is applied here because the model is paired with a loss that
        # expects probabilities rather than raw scores.
        return torch.sigmoid(score)

# 28*28 input features, 5 hidden units; the model outputs a single probability.
model = BinaryCLSNeuralNet(
    input_size=28 * 28,
    hidden_size=5,
)
# BCELoss takes probabilities, matching the sigmoid applied inside the model.
criterion = nn.BCELoss()