# Gradient Descent



In [3]:
import torch

# we invoke requires_grad=True when we want to find this weight
x = torch.randn(3,requires_grad=True)
y = x + 2
z = y*y*2 #<-- not scalar
#z = z.mean()
z

tensor([0.0473, 0.0029, 0.2275], grad_fn=<MulBackward0>)

In [0]:
# if z is not scaler, we need to create a v vector that is the same size as z
# https://medium.com/unit8-machine-learning-publication/computing-the-jacobian-matrix-of-a-neural-network-in-python-4f162e5db180
v = torch.randn(3)
v

tensor([-0.9347,  0.3953,  0.2299])

In [0]:
z.backward(v,retain_graph=True) #ok after doing this
x.grad

tensor([-32.5362,  20.9846,   5.4597])

Calculate weights without computation

In [0]:
x = torch.randn(3,requires_grad=True)
x

# x.requires_grad_(False)
# x.detach()
# with torch.no_grad():

tensor([1.0512, 0.0848, 0.1704], requires_grad=True)

In [0]:
# option 1
x.requires_grad_(False)
x

tensor([ 2.3975,  1.0253, -1.4252])

In [0]:
# option 2
y=x.detach()
y

tensor([ 0.2284, -0.0936,  1.3454])

In [0]:
# option 3
with torch.no_grad():
  y = x + 2
  print(y)


tensor([3.0512, 2.0848, 2.1704])


Failed Example on how NOT to reset the grad ( fail to reset )

In [4]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
  model_output = (weights *3).sum()
  model_output.backward()

  print(weights.grad) # the gradients are accumulated in every loop
  #weights.grad.zero_() #reset zero

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


We managed to reset the grad here

In [0]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
  model_output = (weights *3).sum()
  model_output.backward()

  print(weights.grad) # the gradients are NOT summed in every loop
  weights.grad.zero_() #reset zero

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


Simple Backprop

In [6]:
import torch

x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0,requires_grad=True)

# forward pass and compute the loss and local gradient
y_hat = w*x
loss = (y_hat - y)**2

print(loss)

# backward pass
loss.backward()
print(w.grad)

## update weights
## next forward backward pass

w.sub(w.grad)
w

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


tensor(1., requires_grad=True)

Gradient Descent from Scratch using numpy



In [0]:
import torch
np.random.seed(42)

# f = w * x
# f = 2 * x

X = np.array([1,2,3,4],dtype = np.float32)
Y = np.array([2,4,6,8],dtype = np.float32)

#initialize the weight
#w = np.random.rand()
w = 0.0
print(w)

# model prediction
def forward(x):
  return w * x

# loss = MSE
def loss(y, y_hat):
  return ((y_hat - y)**2).mean()

# gradient
# MSE = 1/N * (w*x - y)**2
# dJ/dw = 1/N 2x (w*x-y)

def gradient(x,y,y_hat):
  return np.dot(2*x,y_hat-y)/x.size

print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = forward(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients
  dw = gradient(X,Y,y_hat)

  # update weights
  w -= learning_rate * dw

  if epoch % 10==0:
    print(f'epoch: {epoch+1}, weight : {w:.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {forward(5):.3f}')

0.0
Prediction before training : f(5) = 0.000
epoch: 1, weight : 0.300, loss =30.00000000
epoch: 11, weight : 1.665, loss =1.16278565
epoch: 21, weight : 1.934, loss =0.04506905
epoch: 31, weight : 1.987, loss =0.00174685
epoch: 41, weight : 1.997, loss =0.00006770
epoch: 51, weight : 1.999, loss =0.00000262
epoch: 61, weight : 2.000, loss =0.00000010
epoch: 71, weight : 2.000, loss =0.00000000
epoch: 81, weight : 2.000, loss =0.00000000
epoch: 91, weight : 2.000, loss =0.00000000
Prediction after training : f(5) = 10.000


Gradient Descent - Autograd

PyTorch Tutorial 06 - Training Pipeline: Model, Loss, and Optimizer

## Step by step <p>
1) Design model ( input, output size, forward pass )<p>
2) Construct loss and optimizer<p>
3) Training loop<p>

- forward pass : compute prediction
- backward pass : gradients
- update weights

In [0]:
import torch
np.random.seed(42)

# f = w * x
# f = 2 * x

X = torch.tensor([1,2,3,4],dtype = torch.float32)
Y = torch.tensor([2,4,6,8],dtype = torch.float32)

#initialize the weight
w = torch.tensor(0,dtype = torch.float32, requires_grad=True) 
print(w)

# model prediction
def forward(x):
  return w * x

# loss = MSE
def loss(y, y_hat):
  return ((y_hat - y)**2).mean()

print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = forward(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw
  print(w.grad)

  # update weights
  with torch.no_grad(): #ensure the gradient is not calculated
    w.sub_(learning_rate*w.grad)

  # zero the gradients to ensure it's not accumulated
  w.grad.zero_() #reset zero

  if epoch % 10==0:
    print(f'epoch: {epoch+1}, weight : {w:.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {forward(5):.3f}')

tensor(0., requires_grad=True)
Prediction before training : f(5) = 0.000
tensor(-30.)
epoch: 1, weight : 0.300, loss =30.00000000
tensor(-25.5000)
tensor(-21.6750)
tensor(-18.4238)
tensor(-15.6602)
tensor(-13.3112)
tensor(-11.3145)
tensor(-9.6173)
tensor(-8.1747)
tensor(-6.9485)
tensor(-5.9062)
epoch: 11, weight : 1.665, loss =1.16278565
tensor(-5.0203)
tensor(-4.2673)
tensor(-3.6272)
tensor(-3.0831)
tensor(-2.6206)
tensor(-2.2275)
tensor(-1.8934)
tensor(-1.6094)
tensor(-1.3680)
tensor(-1.1628)
epoch: 21, weight : 1.934, loss =0.04506890
tensor(-0.9884)
tensor(-0.8401)
tensor(-0.7141)
tensor(-0.6070)
tensor(-0.5159)
tensor(-0.4385)
tensor(-0.3728)
tensor(-0.3168)
tensor(-0.2693)
tensor(-0.2289)
epoch: 31, weight : 1.987, loss =0.00174685
tensor(-0.1946)
tensor(-0.1654)
tensor(-0.1406)
tensor(-0.1195)
tensor(-0.1016)
tensor(-0.0863)
tensor(-0.0734)
tensor(-0.0624)
tensor(-0.0530)
tensor(-0.0451)
epoch: 41, weight : 1.997, loss =0.00006770
tensor(-0.0383)
tensor(-0.0326)
tensor(-0.0277)


In [0]:
import torch
import torch.nn as nn 

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

# test sample
X_test = torch.tensor([5],dtype = torch.float32)

n_samples,n_features = X.shape
print(n_samples, n_features)

# 1) Design Model, the model has to implement the forward pass!
# Here we can use a built-in model from PyTorch

input_size = n_features
output_size = n_features

model = nn.Linear(input_size,output_size)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

# Training
learning_rate = 0.01
n_iters = 100

# Loss
loss = nn.MSELoss() 

# optimzie the weights
# bias all included
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw
  print(w.grad)

  # update weights
  optimizer.step()
  
  # zero gradients
  optimizer.zero_grad()

  # zero the gradients to ensure it's not accumulated
  w.grad.zero_() #reset zero

  if epoch % 10==0:
    [w,b] = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w[0][0].item():.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

4 1
Prediction before training : f(5) = -2.434
tensor(0.)
epoch: 1, weight : 0.020, loss =49.51508331
tensor([[-32.0879]])
tensor([[-26.7319]])
tensor([[-22.2705]])
tensor([[-18.5541]])
tensor([[-15.4583]])
tensor([[-12.8796]])
tensor([[-10.7315]])
tensor([[-8.9422]])
tensor([[-7.4516]])
tensor([[-6.2100]])
epoch: 11, weight : 1.633, loss =1.28582001
tensor([[-5.1758]])
tensor([[-4.3142]])
tensor([[-3.5966]])
tensor([[-2.9988]])
tensor([[-2.5008]])
tensor([[-2.0859]])
tensor([[-1.7404]])
tensor([[-1.4525]])
tensor([[-1.2127]])
tensor([[-1.0130]])
epoch: 21, weight : 1.894, loss =0.03773768
tensor([[-0.8466]])
tensor([[-0.7080]])
tensor([[-0.5925]])
tensor([[-0.4963]])
tensor([[-0.4161]])
tensor([[-0.3494]])
tensor([[-0.2937]])
tensor([[-0.2474]])
tensor([[-0.2088]])
tensor([[-0.1766]])
epoch: 31, weight : 1.937, loss =0.00518674
tensor([[-0.1498]])
tensor([[-0.1275]])
tensor([[-0.1089]])
tensor([[-0.0933]])
tensor([[-0.0804]])
tensor([[-0.0696]])
tensor([[-0.0606]])
tensor([[-0.0531]])

In [0]:
#1) Design model ( input, output size, forward pass )
#2) Construct loss and optimizer
#3) Training loop
#forward pass : compute prediction
#backward pass : gradients
#update weights

import torch
import torch.nn as nn

#np.random.seed(42)

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

X_test = torch.tensor([[5]],dtype = torch.float32)
n_samples,n_features = X.shape
print(X.shape)

input_size = n_features
output_size = n_features

model = nn.Linear(input_size,output_size)

#initialize the weight
w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True) 

# model prediction
def forward(x):
  return w * x

# Training
learning_rate = 0.01
n_iters = 100

# loss = MSE
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights
  #with torch.no_grad(): #ensure the gradient is not calculated
  optimizer.step()

  # zero the gradients to ensure it's not accumulated
  optimizer.zero_grad() #reset zero

  if epoch % 10==0:
    w,b = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w.item():.3f}, loss ={l.item():.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

torch.Size([4, 1])
Prediction before training : f(5) = -1.191
epoch: 1, weight : 0.030, loss =36.22446060
epoch: 11, weight : 1.415, loss =1.08311355
epoch: 21, weight : 1.646, loss =0.16543093
epoch: 31, weight : 1.690, loss =0.13369039
epoch: 41, weight : 1.705, loss =0.12533686
epoch: 51, weight : 1.715, loss =0.11802671
epoch: 61, weight : 1.723, loss =0.11115661
epoch: 71, weight : 1.732, loss =0.10468666
epoch: 81, weight : 1.739, loss =0.09859339
epoch: 91, weight : 1.747, loss =0.09285477
Prediction after training : f(5) = 9.493


In [0]:
#1) Design model ( input, output size, forward pass )
#2) Construct loss and optimizer
#3) Training loop
#forward pass : compute prediction
#backward pass : gradients
#update weights

import torch
import torch.nn as nn

#np.random.seed(42)

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

X_test = torch.tensor([[5]],dtype = torch.float32)
n_samples,n_features = X.shape
print(X.shape)

input_size = n_features
output_size = n_features

#model = nn.Linear(input_size,output_size)

#initialize the weight
w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True) 

# model prediction
def forward(x):
  return w * x

class LinearRegression(nn.Module):
  
  def __init__(self,input_dim,output_dim):
    super(LinearRegression,self).__init__()
    # define layers
    self.lin = nn.Linear(input_dim,output_dim)
  
  def forward(self,x):
    return self.lin(x)

model = LinearRegression(input_size,output_size)


# Training
learning_rate = 0.01
n_iters = 100

# loss = MSE
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights
  #with torch.no_grad(): #ensure the gradient is not calculated
  optimizer.step()

  # zero the gradients to ensure it's not accumulated
  optimizer.zero_grad() #reset zero

  if epoch % 10==0:
    w,b = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w.item():.3f}, loss ={l.item():.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

torch.Size([4, 1])
Prediction before training : f(5) = 3.698
epoch: 1, weight : 1.046, loss =13.34873009
epoch: 11, weight : 1.881, loss =0.34783754
epoch: 21, weight : 2.015, loss =0.01132941
epoch: 31, weight : 2.035, loss =0.00248756
epoch: 41, weight : 2.038, loss =0.00213108
epoch: 51, weight : 2.037, loss =0.00200157
epoch: 61, weight : 2.036, loss =0.00188492
epoch: 71, weight : 2.035, loss =0.00177521
epoch: 81, weight : 2.034, loss =0.00167188
epoch: 91, weight : 2.033, loss =0.00157456
Prediction after training : f(5) = 10.066


# Linear  Regression

### Step by step <p>
1) Design model ( input, output size, forward pass )<p>
2) Construct loss and optimizer<p>
3) Training loop<p>

- forward pass : compute prediction
- backward pass : gradients
- update weights

### General Steps <p>
0) prepare data <p>
1) Model <p>
2) loss and optimizer <p>
3) training loop <p>

In [0]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [0]:
# 0) prepare data
bc = datasets.load_breast_cancer()
X,y = bc.data, bc.target

set(y) #binary response ( logistic regression)

n_samples, n_features = X.shape
print(n_samples,n_features)

X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234)

# scale ( normalize to mean = 0 , s.d = 1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#convert numpy to tensor
X_train = torch.from_numpy(X_train.astype(np.float32))
X_test = torch.from_numpy(X_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.float32))
y_test = torch.from_numpy(y_test.astype(np.float32))

#reshape to (len(tensor),1)
y_train = y_train.view(y_train.shape[0],1)
y_test = y_test.view(y_test.shape[0],1)

# 1) Model
# f = wx + b,sigmoid in the end since it's binary

class LogisticRegression(nn.Module):
  
  def __init__(self,n_input_features):
    super(LogisticRegression,self).__init__()
    # define layers
    self.linear = nn.Linear(n_input_features,1) #output is only 1

  def forward(self,x): # x is input data
    y_predicted = torch.sigmoid(self.linear(x))
    return y_predicted 

model = LogisticRegression(n_features) 

# 2) loss and optimizer
# Training
learning_rate = 0.01

criterion = nn.BCELoss() #binary loss
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

# 3) training loop
num_epochs = 500
for epoch in range(num_epochs):

  # prediction = forward pass
  y_predicted = model(X_train)
  
  # loss
  loss=criterion(y_predicted,y_train)

  # gradients = backward pass
  loss.backward() # dl/dw

  # update weights
  optimizer.step()

  # zero the gradients to ensure it's not accumulated
  optimizer.zero_grad() #reset zero

  if (epoch+1) % 20==0:
    w,b = model.parameters()
    print(f'epoch: {epoch+1}, loss ={l.item():.4f}')

  with torch.no_grad(): #ensure the gradient is not calculated
    y_predicted = model(X_test) # will return a probability(sigmoid)
    y_predicted_cls = y_predicted.round()
    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f'accuracy = {acc:.4f}')


569 30
accuracy = 0.0965
accuracy = 0.1140
accuracy = 0.1491
accuracy = 0.1842
accuracy = 0.2018
accuracy = 0.2018
accuracy = 0.2895
accuracy = 0.3333
accuracy = 0.3509
accuracy = 0.4035
accuracy = 0.4649
accuracy = 0.5263
accuracy = 0.5351
accuracy = 0.5965
accuracy = 0.6140
accuracy = 0.6579
accuracy = 0.6930
accuracy = 0.7018
accuracy = 0.7456
epoch: 20, loss =0.0015
accuracy = 0.7632
accuracy = 0.7807
accuracy = 0.7807
accuracy = 0.7807
accuracy = 0.7807
accuracy = 0.7982
accuracy = 0.8070
accuracy = 0.8070
accuracy = 0.8070
accuracy = 0.8158
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8333
epoch: 40, loss =0.0015
accuracy = 0.8333
accuracy = 0.8333
accuracy = 0.8421
accuracy = 0.8509
accuracy = 0.8596
accuracy = 0.8596
accuracy = 0.8596
accuracy = 0.8596
accuracy = 0.8596
accuracy = 0.8684
accuracy = 0.8684
accuracy = 0.8684
accuracy = 0.8684
accuracy 

Dataset and DataLoader - Batch Training

gradient computation etc. not efficient for whole data set
 -> divide dataset into small batches

'''
training loop
for epoch in range(num_epochs):
    # loop over all batches
    for i in range(total_batches):
        batch_x, batch_y = ...

epoch = one forward and backward pass of ALL training samples
batch_size = number of training samples used in one forward/backward pass
number of iterations = number of passes, each pass (forward+backward) using [batch_size] number of sampes
e.g : 100 samples, batch_size=20 -> 100/20=5 iterations for 1 epoch

In [0]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [0]:
class WineDataset(Dataset):

  def __init__(self):
    # data loading
    file = 'https://raw.githubusercontent.com/python-engineer/pytorchTutorial/master/data/wine/wine.csv'
    xy = np.loadtxt(file,delimiter =",",dtype=np.float32,skiprows=1)
    self.x = torch.from_numpy(xy[:,1:])
    self.y = torch.from_numpy(xy[:,[0]])
    self.n_samples = xy.shape[0]

  def __getitem__(self,index):
    return self.x[index],self.y[index]

  def __len__(self):
    # len(dataset)
    return self.n_samples

dataset = WineDataset()
#first_data = dataset[0]
#features,labels = first_data
batch_size = 4
dataloader = DataLoader(dataset=dataset, batch_size=batch_size,shuffle=True,num_workers = 2)
dataiter = iter(dataloader)
data = dataiter.next()
features,labels = data
print(features,labels)

# training loop
num_epochs = 2
total_samples = len(dataset)
learning_rate = 0.01
n_iterations = math.ceil(total_samples/batch_size)
print(total_samples,n_iterations)

for epoch in range(num_epochs):
  for iter,(inputs,labels) in enumerate(dataloader):
    # forward, backward, update
    if (iter+1) % 5==0:
      print(f'epoch :{epoch+1}/{num_epochs},step :{iter+1}/{len(dataloader)},inputs :{inputs.shape}')

tensor([[1.3050e+01, 1.7300e+00, 2.0400e+00, 1.2400e+01, 9.2000e+01, 2.7200e+00,
         3.2700e+00, 1.7000e-01, 2.9100e+00, 7.2000e+00, 1.1200e+00, 2.9100e+00,
         1.1500e+03],
        [1.3580e+01, 2.5800e+00, 2.6900e+00, 2.4500e+01, 1.0500e+02, 1.5500e+00,
         8.4000e-01, 3.9000e-01, 1.5400e+00, 8.6600e+00, 7.4000e-01, 1.8000e+00,
         7.5000e+02],
        [1.1960e+01, 1.0900e+00, 2.3000e+00, 2.1000e+01, 1.0100e+02, 3.3800e+00,
         2.1400e+00, 1.3000e-01, 1.6500e+00, 3.2100e+00, 9.9000e-01, 3.1300e+00,
         8.8600e+02],
        [1.1840e+01, 2.8900e+00, 2.2300e+00, 1.8000e+01, 1.1200e+02, 1.7200e+00,
         1.3200e+00, 4.3000e-01, 9.5000e-01, 2.6500e+00, 9.6000e-01, 2.5200e+00,
         5.0000e+02]]) tensor([[1.],
        [3.],
        [2.],
        [2.]])


import

In [0]:
import torch
import torchvision
from torch.utils.data import Dataset
import numpy as np

class WineDataset(Dataset):

  def __init__(self,transform = None): #transform is optional
    # data loading
    file = 'https://raw.githubusercontent.com/python-engineer/pytorchTutorial/master/data/wine/wine.csv'
    xy = np.loadtxt(file,delimiter =",",dtype=np.float32,skiprows=1)


    self.x = xy[:,1:]
    self.y = xy[:,[0]]
    self.transform = transform

  def __getitem__(self,index):
    sample =  self.x[index],self.y[index]

    if self.transform: #
      sample=self.transform(sample)

    return sample

  def __len__(self):
    # len(dataset)
    return self.n_samples

class ToTensor:
  def __call__(self,sample):
    inputs, targets = sample
    return torch.from_numpy(inputs),torch.from_numpy(targets)

class MulTransform:
  def __init__(self,factor):
    self.factor = factor
  
  def __call__(self,sample):
    inputs,target = sample
    inputs *=self.factor
    return inputs,target

print('Without Transform')
dataset = WineDataset()
first_data = dataset[0]
features, labels = first_data
print(type(features), type(labels))
print(features, labels)

print('\nWith Tensor Transform')
dataset = WineDataset(transform = ToTensor())
first_data = dataset[0]
features,labels = first_data
print(type(features),type(labels))
print(features, labels)

print('\nWith Tensor and Multiplication Transform')
composed = torchvision.transforms.Compose([ToTensor(),MulTransform(2)])
dataset = WineDataset(transform=composed)
first_data = dataset[0]
features,labels = first_data
print(type(features),type(labels))
print(features, labels)

Without Transform
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
[1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00 3.060e+00
 2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03] [1.]

With Tensor Transform
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])

With Tensor and Multiplication Transform
<class 'torch.Tensor'> <class 'torch.Tensor'>
tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03]) tensor([1.])


PyTorch Tutorial 11 - Softmax and Cross Entropy<p>


Softmax

In [0]:
import torch
import torch.nn as nn
import numpy as np

def softmax(x):
  return np.exp(x)/ np.sum(np.exp(x),axis=0)

x= np.array([2.0,1.0,0.1])
outputs = softmax(x)
print('softmax numpy:',outputs)

softmax numpy: [0.65900114 0.24243297 0.09856589]


In [0]:
x = torch.tensor([2.0,1.0,0.1])
torch.softmax(x,dim=0) #along the first axis


tensor([0.6590, 0.2424, 0.0986])

Cross Entropy


In [0]:
def cross_entropy(actual,predicted):
  loss = - np.sum(actual * np.log(predicted))
  return loss

# y must be one hot encoded
# if class 0: [1 0 0]
# if class 1: [0 1 0]
# if class 2: [0 0 1]

Y = np.array([1,0,0])

# y_pred has probabilities
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

l1 = cross_entropy(Y,Y_pred_good)
l2 = cross_entropy(Y,Y_pred_bad)

print(f'Loss1 numpy : {l1:.4f}') #lower 
print(f'Loss2 numpy : {l2:.4f}')

Loss1 numpy : 0.3567
Loss2 numpy : 2.3026


In [0]:
loss = nn.CrossEntropyLoss() #Softmax at the last layer not required
# nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)

# Y actual ( no One Hot required )
# Y_pred has raw scores(logits), requires softmax 

Y = torch.tensor([0])
# nsamples x nclasses = 1 x 3
Y_pred_good = torch.tensor([2.0,1.0,0.1]).view(1,3)
Y_pred_bad = torch.tensor([[0.5,2.0,0.1]]) #or can be written as this

l1 = loss ( Y_pred_good,Y )
l2 = loss ( Y_pred_bad, Y )

print(f'Loss1 numpy : {l1.item()}') #lower 
print(f'Loss2 numpy : {l2.item()}')

_, predictions1 = torch.max(Y_pred_good,1)
_, predictions2 = torch.max(Y_pred_bad,1)
print(predictions1)
print(predictions2)

Loss1 numpy : 0.4170299470424652
Loss2 numpy : 1.8167786598205566
tensor([0])
tensor([1])


In [0]:
loss = nn.CrossEntropyLoss() #Softmax at the last layer not required
# nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)

# Y actual ( no One Hot required )
# Y_pred has raw scores(logits), requires softmax 

Y = torch.tensor([2])
# nsamples x nclasses = 1 x 3
Y_pred_bad = torch.tensor([2.0,1.0,0.1]).view(1,3)
Y_pred_good = torch.tensor([[0.5,2.0,2]]) #or can be written as this

#l1 and l2 is swapped
l1 = loss ( Y_pred_good,Y )
l2 = loss ( Y_pred_bad, Y )

print(f'Loss1 numpy : {l1.item()}') #lower 
print(f'Loss2 numpy : {l2.item()}') 

_, predictions1 = torch.max(Y_pred_good,dim=1)
_, predictions2 = torch.max(Y_pred_bad,dim=1)
print(predictions1)
print(predictions2)


Loss1 numpy : 0.798916220664978
Loss2 numpy : 2.3170299530029297
tensor([2])
tensor([0])


# Multiple samples

In [0]:
loss = nn.CrossEntropyLoss() #Softmax at the last layer not required
# nn.CrossEntropyLoss() = nn.LogSoftmax + nn.NLLLoss (negative log likelihood loss)

# Y actual ( no One Hot required )
# Y_pred has raw scores(logits), requires softmax 

Y = torch.tensor([2,0,1])

# nsamples x nclasses = 3 x 3
Y_pred_good = torch.tensor(
    [[0.1, 0.2, 3.9], # predict class 2
    [1.2, 0.1, 0.3], # predict class 0
    [0.3, 2.2, 0.2]]) # predict class 1

Y_pred_bad = torch.tensor(
    [[0.9, 0.2, 0.1],
    [0.1, 0.3, 1.5],
    [1.2, 0.2, 0.5]])

l1 = loss ( Y_pred_good,Y )
l2 = loss ( Y_pred_bad, Y )

print(f'Loss1 numpy : {l1.item()}') #lower 
print(f'Loss2 numpy : {l2.item()}')

_, predictions1 = torch.max(Y_pred_good,dim=1)
_, predictions2 = torch.max(Y_pred_bad,dim=1)

print(predictions1)
print(predictions2)

Loss1 numpy : 0.28342217206954956
Loss2 numpy : 1.6418448686599731
tensor([2, 0, 1])
tensor([0, 2, 0])
