<a href="https://colab.research.google.com/github/WHU-Peter/COMP6200-Project/blob/main/project_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import math

# fix random seed for reproducibility
seed = 7
# Seed PyTorch's global random number generator.
torch.manual_seed(seed)
# Request deterministic cuDNN kernels and switch off cuDNN's benchmark-based
# algorithm selection (see the PyTorch reproducibility notes).
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
import numpy as np
# Seed NumPy's legacy global random number generator with the same value.
np.random.seed(seed)

In [None]:
# Preprocessing: turn each 28*28 image into a tensor, then flatten it into a
# single 784-element vector.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda image: image.view(-1)),
])

# MNIST train / test splits, downloaded into the current directory if missing.
trainset = MNIST(".", train=True, download=True, transform=transform)
testset = MNIST(".", train=False, download=True, transform=transform)

# Mini-batch loaders (128 samples per batch, reshuffled on every pass).
trainloader = DataLoader(trainset, batch_size=128, shuffle=True)
testloader = DataLoader(testset, batch_size=128, shuffle=True)

# Sanity check: count how many training samples carry each of the 10 labels by
# walking the training loader once, one histogram per batch.
class_counts = torch.zeros(10, dtype=torch.int32)
for images, labels in trainloader:
    class_counts += torch.bincount(labels, minlength=10).to(torch.int32)

# One full pass over the loader must have visited all 60000 training samples.
assert class_counts.sum() == 60000

Baseline model: a fully connected network with two linear layers separated by a ReLU.

In [None]:
# define baseline model
class BaselineModel(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    In training mode ``forward`` returns the raw output of the second linear
    layer; in evaluation mode it returns a softmax over dimension 1 of that
    output.

    Args:
        input_size: number of input features per sample.
        hidden_size: width of the hidden layer.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        hidden = F.relu(self.fc1(x))
        scores = self.fc2(hidden)
        if self.training:
            return scores
        # Outside training the scores are normalised into probabilities.
        return F.softmax(scores, dim=1)

In [None]:
# Instantiate the baseline classifier: 784 inputs, 784 hidden units, 10 classes.
model = BaselineModel(784, 784, 10)

# Cross-entropy objective, minimised with Adam at its default settings.
loss_function = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters())

# Train for 10 passes over the training loader.
for epoch in range(10):
    running_loss = 0.0  # sum of the per-batch losses seen in this epoch
    for inputs, labels in trainloader:
        # Clear the gradients left over from the previous update.
        optimiser.zero_grad()

        # Forward pass and loss, then backward pass and parameter update.
        loss = loss_function(model(inputs), labels)
        loss.backward()
        optimiser.step()

        running_loss += loss.item()
    print("Epoch %d, loss %4.2f" % (epoch, running_loss))
print('**** Finished Training ****')

Epoch 0, loss 133.89
Epoch 1, loss 51.05
Epoch 2, loss 32.49
Epoch 3, loss 23.12
Epoch 4, loss 16.61
Epoch 5, loss 12.50
Epoch 6, loss 9.49
Epoch 7, loss 6.82
Epoch 8, loss 5.10
Epoch 9, loss 4.38
**** Finished Training ****


In [None]:
# Compute the per-class accuracy of the model on the test set
class_correct = torch.zeros(10)
class_total = torch.zeros(10)
model.eval()

# Inference only: disable autograd so no computation graph is built per batch.
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # Predicted class = index of the largest output in each row.
        predictions = outputs.argmax(dim=1)

        # Per-class histogram of all samples in the batch, and of the subset
        # whose prediction matches the true label.
        class_total += torch.bincount(labels, minlength=10)
        class_correct += torch.bincount(labels[predictions == labels], minlength=10)

for i in range(10):
    print('Class %d accuracy: %2.2f %%' % (i, 100.0*class_correct[i] / class_total[i]))

Class 0 accuracy: 99.08 %
Class 1 accuracy: 98.94 %
Class 2 accuracy: 97.77 %
Class 3 accuracy: 96.34 %
Class 4 accuracy: 97.35 %
Class 5 accuracy: 98.21 %
Class 6 accuracy: 98.23 %
Class 7 accuracy: 97.76 %
Class 8 accuracy: 98.25 %
Class 9 accuracy: 98.51 %


LUT model whose softmax uses a learnable temperature parameter.

In [None]:
class SoftMaxWithTemperature(nn.Module):
    """Softmax over the last dimension with a trainable scalar temperature.

    The temperature is registered as an ``nn.Parameter`` initialised to 1.0, so
    it is returned by ``parameters()`` alongside the other weights.
    """

    def __init__(self):
        super().__init__()
        self.temperature = nn.Parameter(torch.tensor(1.0))

    def forward(self, x):
        # Divide the inputs by the temperature before normalising them.
        scaled = x / self.temperature
        return F.softmax(scaled, dim=-1)

In [None]:
# define LUTModel
class LUTModelWithLearnableTemperature(nn.Module):
    """Classifier with a soft look-up table and a learnable softmax temperature.

    Pipeline: Linear -> ReLU -> tempered softmax -> matrix product with the
    embedding table's weight -> ReLU -> Linear. In training mode ``forward``
    returns the output of the last linear layer; in evaluation mode it returns
    a softmax over dimension 1 of that output.

    Args:
        input_size: number of input features per sample.
        hidden_size1: output width of the first linear layer, which is also the
            number of rows of the embedding table.
        hidden_size2: width of each embedding row.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super().__init__()
        # Sub-modules are registered in the order fc1, softmax_temperature,
        # emb, fc2, which fixes the order of model.parameters().
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.softmax_temperature = SoftMaxWithTemperature()
        self.emb = nn.Embedding(hidden_size1, hidden_size2)
        self.fc2 = nn.Linear(hidden_size2, num_classes)

    def forward(self, x):
        activations = F.relu(self.fc1(x))
        # Soft selection weights over the rows of the embedding table.
        weights = self.softmax_temperature(activations)
        # The table is used as a plain matrix: each sample gets the weighted
        # combination of its rows rather than a hard index look-up.
        looked_up = torch.matmul(weights, self.emb.weight)
        scores = self.fc2(F.relu(looked_up))
        if self.training:
            return scores
        return F.softmax(scores, dim=1)

In [None]:
# build the model
model = LUTModelWithLearnableTemperature(784, 200, 20, 10)

# define the loss function and the optimiser
loss_function = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=0.01)

# the epoch loop
for epoch in range(10):
    running_loss = 0.0
    for data in trainloader:
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimiser.zero_grad()

        # forward + loss + backward + optimise (update weights)
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimiser.step()

        # keep track of the loss this epoch
        running_loss += loss.item()
    # Report the learned temperature once per epoch. It is looked up by name
    # rather than by its position in the optimiser's parameter list, so the
    # print keeps showing the temperature even if layers are added or reordered.
    print(model.softmax_temperature.temperature)
    print("Epoch %d, loss %4.2f" % (epoch, running_loss))
print('**** Finished Training ****')

Parameter containing:
tensor(1.6316, requires_grad=True)
Epoch 0, loss 169.89
Parameter containing:
tensor(2.1211, requires_grad=True)
Epoch 1, loss 81.24
Parameter containing:
tensor(2.4600, requires_grad=True)
Epoch 2, loss 64.32
Parameter containing:
tensor(2.7636, requires_grad=True)
Epoch 3, loss 52.46
Parameter containing:
tensor(3.0728, requires_grad=True)
Epoch 4, loss 45.09
Parameter containing:
tensor(3.2618, requires_grad=True)
Epoch 5, loss 38.09
Parameter containing:
tensor(3.5440, requires_grad=True)
Epoch 6, loss 33.64
Parameter containing:
tensor(3.7379, requires_grad=True)
Epoch 7, loss 28.68
Parameter containing:
tensor(3.9677, requires_grad=True)
Epoch 8, loss 24.44
Parameter containing:
tensor(4.1709, requires_grad=True)
Epoch 9, loss 21.61
**** Finished Training ****


In [None]:
# Compute the per-class accuracy of the model on the test set
class_correct = torch.zeros(10)
class_total = torch.zeros(10)
model.eval()

# Inference only: disable autograd so no computation graph is built per batch.
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # Predicted class = index of the largest output in each row.
        predictions = outputs.argmax(dim=1)

        # Per-class histogram of all samples in the batch, and of the subset
        # whose prediction matches the true label.
        class_total += torch.bincount(labels, minlength=10)
        class_correct += torch.bincount(labels[predictions == labels], minlength=10)

for i in range(10):
    print('Class %d accuracy: %2.2f %%' % (i, 100.0*class_correct[i] / class_total[i]))

Class 0 accuracy: 99.08 %
Class 1 accuracy: 98.77 %
Class 2 accuracy: 97.38 %
Class 3 accuracy: 96.93 %
Class 4 accuracy: 97.66 %
Class 5 accuracy: 97.09 %
Class 6 accuracy: 95.09 %
Class 7 accuracy: 97.08 %
Class 8 accuracy: 95.89 %
Class 9 accuracy: 94.65 %


In [None]:
# Print every learnable tensor of the current model next to its qualified name.
for param_name, param in model.named_parameters():
    print(param_name, ':', param)

fc1.weight : Parameter containing:
tensor([[-0.0194,  0.0095,  0.0185,  ...,  0.0101,  0.0243, -0.0177],
        [-0.0266, -0.0146,  0.0153,  ..., -0.0184, -0.0188,  0.0208],
        [-0.0274, -0.0220,  0.0284,  ..., -0.0287, -0.0305,  0.0048],
        ...,
        [ 0.0087, -0.0059,  0.0007,  ..., -0.0066, -0.0040, -0.0352],
        [ 0.0303, -0.0048,  0.0353,  ..., -0.0248,  0.0236, -0.0343],
        [ 0.0236, -0.0205,  0.0348,  ...,  0.0022, -0.0086,  0.0282]],
       requires_grad=True)
fc1.bias : Parameter containing:
tensor([-0.0889,  0.1299, -0.0260, -0.0790,  0.3115, -0.3797, -0.5406,  0.1640,
         0.0039, -0.5243,  0.2748, -0.2115, -0.1084, -0.0385, -0.4883, -0.0730,
        -0.0782, -0.7780, -0.2906, -0.0974, -0.0622, -0.4684, -0.0879, -0.0508,
        -0.3832,  0.2421, -0.0394, -0.0385,  0.7548, -0.2019, -0.0699,  1.0311,
        -0.0476,  0.1075, -0.0701, -0.1475,  0.0982, -0.0494,  0.4610,  0.3993,
         0.0057,  0.5459, -0.0641, -0.1036, -0.1345,  0.2464, -0.1711, 

In [None]:
def softmax_temperature(logits, temperature=0.00001):
    """Softmax over the last dimension of ``logits`` divided by ``temperature``.

    Args:
        logits: tensor of scores.
        temperature: divisor applied to the scores before the softmax.

    Returns:
        A tensor of the same shape as ``logits`` whose last dimension sums to 1.
    """
    scaled_logits = logits / temperature
    return F.softmax(scaled_logits, dim=-1)

In [None]:
class LUTModelWithAnnealTemperature(nn.Module):
    """Classifier with a soft look-up table and an externally supplied temperature.

    Pipeline: Linear -> ReLU -> softmax(activations / temperature) -> matrix
    product with the embedding table's weight -> ReLU -> Linear. In training
    mode ``forward`` returns the output of the last linear layer; in evaluation
    mode it returns a softmax over dimension 1 of that output.

    The tempered softmax is computed inside ``forward`` rather than through the
    module-level ``softmax_temperature`` helper. That name is defined more than
    once in this notebook with different return types, so looking it up at call
    time would make the model's behaviour depend on which cell ran last.

    Args:
        input_size: number of input features per sample.
        hidden_size1: output width of the first linear layer, which is also the
            number of rows of the embedding table.
        hidden_size2: width of each embedding row.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.emb = nn.Embedding(hidden_size1, hidden_size2)
        self.fc2 = nn.Linear(hidden_size2, num_classes)

    def forward(self, x, temperature):
        """Run the model on ``x`` using the given softmax ``temperature``.

        Args:
            x: batch of input vectors.
            temperature: positive divisor applied before the inner softmax.

        Raises:
            ValueError: if ``temperature`` is not strictly positive (dividing by
                zero or a negative value would silently produce NaNs or invert
                the ranking of the activations).
        """
        if temperature <= 0:
            raise ValueError("temperature must be positive, got %r" % (temperature,))

        out = F.relu(self.fc1(x))
        # Soft selection weights over the rows of the embedding table.
        out = F.softmax(out / temperature, dim=-1)
        # The table is used as a plain matrix: weighted combination of its rows.
        out = out @ self.emb.weight
        out = F.relu(out)
        out = self.fc2(out)
        if not self.training:
            out = F.softmax(out, dim=1)
        return out

In [None]:
# build the model
model = LUTModelWithAnnealTemperature(784, 200, 20, 10)

# define the loss function and the optimiser
loss_function = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr=0.01)

# Annealing schedule: temperature = max(min_temperature, exp(-anneal_rate * step))
# where `step` counts optimiser updates (one per batch). Driving the schedule
# with the epoch index instead would only lower the temperature to
# exp(-3e-5 * 999) ~= 0.970 after all 1000 epochs, i.e. it would never anneal.
anneal_rate = 3e-5
min_temperature = 0.001

# the epoch loop
step = 0
for epoch in range(1000):
    running_loss = 0.0
    for data in trainloader:
        # get the inputs
        inputs, labels = data

        # temperature for this update, decayed per training step
        temperature = max(min_temperature, math.exp(-anneal_rate * step))

        # zero the parameter gradients
        optimiser.zero_grad()
        # forward + loss + backward + optimise (update weights)
        outputs = model(inputs, temperature)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimiser.step()
        step += 1

        # keep track of the loss this epoch
        running_loss += loss.item()
    print("Epoch %d, loss %4.2f" % (epoch, running_loss))
print('**** Finished Training ****')

Epoch 0, loss 176.66
Epoch 1, loss 99.29
Epoch 2, loss 89.00
Epoch 3, loss 79.73
Epoch 4, loss 74.44
Epoch 5, loss 71.47
Epoch 6, loss 66.83
Epoch 7, loss 63.73
Epoch 8, loss 60.61
Epoch 9, loss 58.66
Epoch 10, loss 55.63
Epoch 11, loss 55.16
Epoch 12, loss 52.65
Epoch 13, loss 50.11
Epoch 14, loss 49.76
Epoch 15, loss 48.17
Epoch 16, loss 47.48
Epoch 17, loss 44.40
Epoch 18, loss 45.68
Epoch 19, loss 45.54
Epoch 20, loss 40.56
Epoch 21, loss 40.36
Epoch 22, loss 41.92
Epoch 23, loss 39.60
Epoch 24, loss 38.44
Epoch 25, loss 38.38
Epoch 26, loss 37.12
Epoch 27, loss 35.88
Epoch 28, loss 37.82
Epoch 29, loss 35.65
Epoch 30, loss 36.11
Epoch 31, loss 36.57
Epoch 32, loss 36.11
Epoch 33, loss 34.79
Epoch 34, loss 32.64
Epoch 35, loss 32.09
Epoch 36, loss 32.33
Epoch 37, loss 31.19
Epoch 38, loss 33.55
Epoch 39, loss 31.71
Epoch 40, loss 28.45
Epoch 41, loss 29.45
Epoch 42, loss 30.18
Epoch 43, loss 30.30
Epoch 44, loss 28.90
Epoch 45, loss 30.54
Epoch 46, loss 28.48
Epoch 47, loss 28.83
E

In [None]:
# Compute the per-class accuracy of the model on the test set
class_correct = torch.zeros(10)
class_total = torch.zeros(10)
model.eval()

# Inference only: disable autograd so no computation graph is built per batch.
with torch.no_grad():
    for images, labels in testloader:
        # Evaluate with a fixed, very low softmax temperature.
        outputs = model(images, 0.0001)
        # Predicted class = index of the largest output in each row.
        predictions = outputs.argmax(dim=1)

        # Per-class histogram of all samples in the batch, and of the subset
        # whose prediction matches the true label.
        class_total += torch.bincount(labels, minlength=10)
        class_correct += torch.bincount(labels[predictions == labels], minlength=10)

for i in range(10):
    print('Class %d accuracy: %2.2f %%' % (i, 100.0*class_correct[i] / class_total[i]))

Class 0 accuracy: 98.06 %
Class 1 accuracy: 98.94 %
Class 2 accuracy: 94.09 %
Class 3 accuracy: 97.92 %
Class 4 accuracy: 94.50 %
Class 5 accuracy: 93.61 %
Class 6 accuracy: 96.03 %
Class 7 accuracy: 96.60 %
Class 8 accuracy: 94.05 %
Class 9 accuracy: 93.46 %


In [None]:
# Evaluate the temperature expression max(0.001, exp(-3e-5 * epoch)) for the
# value currently held by `epoch`; this cell does not define `epoch`, so it
# must already exist in the kernel. For epoch = 999 the exponential term is
# about 0.970, far above the 0.001 floor.
max(0.001, math.exp(-3 * math.pow(10, -5) * epoch))

0.9704746473512195

In [None]:
def softmax_temperature(logits, temperature=0.001):
    """Collapse each row of ``logits`` to an integer position (1-based).

    The rows are passed through a softmax at the given temperature, and the
    resulting probabilities are used to weight the positions 1..N of the row.
    The weighted sum is then truncated to an integer. When the softmax is
    exactly one-hot this is the 1-based position of the largest logit.

    NOTE: this definition reuses the name of the earlier, probability-returning
    ``softmax_temperature`` in this notebook and replaces it once this cell runs.

    Args:
        logits: 2-D tensor of scores, one row per sample.
        temperature: divisor applied to the scores before the softmax.

    Returns:
        A 1-D integer (``long``) tensor with one entry per row.
    """
    probabilities = F.softmax(logits / temperature, dim=-1)
    num_positions = probabilities.shape[1]
    positions = torch.arange(1, num_positions + 1, dtype=torch.float32)
    return torch.matmul(probabilities, positions).long()

In [None]:
# Two example rows of scores: at a temperature of 0.001 the softmax puts all of
# the probability mass on the largest entry of each row.
logits = torch.tensor([[1.0, 2.0, 4.0, 5.0], [4.0, 3.0, 2.0, 1.0]])
F.softmax(logits / 0.001, dim=-1)

tensor([[0., 0., 0., 1.],
        [1., 0., 0., 0.]])