In [14]:
# Objective 1: print "Hello World" in Colab.
print("Hello World!")

Hello World!


In [15]:
# IPython shell escape: run `nvidia-smi` to show the GPU (if any) attached to this runtime.
!nvidia-smi

Fri Oct 13 05:25:31 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P8    10W /  70W |      3MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [16]:
# Objective 2: Perform matrix multiplication in PyTorch
import torch

# Seed PyTorch's RNG first so the random tensors drawn below are reproducible.
torch.manual_seed(605)

# Operands for the product a @ b: a is 2x3 and b is 3x5, so the inner dimensions match.
a = torch.randn(2, 3)
b = torch.randn(3, 5)
print(a)
print(b)

tensor([[ 1.1412, -1.1903, -0.2589],
        [ 0.0178,  0.4158,  0.0207]])
tensor([[-0.9021,  0.3112,  0.1275, -0.3989, -1.2741],
        [ 1.2167, -2.5898,  1.5967, -1.8627,  1.0125],
        [-1.5573, -0.0671,  0.4498, -0.5815, -0.5709]])


In [17]:
# Matrix product of the 2x3 tensor a and the 3x5 tensor b; the result is 2x5.
c = a.matmul(b)
print(c.size())
print(c)

torch.Size([2, 5])
tensor([[-2.0745,  3.4550, -1.8714,  1.9125, -2.5113],
        [ 0.4577, -1.0728,  0.6755, -0.7937,  0.3865]])


In [18]:
# Often we want to parameterize b, i.e. treat it as a learnable variable rather than a fixed input.
from torch import nn
# bias=False drops the additive bias vector (bias=True is the default), leaving a pure matrix product.
b_layer = nn.Linear(3, 5, bias=False)
# Note that nn.Linear stores its weight transposed, as (out_features, in_features),
# because the layer computes x @ weight.T rather than x @ weight.
print(b_layer.weight.size()) # (5, 3): the reverse of b's (3, 5)
print(b_layer.weight)

torch.Size([5, 3])
Parameter containing:
tensor([[-0.0950,  0.2956,  0.3553],
        [ 0.2678, -0.0171, -0.4928],
        [-0.1845, -0.2508,  0.2816],
        [ 0.3971, -0.2691, -0.0812],
        [ 0.2856, -0.1239,  0.1443]], requires_grad=True)


In [19]:
# Calling the layer with input a computes a @ b_layer.weight.T (the layer has no bias term).
c_1 = b_layer(a)
# The same product written out by hand; .t() transposes the 2-D weight matrix.
c_2 = a.matmul(b_layer.weight.t())
print(c_1)
print(c_2)

tensor([[-0.5523,  0.4535,  0.0151,  0.7945,  0.4361],
        [ 0.1286, -0.0126, -0.1018, -0.1065, -0.0435]], grad_fn=<MmBackward0>)
tensor([[-0.5523,  0.4535,  0.0151,  0.7945,  0.4361],
        [ 0.1286, -0.0126, -0.1018, -0.1065, -0.0435]], grad_fn=<MmBackward0>)


In [20]:
# Objective 3: Perform differentiation in PyTorch
# Reduce c_1 to a scalar so backward() can be called without an explicit gradient argument.
y = c_1.sum()
# Backpropagate through the graph; the gradient lands in b_layer.weight.grad.
y.backward()
# d(sum)/d(weight[i, j]) is the sum of column j of a, so every row of the gradient is identical.
print(b_layer.weight.grad.size())
print(b_layer.weight.grad)

torch.Size([5, 3])
tensor([[ 1.1589, -0.7744, -0.2382],
        [ 1.1589, -0.7744, -0.2382],
        [ 1.1589, -0.7744, -0.2382],
        [ 1.1589, -0.7744, -0.2382],
        [ 1.1589, -0.7744, -0.2382]])


In [21]:
# Objective 4: Create a soft XOR dataset
# N is the number of samples in the dataset
N = 5
# Inputs: torch.rand draws from [0, 1); scaling by 2 and shifting by -1 maps the samples to [-1, 1).
x = torch.rand(N, 2) * 2.0 - 1.0
print(x)

tensor([[ 0.5600, -0.8451],
        [-0.0089, -0.2796],
        [-0.1985,  0.0163],
        [ 0.1583,  0.3016],
        [ 0.1275, -0.3969]])


In [22]:
# Now, given the inputs, create labels.
# round() maps each coordinate to -1, 0 or 1 and bool() is True for any non-zero value,
# so z[i, j] is True when x[i, j] rounds to +1 or -1 (roughly |x| > 0.5); the sign is discarded.
# NOTE(review): if the soft-XOR label is meant to depend on the sign of each coordinate
# (e.g. z = x > 0), this expression yields different labels -- confirm the intended definition.
z = x.round().bool()
print(z)
# The label is the XOR of the two per-coordinate flags (this rebinds the name y).
y = z[:, 0].logical_xor(z[:, 1])
print(y)

tensor([[ True,  True],
        [False, False],
        [False, False],
        [False, False],
        [False, False]])
tensor([False, False, False, False, False])


In [23]:
# Objective 5: Create a two-layer neural network for the dataset

import torch
import torch.nn.functional as F
import torch.nn as nn

# XOR truth table: each row of X is one input pair and the matching row of Y is its label.
X = torch.tensor([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
Y = torch.tensor([[0.0], [1.0], [1.0], [0.0]])  # written directly as a (4, 1) column vector


class TwoLayerNN(nn.Module):
    """Two-layer network: Linear -> sigmoid -> Linear.

    The default sizes give the 2-3-1 architecture used for the XOR example:
    a 2-dimensional input, 3 hidden units and a single output value.

    Args:
        in_features: size of each input sample.
        hidden_features: number of hidden units.
        out_features: size of each output sample.
    """

    def __init__(self, in_features=2, hidden_features=3, out_features=1):
        super().__init__()
        # lin1 is created before lin2 so parameter order (and RNG consumption) is fixed.
        self.lin1 = nn.Linear(in_features, hidden_features)
        self.lin2 = nn.Linear(hidden_features, out_features)

    def forward(self, x):
        """Return the raw (un-squashed) output for input ``x``.

        ``x`` must have ``in_features`` as its last dimension; nn.Module calls
        this method when the model instance itself is called.
        """
        hidden = F.sigmoid(self.lin1(x))  # the non-linearity between the two linear maps
        return self.lin2(hidden)

In [24]:
# Instantiate the two-layer network defined above.
model = TwoLayerNN()

In [25]:
# Forward pass on the single input [1, 1] before any training has happened.
model(torch.Tensor([1, 1]))

tensor([0.0691], grad_fn=<AddBackward0>)

In [26]:
import torch.optim as optim

# Mean-squared-error loss between the network output and the 0/1 target.
loss_func = nn.MSELoss()
# SGD with momentum over all of the model's parameters.
optimizer = optim.SGD(model.parameters(), lr=0.02, momentum=0.9)

In [27]:
def weight_init(model, mean=0.0, std=1.0):
    """Re-draw the weights of every ``nn.Linear`` in ``model`` from a normal distribution.

    Only the weight matrices are modified (in place); biases and modules of
    any other type are left untouched.

    Args:
        model: an ``nn.Module`` whose sub-modules are scanned via ``modules()``.
        mean: mean of the normal distribution (default 0.0).
        std: standard deviation of the normal distribution (default 1.0).
    """
    for m in model.modules():
        if isinstance(m, nn.Linear):
            # nn.init fills the parameter in place without recording the write in
            # autograd, which avoids mutating the tensor through ``.data``.
            nn.init.normal_(m.weight, mean=mean, std=std)

# Re-initialize the weights of the model's nn.Linear layers in place.
weight_init(model)

In [28]:
from torch.autograd import Variable
import numpy as np

epochs = 2001
steps = X.size(0)  # one "epoch" = as many single-sample updates as there are samples
# Dedicated seeded generator so the sampling order, and therefore the run, is reproducible.
sample_rng = np.random.default_rng(605)
for i in range(epochs):
    for j in range(steps):
        # Sample with replacement; the same index selects the matching input and target.
        index = int(sample_rng.integers(X.size(0)))
        # Plain tensors are used directly: wrapping them in Variable is unnecessary.
        x_var = X[index]
        y_var = Y[index]

        optimizer.zero_grad()  # clear gradients left over from the previous step
        y_hat = model(x_var)  # forward propagation
        loss = loss_func(y_hat, y_var)
        loss.backward()  # compute all gradients of the computational graph
        optimizer.step()

    if i % 500 == 0:
        # The reported value is the loss of the last sampled example only, not an epoch average.
        print(f'Epoch: {i}, Loss: {loss.item()}')

Epoch: 0, Loss: 0.5289431810379028
Epoch: 500, Loss: 4.747619186673546e-09
Epoch: 1000, Loss: 8.881784197001252e-14
Epoch: 1500, Loss: 2.2737367544323206e-13
Epoch: 2000, Loss: 1.2789769243681803e-13


In [29]:
# Re-evaluate the input [1, 1] after training; its XOR target in Y is 0.
model(torch.Tensor([1, 1]))

tensor([3.5763e-07], grad_fn=<AddBackward0>)