<a href="https://colab.research.google.com/github/jdasam/mas1004-2022/blob/main/notebooks/Data_AI_4th_week_spiral_livecoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import numpy as np
from math import pi

In [None]:
# if you run this code in your local computer, you have to check that torch is installed
!pip install torch
# But in Google Colab, torch is already installed 

In [2]:
# make spiral data
# https://gist.github.com/45deg/e731d9e7f478de134def5668324c44c5
N = 500
theta = np.sqrt(np.random.rand(N))*2*pi # np.linspace(0,2*pi,100)

r_a = 2*theta + pi
data_a = np.array([np.cos(theta)*r_a, np.sin(theta)*r_a]).T
x_a = data_a + np.random.randn(N,2)

r_b = -2*theta - pi
data_b = np.array([np.cos(theta)*r_b, np.sin(theta)*r_b]).T
x_b = data_b + np.random.randn(N,2)

res_a = np.append(x_a, np.zeros((N,1)), axis=1)
res_b = np.append(x_b, np.ones((N,1)), axis=1)

res = np.append(res_a, res_b, axis=0)
np.random.shuffle(res)

In [3]:
import matplotlib.pyplot as plt
def visualize_decision_boundary(model, data, label):
  x_linspace = torch.linspace(min(data[:,0]), max(data[:,0]), steps=200)
  y_linspace = torch.linspace(min(data[:,1]), max(data[:,1]), steps=200)
  grid_x, grid_y = torch.meshgrid(x_linspace, y_linspace)
  grid_xy = torch.stack([grid_x, grid_y]).permute(1,2,0)
  grid_xy = grid_xy.view(-1, 2)
  if isinstance(model, torch.nn.Module):
    value_by_grid = model(grid_xy)
  else:
    value_by_grid = run_neuron(model, grid_xy)
  value_by_grid = value_by_grid.view(200, 200, 1)
  value_by_grid[value_by_grid<=0.5] = 0
  value_by_grid[value_by_grid>0.5] = 1

  plt.scatter(x=data[label[:,0]==0,0], y=data[label[:,0]==0,1])
  plt.scatter(x=data[label[:,0]==1,0], y=data[label[:,0]==1,1])

  plt.contourf(grid_x.detach().numpy(), grid_y.detach().numpy(), value_by_grid.detach().numpy().squeeze(), alpha=0.3)

In [4]:
# let's convert our data in tensor
datas = torch.tensor(res, dtype=torch.float32)
# represent each number with 32 bits, instead of 64 bits

In [5]:
datas # a tensor can only have one data type.
# so if you want to represent float data
# you have to represent even integer in float

tensor([[ -6.2360,  13.0651,   1.0000],
        [  5.7898, -10.7633,   0.0000],
        [  7.8541,   8.6649,   1.0000],
        ...,
        [ -1.8618,  13.8850,   1.0000],
        [  6.3059,  -7.0654,   1.0000],
        [  3.9719,   0.2770,   0.0000]])

In [6]:
datas[datas[:,-1]==0]

tensor([[  5.7898, -10.7633,   0.0000],
        [ -8.5410,   0.1941,   0.0000],
        [  6.4224, -11.1031,   0.0000],
        ...,
        [ -8.9839,  -2.3756,   0.0000],
        [  1.4697,   6.6988,   0.0000],
        [  3.9719,   0.2770,   0.0000]])

In [7]:
data_xy = datas[:, :2]
data_label= datas[:, -1]

In [8]:
data_xy, data_label[:10]

(tensor([[ -6.2360,  13.0651],
         [  5.7898, -10.7633],
         [  7.8541,   8.6649],
         ...,
         [ -1.8618,  13.8850],
         [  6.3059,  -7.0654],
         [  3.9719,   0.2770]]),
 tensor([1., 0., 1., 1., 1., 1., 1., 0., 0., 0.]))

In [9]:
# How to make a tensor in specific shape
# Let's make 2 x 3 tensor
print(torch.zeros(2,3))
print(torch.ones(2,3))
print(torch.rand(2,3)) # every element will have random value between 0-1
print(torch.randn(2,3)) # this uses gaussian (normal) distribution
 

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.3160, 0.0010, 0.7059],
        [0.6224, 0.5211, 0.8376]])
tensor([[-2.0365, -1.5972,  1.4659],
        [-0.3311,  1.0453, -0.1468]])


In [45]:
# Let's make a neural network layer


class Layer:
  def __init__(self, in_features, num_neurons):
    # in_features: how many features does this layer take as an input
    # 
    self.in_features = in_features
    self.num_neurons = num_neurons

    # next step: define a weight matrix
    self.weight = torch.randn(self.in_features, self.num_neurons)
    # there are two types of parameters
    self.bias = torch.randn(self.num_neurons)

  def __call__(self, input):
    return torch.mm(input, self.weight) + self.bias

layer = Layer(2, 4)
layer.weight, layer.bias
asingle_data = data_xy[:1]
layer(asingle_data)

tensor([[-36.5538,  -0.5213,  -1.6657,  32.5262]])

In [44]:
print(torch.mm(asingle_data, layer.weight))
print(layer.bias)

tensor([[-10.1124, -17.8197,   3.6596,   4.7405]])
tensor([ 0.0191, -0.3798, -0.4291,  0.2112])


In [11]:
data_xy.shape, layer.weight.shape

(torch.Size([1000, 2]), torch.Size([2, 4]))

In [12]:
# Implement layer's computation with pure for-loop

# what will be the output's size
output = torch.zeros(data_xy.shape[0], layer.weight.shape[1])

for data_idx, data_sample in enumerate(data_xy):
  for neuron_idx in range(layer.weight.shape[1]):
    temporary_sum = 0
    for feature_idx in range(len(data_sample)):
      data_pos = data_sample[feature_idx]
      corresp_neuron_weight = layer.weight[feature_idx, neuron_idx]
      temporary_sum += data_pos * corresp_neuron_weight
    # print(data_sample[i])
    output[data_idx, neuron_idx] = temporary_sum 

In [13]:
output

tensor([[ 6.3607, -9.3477, -2.8790,  7.5625],
        [-4.1856,  8.0268,  3.0944, -5.9477],
        [23.5946, -0.2093, 11.3684, 10.2044],
        ...,
        [14.4611, -7.5534,  2.2177, 10.0994],
        [ 1.3010,  6.5207,  4.8056, -2.8201],
        [ 6.7673,  1.8522,  4.4839,  1.9365]])

In [14]:
# Easier, and more efficient way
torch.mm(data_xy, layer.weight) # mm means matrix multiplication

tensor([[ 6.3607, -9.3477, -2.8790,  7.5625],
        [-4.1856,  8.0268,  3.0944, -5.9477],
        [23.5946, -0.2093, 11.3684, 10.2044],
        ...,
        [14.4611, -7.5534,  2.2177, 10.0994],
        [ 1.3010,  6.5207,  4.8056, -2.8201],
        [ 6.7673,  1.8522,  4.4839,  1.9365]])

In [15]:
# computation time with for loop vs matrix multiplication
import time


start_time = time.time()

output = torch.zeros(data_xy.shape[0], layer.weight.shape[1])
for data_idx, data_sample in enumerate(data_xy):
  for neuron_idx in range(layer.weight.shape[1]):
    temporary_sum = 0
    for feature_idx in range(len(data_sample)):
      data_pos = data_sample[feature_idx]
      corresp_neuron_weight = layer.weight[feature_idx, neuron_idx]
      temporary_sum += data_pos * corresp_neuron_weight
    # print(data_sample[i])
    output[data_idx, neuron_idx] = temporary_sum

end_time = time.time()
print("time spent: ", end_time - start_time) 

time spent:  0.1337108612060547


In [16]:
# Never tries to use for loop in matrix multiplication
# matrix multiplication is much much faster than for loop

start_time = time.time()
torch.mm(data_xy, layer.weight)
end_time = time.time()
print(end_time - start_time)

0.0016651153564453125


In [17]:
def relu(x):
  # x is an tensor
  is_larger_than_zero = x > 0
  new_x = torch.clone(x)
  new_x[~is_larger_than_zero] = 0
  # print(is_larger_than_zero.shape, x.shape)
  # the index of tensor can have the same shape with the tensor itself
  return new_x
relu(output)

tensor([[ 6.3607,  0.0000,  0.0000,  7.5625],
        [ 0.0000,  8.0268,  3.0944,  0.0000],
        [23.5946,  0.0000, 11.3684, 10.2044],
        ...,
        [14.4611,  0.0000,  2.2177, 10.0994],
        [ 1.3010,  6.5207,  4.8056,  0.0000],
        [ 6.7673,  1.8522,  4.4839,  1.9365]])

In [None]:
output > 0


tensor([[ True, False,  True,  True],
        [ True,  True, False, False],
        [ True, False, False, False],
        ...,
        [ True, False,  True,  True],
        [ True, False, False, False],
        [False,  True, False, False]])

In [18]:
layer_0 = Layer(in_features=2, num_neurons=4)
layer_1 = Layer(in_features=4, num_neurons=1)

print(data_xy.shape)
out_0 = layer_0(data_xy)
print(out_0.shape)
out_1 = layer_1(relu(out_0))
print(out_1.shape)

torch.Size([1000, 2])
torch.Size([1000, 4])
torch.Size([1000, 1])


In [19]:
# scientific notation
# 9.4111e-01 means 9.4111 * 10 ** (-1), 0.94111

# turn off scientific notation
torch.set_printoptions(sci_mode=False)

In [20]:
out_1.shape

torch.Size([1000, 1])

In [21]:
# flattening, or squeezing the tensor
out_1[:, 0].shape # if you select only one index in a certain dimension,
# it will delete that dimension 


torch.Size([1000])

In [None]:
out_1.squeeze() # this deletes every dimension with size 1

In [None]:
# Let's suppose that out_1 is our model's prediction for a given input
# out_1[n] is the prediction value of whether the n-th input data is category 1

# our target value is data_label
data_label

In [32]:
# Let's define how bad our model is.
pred = out_1.squeeze()

pred[:10], data_label[:10]
print(pred[:10].shape, data_label[:10].shape)
diff = pred[:5] - data_label[:7]
print(pred[:5], '\n',data_label[:5],'\n', diff)

torch.Size([10]) torch.Size([10])


RuntimeError: ignored

In [38]:
# we can make + - computation with tensor of different size
pred[:10].shape, data_label[0:1].shape
print(pred[:10])
print(data_label[0:1])
print(pred[:10] - data_label[0:1]) # It automatically broadcast 
# So you can add a scalar value to vector like this


tensor([16.1975, -3.1294,  2.0876,  1.4890, 19.4938, 19.1547, -1.9991,  9.0167,
        -3.1715,  4.7047])
tensor([1.])
tensor([15.1975, -4.1294,  1.0876,  0.4890, 18.4938, 18.1547, -2.9991,  8.0167,
        -4.1715,  3.7047])


In [48]:
# How we define the loss function
loss = torch.abs(pred - data_label) # abs means absolute value

In [49]:
loss[:10] # each data sample has different loss 

tensor([15.1975,  3.1294,  1.0876,  0.4890, 18.4938, 18.1547,  2.9991,  9.0167,
         3.1715,  4.7047])

In [51]:
loss_mean = sum(loss) / len(loss)
loss_mean

tensor(5.9336)

In [54]:
# in torch you can use a predefined method for tensor
loss_mean = loss.mean()
loss_mean, len(loss)

# our goal is to reduce this loss by updating the parameters of the model
# the model consists of two layers, currently


(tensor(5.9336), 1000)

In [72]:
# the loss value will be different everytime
# because the weights are initialized in a random way
layer_0 = Layer(in_features=2, num_neurons=4)
layer_1 = Layer(in_features=4, num_neurons=1)

def get_loss(layer_0, layer_1, data_xy, data_label):
  pred = layer_1(relu(layer_0(data_xy)))
  loss_mean = torch.abs(pred-data_label).mean()
  return loss_mean

get_loss(layer_0, layer_1, data_xy, data_label)

tensor(16.8463)

In [67]:
# What happens if we change a little bit of parameters
selected_weight = layer_0.weight[0, 0]
selected_weight 
# get_loss(layer_0, layer_1, data_xy, data_label)

tensor(-0.3554)

In [80]:
import copy 
old_loss = get_loss(layer_0, layer_1, data_xy, data_label)

num_iteration = 100

# this is gradient descent with single neuron
# it only works with a single weight
for i in range(num_iteration):
  # change a little bit for the selected weight to calculate gradient
  eps = 0.001
  new_layer_0 = copy.deepcopy(layer_0)
  new_layer_0.weight[0, 0] += eps
  new_layer_0.weight[0, 1] += eps
  new_loss = get_loss(new_layer_0, layer_1, data_xy, data_label)

  loss_diff = new_loss - old_loss
  gradient = loss_diff / eps
  gradient # how the loss will change if you change the parameter for 1

  # update parameters
  learning_rate = 0.001
  layer_0.weight[0, 0] -= gradient * learning_rate
  new_loss = get_loss(layer_0, layer_1, data_xy, data_label)
  print(new_loss)

tensor(16.8462)
tensor(16.8462)
tensor(16.8462)
tensor(16.8462)
tensor(16.8462)
tensor(16.8462)
tensor(16.8461)
tensor(16.8461)
tensor(16.8461)
tensor(16.8460)
tensor(16.8460)
tensor(16.8460)
tensor(16.8459)
tensor(16.8459)
tensor(16.8458)
tensor(16.8457)
tensor(16.8457)
tensor(16.8456)
tensor(16.8455)
tensor(16.8454)
tensor(16.8453)
tensor(16.8452)
tensor(16.8450)
tensor(16.8449)
tensor(16.8447)
tensor(16.8445)
tensor(16.8443)
tensor(16.8441)
tensor(16.8438)
tensor(16.8435)
tensor(16.8432)
tensor(16.8428)
tensor(16.8424)
tensor(16.8419)
tensor(16.8414)
tensor(16.8409)
tensor(16.8402)
tensor(16.8395)
tensor(16.8387)
tensor(16.8379)
tensor(16.8369)
tensor(16.8358)
tensor(16.8346)
tensor(16.8332)
tensor(16.8317)
tensor(16.8299)
tensor(16.8280)
tensor(16.8259)
tensor(16.8235)
tensor(16.8209)
tensor(16.8180)
tensor(16.8148)
tensor(16.8112)
tensor(16.8071)
tensor(16.8023)
tensor(16.7966)
tensor(16.7899)
tensor(16.7819)
tensor(16.7725)
tensor(16.7615)
tensor(16.7486)
tensor(16.7337)
tensor(1

In [82]:
layer_0.weight.requires_grad = True
layer_0.bias.requires_grad = True
layer_1.weight.requires_grad = True
layer_1.bias.requires_grad = True

new_loss = get_loss(layer_0, layer_1, data_xy, data_label)
new_loss.backward() # do backpropagation

In [84]:
layer_0.weight.data, layer_0.weight.grad

(tensor([[48.3523, -1.1030, -0.9030,  0.7895],
         [-0.8538,  1.6361, -0.2767, -1.9903]]),
 tensor([[ 0.0426, -0.8905, -0.8941, -0.8194],
         [ 0.1762,  1.2960,  1.4890, -4.8787]]))

In [86]:
for i in range(100):
  new_loss = get_loss(layer_0, layer_1, data_xy, data_label)
  new_loss.backward()

  layer_0.weight.data -= layer_0.weight.grad * learning_rate
  layer_0.weight.grad = None
  print(new_loss)

tensor(14.1116, grad_fn=<MeanBackward0>)
tensor(14.0218, grad_fn=<MeanBackward0>)
tensor(13.9920, grad_fn=<MeanBackward0>)
tensor(13.9621, grad_fn=<MeanBackward0>)
tensor(13.9323, grad_fn=<MeanBackward0>)
tensor(13.9024, grad_fn=<MeanBackward0>)
tensor(13.8726, grad_fn=<MeanBackward0>)
tensor(13.8427, grad_fn=<MeanBackward0>)
tensor(13.8127, grad_fn=<MeanBackward0>)
tensor(13.7827, grad_fn=<MeanBackward0>)
tensor(13.7527, grad_fn=<MeanBackward0>)
tensor(13.7227, grad_fn=<MeanBackward0>)
tensor(13.6928, grad_fn=<MeanBackward0>)
tensor(13.6628, grad_fn=<MeanBackward0>)
tensor(13.6327, grad_fn=<MeanBackward0>)
tensor(13.6027, grad_fn=<MeanBackward0>)
tensor(13.5727, grad_fn=<MeanBackward0>)
tensor(13.5428, grad_fn=<MeanBackward0>)
tensor(13.5128, grad_fn=<MeanBackward0>)
tensor(13.4830, grad_fn=<MeanBackward0>)
tensor(13.4532, grad_fn=<MeanBackward0>)
tensor(13.4233, grad_fn=<MeanBackward0>)
tensor(13.3932, grad_fn=<MeanBackward0>)
tensor(13.3631, grad_fn=<MeanBackward0>)
tensor(13.3331, 