<a href="https://colab.research.google.com/github/dahlia52/Advanced-Statistical-Data-Analysis/blob/main/one_layer_logistic_regression_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

In [None]:
num_sample = 500  # number of samples drawn per class

# Seed NumPy's global RNG so that "Restart Kernel -> Run All" reproduces the
# same dataset (and, when cells run in order, the same later np.random draws).
SEED = 0
np.random.seed(SEED)

# Two unit-variance Gaussian blobs: class 0 around (2, 2), class 1 around (-2, -2)
data0 = np.random.randn(num_sample,2) + (2,2)
data1 = np.random.randn(num_sample,2) + (-2,-2)

# Append the class label (0.0 or 1.0) as a third column
data0 = np.hstack([data0,np.zeros((num_sample,1),dtype=float)])
data1 = np.hstack([data1,np.ones((num_sample,1),dtype=float)])

# Stack both classes: the first num_sample rows are class 0, the rest class 1
data = np.vstack([data0,data1])

print(data.shape)

(1000, 3)


In [None]:
# Show the stacked dataset; each row is (x1, x2, label) with label 0.0 or 1.0.
data

array([[ 3.236788  ,  2.14095442,  0.        ],
       [ 3.12806048,  2.92277511,  0.        ],
       [ 2.08697688,  1.48163492,  0.        ],
       ...,
       [-2.82204399, -0.77232958,  1.        ],
       [-0.50074832, -1.9921468 ,  1.        ],
       [-1.79009761, -2.38530759,  1.        ]])

In [None]:
# Single-layer logistic regression model:
#   h = w1*x1 + w2*x2 + b = XW + b
w = np.random.standard_normal((2, 1))  # weight column vector (w1, w2)
b = np.random.standard_normal((1, 1))  # bias, kept as a 1x1 array
eta = 1e-5     # learning rate
delta = 1e-10  # tiny offset added inside log() so its argument is never 0

In [None]:
# define sigmoid function
def sigmoid(val):
    """Numerically stable logistic function ``1 / (1 + exp(-val))``.

    The naive formula overflows in ``np.exp`` for large negative inputs and
    emits a RuntimeWarning. This version only ever exponentiates ``-|val|``,
    which lies in (0, 1], and picks the algebraically equivalent form per sign.

    Args:
        val: scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``val``, with values in [0, 1].
    """
    val = np.asarray(val)
    # exp(-|val|) can underflow to 0 but can never overflow
    decay = np.exp(-np.abs(val))
    # val >= 0: 1 / (1 + e^-val);  val < 0: e^val / (1 + e^val)
    result = np.where(val >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] turns a 0-d array back into a NumPy scalar; n-d arrays pass through
    return result[()]

# define derivative of sigmoid function w.r.t. its value
def grad_sigmoid(val):
    """Derivative of the sigmoid at ``val``: ``sigmoid(val) * (1 - sigmoid(val))``."""
    activation = sigmoid(val)
    return activation * (1 - activation)

# given data instances in batch form,
# compute loss and gradients of w and b
# also, count the number of correct prediction
def compute_loss_and_grad(data_instance):
    """Per-sample cross-entropy loss, gradients and hit flags.

    Reads the module-level parameters ``w`` and ``b`` and the log offset
    ``delta``.

    Args:
        data_instance: tuple ``(x, y)``. ``x`` is a single feature vector or a
            batch with one sample per row; ``y`` holds the matching 0/1 labels
            (a scalar, a flat vector, or a column vector).

    Returns:
        ``loss, (grad_w, grad_b), hit`` with one row per sample:
        ``loss`` is the cross-entropy, ``grad_w`` and ``grad_b`` are the
        gradients of that sample's loss w.r.t. ``w`` and ``b`` (not summed
        over the batch), and ``hit`` is True where the rounded estimate equals
        the label.
    """
    x, y = data_instance
    # Put labels in a column so they line up row-by-row with the (N, 1)
    # estimates; a flat (N,) label vector would otherwise broadcast against
    # them into an (N, N) matrix.
    y = np.reshape(y, (-1, 1))
    linear = np.matmul(x,w) + b
    y_est = sigmoid(linear)

    # loss
    # add a tiny delta so the argument of log is always strictly positive
    loss = -y*np.log(y_est + delta) - (1-y)*np.log(1-y_est + delta)

    # dL/dh = dL/dt * dt/dh, which simplifies to (y_est - y)
    grad = -y*(1-y_est) + (1-y)*y_est

    # dL/dw = dL/dt * dt/dh * dh/dw
    # elementwise product with broadcasting, e.g. grad (1,1) * x (2,) -> (1,2);
    # np.matmul would raise an error on these shapes
    grad_w = np.multiply(grad,x)

    # dL/db = dL/dt * dt/dh * dh/db
    grad_b = grad

    hit = (y == np.round(y_est)) # is the prediction correct?

    return loss, (grad_w, grad_b), hit

# update NN parameters w and b with SGD
def update_parameters(params,grads):
    """Apply one SGD step to the given parameters, in place.

    Args:
        params: tuple ``(w, b)`` of parameter arrays; both are modified in place.
        grads: tuple ``(grad_w, grad_b)`` of gradients for ``w`` and ``b``.

    Returns:
        The same ``(w, b)`` objects after the update. The step size is the
        module-level learning rate ``eta``.
    """
    weights, bias = params
    d_weights, d_bias = grads

    # grad_w arrives as a row, e.g. (1,2); flip it to a column to match w (2,1)
    weights -= eta * d_weights.reshape(-1,1)
    bias -= eta * d_bias

    return weights, bias

In [None]:
num_epoch = 100

# Train the logistic regression model with plain SGD (mini-batch size 1)
for i in range(num_epoch):
    # visit the samples in a fresh random order every epoch
    perm = np.random.permutation(len(data))
    total_loss, count = 0, 0

    for j in range(len(data)):
        # split the selected row into features and label
        x, y = data[perm[j]][:-1], data[perm[j]][-1].reshape([1])  # label: scalar -> (1,)
        params = (w, b)
        # forward/backward pass for this sample, then one parameter update
        loss, grads, hit = compute_loss_and_grad((x,y))
        w, b = update_parameters(params, grads)
        # accumulate the summed loss and the number of correct predictions
        total_loss += loss.sum()
        count += hit.sum()

    # average loss and accuracy over the training set for this epoch
    loss_train = total_loss / len(data)
    acc_train = count / len(data)

    # report every fifth epoch
    if not i % 5:
        print("Epoch %d Train: %.3f / %.2f %%"%(i,loss_train,acc_train*100))

Epoch 0 Train: 0.684 / 61.90 %
Epoch 5 Train: 0.625 / 66.10 %
Epoch 10 Train: 0.573 / 69.00 %
Epoch 15 Train: 0.528 / 72.40 %
Epoch 20 Train: 0.488 / 75.00 %
Epoch 25 Train: 0.453 / 77.10 %
Epoch 30 Train: 0.422 / 79.40 %
Epoch 35 Train: 0.394 / 81.30 %
Epoch 40 Train: 0.370 / 83.30 %
Epoch 45 Train: 0.348 / 84.70 %
Epoch 50 Train: 0.328 / 85.70 %
Epoch 55 Train: 0.311 / 86.40 %
Epoch 60 Train: 0.295 / 87.40 %
Epoch 65 Train: 0.280 / 88.70 %
Epoch 70 Train: 0.267 / 89.60 %
Epoch 75 Train: 0.255 / 90.90 %
Epoch 80 Train: 0.244 / 91.60 %
Epoch 85 Train: 0.234 / 92.00 %
Epoch 90 Train: 0.225 / 92.30 %
Epoch 95 Train: 0.216 / 93.00 %
