In [233]:
import numpy as np
from tqdm import tqdm

## Generate Data

In [234]:
def GenerateData(N, rng):
    D = []
    coins = rng.randint(2, size=N) * 2 - 1
    for coin in coins:
        if coin == 1: # = +1
            tmp = rng.multivariate_normal(mean=[2,3], cov=[[0.6, 0],[0,0.6]])
        else: # = -1
            tmp = rng.multivariate_normal(mean=[0,4], cov=[[0.4, 0],[0,0.4]])
        D.append([1, tmp[0], tmp[1]])
    return np.mat(D), np.mat(coins).transpose()

In [235]:
def linear_regression(features, target):
    return np.linalg.pinv(features) * target

In [236]:
def theta(s):
    return 1 / (1 + np.exp(-s))

def get_gradient(w, x, y):
    # w = 3x1, x=3x1, y=1x1
    wT = w.transpose()
    theta_result = theta(-y * wT * x)
    theta_result *= (-y)
    return np.multiply(theta_result, x)

def logistic_regression(x, y, rate=0.1, iter_time=500):
    w = np.mat([[0], [0], [0]]) # 3x1
    N = y.shape[0]
    for i in range(iter_time):
        # Compute Gradient
        dEin = np.mat([[0.0], [0.0], [0.0]])
        for i in range(N):
            dEin += get_gradient(w, x[i].transpose(), y[i])
        dEin /= N
        w = w - rate * dEin
    return w            

In [237]:
def calculate_err(w, x, y, N):    
    sqr_err = np.zeros(N)
    zo_err = np.zeros(N)
    for k in range(N):
        # w = (3x1), w.T = (1x3)
        # x[k] = (1x3), x[k].T = (3x1)
        y_hat = w.transpose() * x[k].transpose()
        sqr_err[k] = (y_hat - y[k]) ** 2
        if (y_hat * y[k] < 0):
            zo_err[k] = 1
    return np.average(sqr_err), np.average(zo_err)

In [238]:
def problem_13_14():
    sqr_err_in = np.zeros(100)
    zo_err_in = np.zeros(100)
    sqr_err_out = np.zeros(100)
    zo_err_out = np.zeros(100)
    for i in tqdm(range(sqr_err_in.shape[0])):
        N_in = 200
        N_out = 5000

        rng = np.random.RandomState(i)
        D_in, D_in_target = GenerateData(N_in, rng)
        D_out, D_out_target = GenerateData(N_out, rng)
        
        w_lin = linear_regression(D_in, D_in_target)

        sqr_err_in[i], zo_err_in[i] = calculate_err(w_lin, D_in, D_in_target, N_in)
        sqr_err_out[i], zo_err_out[i]  = calculate_err(w_lin, D_out, D_out_target, N_out)

    print("Problem 13: ", np.average(sqr_err_in))
    print("Problem 14: ", np.average(np.abs(zo_err_in-zo_err_out)))
problem_13_14()

100%|██████████| 100/100 [00:55<00:00,  1.80it/s]

Problem 13:  0.2830428321443629
Problem 14:  0.013166





In [239]:
def problem_15():
    zo_err_lin = np.zeros(100)
    zo_err_log = np.zeros(100)
    for i in tqdm(range(zo_err_lin.shape[0])):
        N_in = 200
        N_out = 5000

        rng = np.random.RandomState(i)
        D_in, D_in_target = GenerateData(N_in, rng)
        D_out, D_out_target = GenerateData(N_out, rng)
        
        w_lin = linear_regression(D_in, D_in_target)
        w_log = logistic_regression(D_in, D_in_target)

        _, zo_err_lin[i] = calculate_err(w_lin, D_out, D_out_target, N_out)
        _, zo_err_log[i] = calculate_err(w_log, D_out, D_out_target, N_out)

    print("Problem 15: ", (np.average(zo_err_lin), np.average(zo_err_log)))
problem_15()

100%|██████████| 100/100 [10:47<00:00,  6.47s/it]

Problem 15:  (0.05848, 0.05954400000000001)





In [240]:
def GenerateDataWithOutlier(N, N_ol, rng):
    D = []
    coins = []

    for _ in range(N):
        coin = rng.randint(2) * 2 - 1
        if coin == 1: # = +1
            tmp = rng.multivariate_normal(mean=[2,3], cov=[[0.6, 0],[0,0.6]])
        else: # = -1
            tmp = rng.multivariate_normal(mean=[0,4], cov=[[0.4, 0],[0,0.4]])
        coins.append(coin)
        D.append([1, tmp[0], tmp[1]])
    
    for _ in range(N_ol):
        tmp = rng.multivariate_normal(mean=[6, 0], cov=[[0.3, 0],[0,0.1]])
        coins.append(1)
        D.append([1, tmp[0], tmp[1]])

    return np.mat(D), np.mat(coins).transpose()

In [241]:
def problem_16():
    zo_err_lin = np.zeros(100)
    zo_err_log = np.zeros(100)
    for i in tqdm(range(zo_err_lin.shape[0])):
        N_in = 200
        N_ol = 20
        N_out = 5000

        rng = np.random.RandomState(i)
        D_in, D_in_target = GenerateDataWithOutlier(N_in, N_ol, rng)
        D_out, D_out_target = GenerateData(N_out, rng)
        
        w_lin = linear_regression(D_in, D_in_target)
        w_log = logistic_regression(D_in, D_in_target)

        _, zo_err_lin[i] = calculate_err(w_lin, D_out, D_out_target, N_out)
        _, zo_err_log[i] = calculate_err(w_log, D_out, D_out_target, N_out)

    print("Problem 16: ", (np.average(zo_err_lin), np.average(zo_err_log)))
problem_16()

100%|██████████| 100/100 [14:51<00:00,  8.92s/it]

Problem 16:  (0.093052, 0.059532)



