In [2]:
import numpy as np

### Generate Dataset (Single Feature)

In [3]:
x = np.random.uniform(low = 0.0, high = 1.0, size = 100)
print (x.shape)
x[0:10]

(100,)


array([0.2527795 , 0.08312264, 0.7228865 , 0.21066908, 0.26303798,
       0.95288637, 0.00670555, 0.58455555, 0.51714767, 0.67176602])

In [4]:
y = x * 0.3
print (y.shape)
y[0:10]

(100,)


array([0.07583385, 0.02493679, 0.21686595, 0.06320072, 0.07891139,
       0.28586591, 0.00201167, 0.17536666, 0.1551443 , 0.20152981])

### Case 1 - Random Search

In [13]:
num_epoch = 100

best_error = np.inf
best_w = None
best_epoch = None

for epoch in range(num_epoch):
    w = np.random.uniform(low = 0.0, high = 1.0)
    y_predict = x * w
    error = np.abs(y_predict - y).mean()
    if error < best_error:
        best_error = error
        best_w = w
        best_epoch = epoch
        print ("{:2} w = {:.6f}, error = {:.6f}".format(epoch, w, error))

print ("----" * 10)
print ("{:2} w = {:.6f}, error = {:.6f}".format(best_epoch, best_w, best_error))

 0 w = 0.002385, error = 0.150516
 1 w = 0.470284, error = 0.086120
 3 w = 0.461673, error = 0.081765
 5 w = 0.357468, error = 0.029064
11 w = 0.262428, error = 0.019002
16 w = 0.320009, error = 0.010119
26 w = 0.290669, error = 0.004719
----------------------------------------
26 w = 0.290669, error = 0.004719


### Case 2 H-Step Search

In [14]:
num_epoch = 100
h = 0.1

w = np.random.uniform(low = 0.0, high = 1.0)

for epoch in range(num_epoch):
    y_predict = x * w
    current_error = np.abs(y_predict - y).mean()
    
    y_predict = x * (w + h)
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        print ("{:2} w = {:.6f}, error = {:.6f}".format(epoch, w, h_plus_error)) 
        w = w + h
        continue
    
    y_predict = x * (w - h)
    h_minus_error = np.abs(y_predict - y).mean()
    if h_minus_error < current_error:
        print ("{:2} w = {:.6f}, error = {:.6f}".format(epoch, w, h_minus_error)) 
        w = w - h
        continue
        
    break

print ("----" * 10)
print ("{:2} w = {:.6f}, error = {:.6f}".format(epoch, w, current_error))

 0 w = 0.384552, error = 0.007813
----------------------------------------
 1 w = 0.284552, error = 0.007813


### Case 3 - Gradient Descent (not yet)

In [15]:
num_epoch = 100

w = np.random.uniform(low = 0.0, high = 1.0)

for epoch in range(num_epoch):
    y_predict = x * w

    error = np.abs(y_predict - y).mean()    
    if error < 0.0005:
        break
    
    print ("{:2} w = {:.6f}, error = {:.6f}".format(epoch, w, error))
    
    w = w - (y_predict - y).mean()
    ### w가 미세하게 바뀔 수 있다.
    ### np.abs(y_predict - y).mean()은 error의 크기만 보는 것이고
    ### (y_predict - y).mean()는 방향성만 고려하는 것이다.

print ("----" * 10)
print ("{:2} w = {:.6f}, error = {:.6f}".format(epoch, w, error))

 0 w = 0.381490, error = 0.041213
 1 w = 0.340277, error = 0.020370
 2 w = 0.319907, error = 0.010068
 3 w = 0.309839, error = 0.004976
 4 w = 0.304863, error = 0.002460
 5 w = 0.302404, error = 0.001216
 6 w = 0.301188, error = 0.000601
----------------------------------------
 7 w = 0.300587, error = 0.007813


### Generate Dataset (Multi Feature)

In [16]:
x1 = np.random.uniform(low = 0.0, high = 1.0, size = 100)
print (x1.shape)
x1[0:10]

(100,)


array([0.93136816, 0.92819147, 0.70146286, 0.89644403, 0.46976151,
       0.73058125, 0.91121839, 0.20314373, 0.44743208, 0.27513544])

In [17]:
x2 = np.random.uniform(low = 0.0, high = 1.0, size = 100)
print (x2.shape)
x2[0:10]

(100,)


array([0.87580484, 0.92753513, 0.31718843, 0.40265446, 0.68044304,
       0.30432413, 0.95735766, 0.47498527, 0.50854411, 0.23425251])

In [18]:
y = x1 * 0.3 + x2 * 0.5
print (y.shape)
y[0:10]

(100,)


array([0.71731287, 0.742225  , 0.36903307, 0.47026044, 0.48114997,
       0.37133644, 0.75204435, 0.29843575, 0.38850168, 0.19966689])

### Case 1 - Random Search

In [21]:
num_epoch = 10000

best_error = np.inf
best_w1 = None
best_w2 = None
best_epoch = None

for epoch in range(num_epoch):
    w1 = np.random.uniform(low = 0.0, high = 1.0)
    w2 = np.random.uniform(low = 0.0, high = 1.0)
    
    y_predict = x1 * w1 + x2 * w2
    error = np.abs(y_predict - y).mean()
    if error < best_error:
        best_error = error
        best_w1 = w1
        best_w2 = w2
        best_epoch = epoch
        print ("{:4} w1 = {:.6f}, w2 = {:.6f}, error = {:.6f}".format(epoch, w1, w2, error))

print ("----" * 15)
print ("{:4} w1 = {:.6f}, w2 = {:.6f}, error = {:.6f}".format(best_epoch, best_w1, best_w2, best_error))

   0 w1 = 0.978938, w2 = 0.847022, error = 0.480984
   1 w1 = 0.459028, w2 = 0.756259, error = 0.195734
   2 w1 = 0.366621, w2 = 0.459004, error = 0.019231
  92 w1 = 0.328135, w2 = 0.500232, error = 0.013214
 442 w1 = 0.296987, w2 = 0.512269, error = 0.004795
1399 w1 = 0.299216, w2 = 0.498556, error = 0.001051
------------------------------------------------------------
1399 w1 = 0.299216, w2 = 0.498556, error = 0.001051


### Case 3 - Gradient Descent (not yet)

In [26]:
num_epoch = 10000

w1 = np.random.uniform(low = 0.0, high = 1.0)
w2 = np.random.uniform(low = 0.0, high = 1.0)

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2

    error = np.abs(y_predict - y).mean()    
    if error < 0.0005:
        break
    
    print ("{:4} w1 = {:.6f}, w2 = {:.6f}, error = {:.6f}".format(epoch, w1, w2, error))
    
    w1 = w1 - ((y_predict - y) * x1).mean()
    w2 = w2 - ((y_predict - y) * x2).mean()
    ### w1, w2는 서로 변화량이 달라야 한다.
    ### ((y_predict - y) * x1).mean(), ((y_predict - y) * x2).mean()를 이용하여
    ### feature 각각의 방향성을 고려한다.
    
print ("----" * 15)
print ("{:4} w1 = {:.6f}, w2 = {:.6f}, error = {:.6f}".format(epoch, w1, w2, error))

   0 w1 = 0.054941, w2 = 0.005076, error = 0.349117
   1 w1 = 0.238200, w2 = 0.218068, error = 0.162638
   2 w1 = 0.320180, w2 = 0.321689, error = 0.076631
   3 w1 = 0.354954, w2 = 0.373735, error = 0.043433
   4 w1 = 0.367862, w2 = 0.401334, error = 0.030691
   5 w1 = 0.370770, w2 = 0.417228, error = 0.025159
   6 w1 = 0.369222, w2 = 0.427411, error = 0.022191
   7 w1 = 0.365802, w2 = 0.434712, error = 0.020259
   8 w1 = 0.361705, w2 = 0.440472, error = 0.018688
   9 w1 = 0.357478, w2 = 0.445335, error = 0.017266
  10 w1 = 0.353361, w2 = 0.449616, error = 0.015981
  11 w1 = 0.349456, w2 = 0.453474, error = 0.014789
  12 w1 = 0.345798, w2 = 0.456995, error = 0.013685
  13 w1 = 0.342393, w2 = 0.460231, error = 0.012662
  14 w1 = 0.339232, w2 = 0.463214, error = 0.011716
  15 w1 = 0.336302, w2 = 0.465969, error = 0.010840
  16 w1 = 0.333590, w2 = 0.468515, error = 0.010030
  17 w1 = 0.331079, w2 = 0.470870, error = 0.009280
  18 w1 = 0.328756, w2 = 0.473049, error = 0.008586
  19 w1 = 0.

### Generate Dataset (Multi Feature with Bias)

In [29]:
x1 = np.random.uniform(low = 0.0, high = 1.0, size = 100)
print (x1.shape)
x1[0:10]

(100,)


array([0.22601894, 0.6801539 , 0.9999859 , 0.81228367, 0.49599131,
       0.06851541, 0.12002865, 0.30363821, 0.15674128, 0.50746203])

In [30]:
x2 = np.random.uniform(low = 0.0, high = 1.0, size = 100)
print (x2.shape)
x2[0:10]

(100,)


array([0.70781306, 0.16217993, 0.06675143, 0.89816791, 0.82571392,
       0.3286649 , 0.63916029, 0.42216842, 0.38749287, 0.66821074])

In [31]:
y = x1 * 0.3 + x2 * 0.5 + 0.1
print (y.shape)
y[0:10]

(100,)


array([0.52171221, 0.38513613, 0.43337149, 0.79276906, 0.66165435,
       0.28488707, 0.45558874, 0.40217567, 0.34076882, 0.58634398])

### Case 1 - Random Search

In [32]:
num_epoch = 100000

best_error = np.inf
best_w1 = None
best_w2 = None
best_b = None
best_epoch = None

for epoch in range(num_epoch):
    w1 = np.random.uniform(low = 0.0, high = 1.0)
    w2 = np.random.uniform(low = 0.0, high = 1.0)
    b = np.random.uniform(low = 0.0, high = 1.0)
    
    y_predict = x1 * w1 + x2 * w2 + b
    error = np.abs(y_predict - y).mean()
    if error < best_error:
        best_error = error
        best_w1 = w1
        best_w2 = w2
        best_b = b
        best_epoch = epoch
        print ("{:5} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, error))

print ("----" * 20)
print ("{:5} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(best_epoch, best_w1, best_w2, best_b, best_error))

    0 w1 = 0.809140, w2 = 0.143510, b = 0.038143, error = 0.139998
    2 w1 = 0.050794, w2 = 0.249944, b = 0.433936, error = 0.114073
    8 w1 = 0.075005, w2 = 0.284312, b = 0.253189, error = 0.098476
   20 w1 = 0.136782, w2 = 0.379042, b = 0.173935, error = 0.078548
   22 w1 = 0.082519, w2 = 0.762838, b = 0.074151, error = 0.077366
   32 w1 = 0.471155, w2 = 0.490632, b = 0.039703, error = 0.045104
  291 w1 = 0.262817, w2 = 0.666132, b = 0.026999, error = 0.041223
  295 w1 = 0.420787, w2 = 0.582119, b = 0.001760, error = 0.037655
  621 w1 = 0.289837, w2 = 0.586452, b = 0.097924, error = 0.037503
  997 w1 = 0.359268, w2 = 0.491795, b = 0.086753, error = 0.017153
 3582 w1 = 0.292333, w2 = 0.436705, b = 0.133794, error = 0.016876
 5618 w1 = 0.234082, w2 = 0.517503, b = 0.119801, error = 0.016755
 6291 w1 = 0.323191, w2 = 0.453610, b = 0.110656, error = 0.012122
 8695 w1 = 0.314640, w2 = 0.460449, b = 0.106147, error = 0.011646
13628 w1 = 0.311064, w2 = 0.533442, b = 0.079245, error = 0.00

### Case 3 - Gradient Descent (not yet)

In [None]:
x3 = [1] * 100
x3 = np.array(x3)
x3

In [38]:
num_epoch = 100

w1 = np.random.uniform(low = 0.0, high = 1.0)
w2 = np.random.uniform(low = 0.0, high = 1.0)
b = np.random.uniform(low = 0.0, high = 1.0)

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2 + x3 * b

    error = np.abs(y_predict - y).mean()    
    if error < 0.0005:
        break
    
    print ("{:4} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, error))
    
    w1 = w1 - ((y_predict - y) * x1).mean()
    w2 = w2 - ((y_predict - y) * x2).mean()
    b = b - (y_predict - y).mean()
    ### bias는 해당 feature가 모두 1이라고 가정 할 수 있다.
    ### ((y_predict - y) * x3).mean()
    ### 1을 곱하는 것은 생략해도 되므로 (y_predict - y).mean()로 방향성을 고려한다.
    
print ("----" * 15)
print ("{:4} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, error))

   0 w1 = 0.002931, w2 = 0.303485, b = 0.357255, error = 0.092982
   1 w1 = 0.028570, w2 = 0.321852, b = 0.352742, error = 0.087647
   2 w1 = 0.042528, w2 = 0.329126, b = 0.330211, error = 0.081640
   3 w1 = 0.061181, w2 = 0.341542, b = 0.319335, error = 0.076624
   4 w1 = 0.075493, w2 = 0.350087, b = 0.303426, error = 0.071634
   5 w1 = 0.090627, w2 = 0.359860, b = 0.291720, error = 0.067142
   6 w1 = 0.103802, w2 = 0.368065, b = 0.278966, error = 0.062837
   7 w1 = 0.116670, w2 = 0.376319, b = 0.268017, error = 0.058861
   8 w1 = 0.128399, w2 = 0.383767, b = 0.257201, error = 0.055107
   9 w1 = 0.139539, w2 = 0.390934, b = 0.247380, error = 0.051610
  10 w1 = 0.149862, w2 = 0.397571, b = 0.238006, error = 0.048325
  11 w1 = 0.159570, w2 = 0.403858, b = 0.229321, error = 0.045255
  12 w1 = 0.168620, w2 = 0.409734, b = 0.221130, error = 0.042377
  13 w1 = 0.177101, w2 = 0.415268, b = 0.213488, error = 0.039684
  14 w1 = 0.185024, w2 = 0.420457, b = 0.206311, error = 0.037163
  15 w1 = 

### Case 3 - Gradient Descent with Hyperparmeter

In [39]:
num_epoch = 100
learning_rate = 1.2 # Hyperparameter

w1 = np.random.uniform(low = 0.0, high = 1.0)
w2 = np.random.uniform(low = 0.0, high = 1.0)
b = np.random.uniform(low = 0.0, high = 1.0)

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2 + b

    error = np.abs(y_predict - y).mean()    
    if error < 0.0005:
        break
    
    print ("{:4} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, error))
    
    w1 = w1 - learning_rate * ((y_predict - y) * x1).mean()
    w2 = w2 - learning_rate * ((y_predict - y) * x2).mean()
    b = b - learning_rate * (y_predict - y).mean()
    ### bias는 해당 feature가 모두 1이라고 가정 할 수 있다.
    ### ((y_predict - y) * x3).mean()
    ### 1을 곱하는 것은 생략해도 되므로 (y_predict - y).mean()로 방향성을 고려한다.
    
print ("----" * 15)
print ("{:4} w1 = {:.6f}, w2 = {:.6f}, b = {:.6f}, error = {:.6f}".format(epoch, w1, w2, b, error))

   0 w1 = 0.676730, w2 = 0.282931, b = 0.422228, error = 0.404995
   1 w1 = 0.391493, w2 = 0.052156, b = -0.063766, error = 0.345134
   2 w1 = 0.601307, w2 = 0.308545, b = 0.350395, error = 0.307493
   3 w1 = 0.383556, w2 = 0.135875, b = -0.018597, error = 0.261353
   4 w1 = 0.541333, w2 = 0.332415, b = 0.295027, error = 0.233500
   5 w1 = 0.375013, w2 = 0.203418, b = 0.014827, error = 0.197877
   6 w1 = 0.493564, w2 = 0.354223, b = 0.252279, error = 0.177343
   7 w1 = 0.366457, w2 = 0.258022, b = 0.039468, error = 0.149856
   8 w1 = 0.455456, w2 = 0.373852, b = 0.219213, error = 0.134718
   9 w1 = 0.358258, w2 = 0.302252, b = 0.057552, error = 0.113688
  10 w1 = 0.425007, w2 = 0.391315, b = 0.193586, error = 0.102360
  11 w1 = 0.350633, w2 = 0.338146, b = 0.070755, error = 0.086558
  12 w1 = 0.400641, w2 = 0.406710, b = 0.173682, error = 0.077792
  13 w1 = 0.343693, w2 = 0.367330, b = 0.080331, error = 0.066195
  14 w1 = 0.381114, w2 = 0.420178, b = 0.158186, error = 0.059222
  15 w1 