In [1]:
from itertools import product

import numpy as np

In [152]:
def random_from(lo, hi):
    """Return one random float between ``lo`` and ``hi``.

    The draw comes from ``np.random.random()`` (uniform on [0, 1)) and is
    stretched to the width of the interval, then shifted to start at ``lo``.
    """
    span = hi - lo
    return lo + span * np.random.random()

def random_adjust_clipped(value, lo, hi, scale=0.1):
    """Perturb ``value`` with Gaussian noise and clip the result to [lo, hi].

    Args:
        value: Current value to perturb.
        lo: Lower bound of the allowed range.
        hi: Upper bound of the allowed range.
        scale: Standard deviation of the zero-mean Gaussian step. Defaults to
            0.1, the step size this notebook originally hard-coded.

    Returns:
        ``value`` plus one noise draw, limited to the range [lo, hi].
    """
    noise = np.random.normal(scale=scale)
    # np.clip keeps the proposal inside the search bounds.
    return np.clip(value + noise, lo, hi)

def train_nn(lr, beta):
    """Stand-in for a real training run: the "loss" is simply ``lr + beta``.

    Args:
        lr: Learning rate.
        beta: Exponential-smoothing coefficient.

    Returns:
        The sum of the two hyperparameters.
    """
    loss = lr + beta
    return loss

In [85]:
# Learning-rate search range on a log10 scale: the exponent is what gets
# sampled, so the bounds are the exponents of 0.0001 and 1.
# Decades covered: 0.0001, 0.001, 0.01, 0.1, 1
lo = np.log10(0.0001)
hi = np.log10(1)
lo, hi

(-4.0, 0.0)

In [86]:
# Draw the exponent between lo and hi, then map it back to a learning rate.
log10_lr = random_from(lo, hi)
learning_rate = 10**log10_lr
learning_rate

0.004358567008523179

In [102]:
# Hyperparameter for exponential smoothing
# 0.9 is going to average roughly the previous 10 values
# 0.999 is going to average roughly the previous 1000 values
# beta is usually chosen from 0.9 to 0.9999
# 0.9, 0.99, 0.999, 0.9999
# we care less about seeing 0.91 (almost the same as 0.9)
# we care less about seeing 0.991 (almost the same as 0.99)
# Exponential smoothing is more sensitive to values in the upper part of the range
# 0.900 --> 0.9005 (small impact on the final result)
# 0.999 --> 0.9995 (bigger impact on the final result)

# So sample (1 - beta) on a log10 scale instead of beta itself:
# 0.9 = 1 - 0.1
# 0.99 = 1 - 0.01
# 0.999 = 1 - 0.001

# NOTE: this rebinds the lo/hi names that held the learning-rate bounds.
lo = np.log10(1 - 0.9999)
hi = np.log10(1 - 0.9)

# Sample the exponent of (1 - beta), then convert back to beta.
beta = 1 - 10**random_from(lo, hi)
beta

0.9995303919058545

# Grid Search

The worst of the methods covered today.

In [105]:
# Grid points, one per decade: learning rates 1 down to 0.001,
# betas 0.9 up to 0.9999 (written as 1 - 10**-k).
learning_rates = [10**-exponent for exponent in range(4)]
betas = [1 - 10**-nines for nines in range(1, 5)]
learning_rates, betas

([1, 0.1, 0.01, 0.001], [0.9, 0.99, 0.999, 0.9999])

In [108]:
# Grid search: evaluate train_nn on every (learning rate, beta) pair,
# iterating betas in the inner loop, and print one line per combination.
for lr in learning_rates:
    for beta in betas:
        loss = train_nn(lr, beta)
        print(lr, beta, loss)

1 0.9 1.9
1 0.99 1.99
1 0.999 1.999
1 0.9999 1.9999
0.1 0.9 1.0
0.1 0.99 1.09
0.1 0.999 1.099
0.1 0.9999 1.0999
0.01 0.9 0.91
0.01 0.99 1.0
0.01 0.999 1.009
0.01 0.9999 1.0099
0.001 0.9 0.901
0.001 0.99 0.991
0.001 0.999 1.0
0.001 0.9999 1.0009


In [113]:
# Same grid, flattened with itertools.product instead of nested loops.
# Further hyperparameters would just be extra arguments to product(), e.g.
# a list such as dropout_rates = [0.1 + i/10 for i in range(4)].
for lr, beta in product(learning_rates, betas):
    loss = train_nn(lr, beta)
    print(lr, beta, loss)

1 0.9 1.9
1 0.99 1.99
1 0.999 1.999
1 0.9999 1.9999
0.1 0.9 1.0
0.1 0.99 1.09
0.1 0.999 1.099
0.1 0.9999 1.0999
0.01 0.9 0.91
0.01 0.99 1.0
0.01 0.999 1.009
0.01 0.9999 1.0099
0.001 0.9 0.901
0.001 0.99 0.991
0.001 0.999 1.0
0.001 0.9999 1.0009


In [114]:
# Random search: both hyperparameters are drawn on a log10 scale.
# Learning rate exponent bounds (0.001 .. 1).
lr_lo, lr_hi = np.log10(0.001), np.log10(1)

# Bounds for the exponent of (1 - beta), i.e. beta in 0.9 .. 0.9999.
bt_lo, bt_hi = np.log10(1 - .9999), np.log10(1 - .9)

# Number of random configurations to evaluate.
num_trials = 16

for _ in range(num_trials):
    # Learning rate is drawn first, then beta.
    lr = 10**random_from(lr_lo, lr_hi)
    beta = 1 - 10**random_from(bt_lo, bt_hi)
    loss = train_nn(lr, beta)
    print(lr, beta, loss)

0.03207635774177285 0.9990334026360121 1.031109760377785
0.00575246913142425 0.9998541035234998 1.0056065726549241
0.015029424642049014 0.9994572271233091 1.0144866517653581
0.11152061433860384 0.9909534957303098 1.1024741100689137
0.0015276874686569574 0.9789287502846554 0.9804564377533124
0.0011280762165611182 0.9991693767023269 1.000297452918888
0.009351747444670471 0.9988155652475417 1.0081673126922122
0.019671308067394894 0.9963956685217541 1.016066976589149
0.7861036901199939 0.9993059496883235 1.7854096398083175
0.029511306625670527 0.9981911158684348 1.0277024224941054
0.001457400915278945 0.9992205683606564 1.0006779692759353
0.0013452125973471706 0.999793431315459 1.001138643912806
0.016691075314099735 0.9994996501483603 1.01619072546246
0.018035267174211833 0.9914178765300831 1.0094531437042948
0.0014662933468557967 0.9995055884589077 1.0009718818057636
0.23482932629802658 0.9835173247813805 1.2183466510794072


# Stochastic Hill Climbing

In [191]:
def hill_climber(n_trials=None):
    """Stochastic hill climbing over learning rate and beta in log10 space.

    Starts from a random point inside the search bounds, then repeatedly
    proposes a noisy, clipped step away from the best point found so far and
    keeps the proposal only when it lowers the loss from ``train_nn``.

    Args:
        n_trials: Number of proposals to evaluate. When omitted, the
            notebook-level ``num_trials`` is used.

    Returns:
        Tuple ``(best_loss, lr, beta)`` describing the best point found.
        The best loss is also printed.
    """
    if n_trials is None:
        # Fall back to the trial budget defined at notebook level.
        n_trials = num_trials

    # Search bounds, expressed as log10 exponents.
    lr_lo = np.log10(0.001)
    lr_hi = np.log10(1)
    bt_lo = np.log10(1 - .9999)
    bt_hi = np.log10(1 - .9)

    # Random starting point (learning rate is drawn first, then beta).
    lr_param = random_from(lr_lo, lr_hi)
    bt_param = random_from(bt_lo, bt_hi)

    lr = 10**lr_param
    beta = 1 - 10**bt_param
    best_loss = train_nn(lr, beta)

    for _ in range(n_trials):
        # Propose a neighbour of the current best point.
        new_lr_param = random_adjust_clipped(lr_param, lr_lo, lr_hi)
        new_bt_param = random_adjust_clipped(bt_param, bt_lo, bt_hi)

        new_lr = 10**new_lr_param
        new_beta = 1 - 10**new_bt_param

        new_loss = train_nn(new_lr, new_beta)

        if new_loss < best_loss:
            best_loss = new_loss
            lr = new_lr
            beta = new_beta
            # Move the search point so the next proposal starts from the
            # improved position rather than from the initial random point.
            lr_param = new_lr_param
            bt_param = new_bt_param

    print("Best loss was:", best_loss)
    return best_loss, lr, beta

In [192]:
# Run five independent climbs; each call draws its own random starting point
# and prints the best loss it reached.
for _ in range(5):
    hill_climber()

Best loss was: 0.9440480120457533
Best loss was: 1.018834819679194
Best loss was: 1.0618571674066508
Best loss was: 0.9938809574677866
Best loss was: 1.0465753551620418
