In [44]:
import time
import numpy as np
import matplotlib.pyplot as plt

from tabulate import tabulate
from copy import deepcopy

from features import PreprocessData
from linear_regression import linear_closed_form, linear_gradient_descent

ppd = PreprocessData()

# Split dataset
train, validation, test = ppd.preprocess_data(ppd.data)

In [2]:
# Compute most common words from 
ppd.compute_most_common_words(train)

## Experiments
### 1 - Compare runtime, stability and performance of closed-form and gradient descent

In [3]:
# Compute features on training set
start = time.time()
X_train, y_train = ppd.compute_features(train, simple=True)
feat_train_runtime = time.time() - start
print(X_train.shape)

# Compute features on validation set
start = time.time()
X_valid, y_valid = ppd.compute_features(validation, simple=True)
feat_valid_runtime = time.time() - start
print(X_valid.shape)

print(f'Training features runtime: {feat_train_runtime}')
print(f'Validation features runtime: {feat_valid_runtime}')

(10000, 4)
(1000, 4)
Training features runtime: 0.008931159973144531
Validation features runtime: 0.0010755062103271484


In [4]:
def test_closed_vs_gradient(X_train, y_train, X_valid, y_valid, hyperparams, rand_init=False):
    # Train using closed form method
    start = time.time()
    w_closed = linear_closed_form(X_train, y_train)
    w_closed_runtime = time.time() - start
    
    # Train using gradient descent
    # Hyperparameters
    w_init = np.random.rand(X_train.shape[1]) if rand_init else np.zeros(X_train.shape[1])
    decay_speed = hyperparams['decay_speed']
    learn_rate = hyperparams['learn_rate']
    min_err = hyperparams['min_err']
    max_iter = hyperparams['max_iter']

    start = time.time()
    w_grad = linear_gradient_descent(X_train, y_train, w_init, decay_speed, learn_rate, min_err, max_iter, verbose=True)
    w_grad_runtime = time.time() - start
    
    # Compute MSE on training set
    y_closed_train = np.matmul(X_train, w_closed)
    mse_closed_train = np.sum((y_closed_train - y_train)**2)/len(y_train)

    y_grad_train = np.matmul(X_train, w_grad)
    mse_grad_train = np.sum((y_grad_train - y_train)**2)/len(y_train)
    
    # Compute MSE on validation set
    y_closed_valid = np.matmul(X_valid, w_closed)
    mse_closed_valid = np.sum((y_closed_valid - y_valid)**2)/len(y_valid)

    y_grad_valid = np.matmul(X_valid, w_grad)
    mse_grad_valid = np.sum((y_grad_valid - y_valid)**2)/len(y_valid)
    
    return {'train': {'closed': mse_closed_train, 'grad': mse_grad_train}, 'validation': {'closed': mse_closed_valid, 'grad': mse_grad_valid}, 'runtime': {'closed': w_closed_runtime, 'grad': w_grad_runtime}}

#### 1.1 - Testing random w0 vs. zero w0

In [5]:
hyperparams = {'decay_speed': 10**(-12), 'learn_rate': 10**(-6), 'min_err': 10**(-7), 'max_iter': 1000000}
rand_runtime = []
zero_runtime = []
for _ in range(20):
    perf_rand = test_closed_vs_gradient(X_train, y_train, X_valid, y_valid, hyperparams, rand_init=True)
    perf_zero = test_closed_vs_gradient(X_train, y_train, X_valid, y_valid, hyperparams, rand_init=False)
    rand_runtime.append(perf_rand['runtime']['grad'])
    zero_runtime.append(perf_zero['runtime']['grad'])

print(f'Average runtime with random init: {sum(rand_runtime)/len(rand_runtime)}')
print(f'Average runtime with zero init: {sum(zero_runtime)/len(zero_runtime)}')

Error: 3.120570662003951e-05 | Learning rate: 9.999999900000001e-07
Error: 3.104680852799975e-06 | Learning rate: 9.999999800000003e-07
Error: 3.088872024731313e-07 | Learning rate: 9.99999970000001e-07
Finished after 34887 iterations
Error: 2.5246314222317527e-05 | Learning rate: 9.999999900000001e-07
Error: 2.511776109700652e-06 | Learning rate: 9.999999800000003e-07
Error: 2.4989863144651346e-07 | Learning rate: 9.99999970000001e-07
Finished after 33968 iterations
Error: 4.4887670599719864e-05 | Learning rate: 9.999999900000001e-07
Error: 4.465910454719902e-06 | Learning rate: 9.999999800000003e-07
Error: 4.443170336417149e-07 | Learning rate: 9.99999970000001e-07
Finished after 36462 iterations
Error: 2.5246314222317527e-05 | Learning rate: 9.999999900000001e-07
Error: 2.511776109700652e-06 | Learning rate: 9.999999800000003e-07
Error: 2.4989863144651346e-07 | Learning rate: 9.99999970000001e-07
Finished after 33968 iterations
Error: 2.5420088100052258e-05 | Learning rate: 9.999999

Error: 2.5246314222317527e-05 | Learning rate: 9.999999900000001e-07
Error: 2.511776109700652e-06 | Learning rate: 9.999999800000003e-07
Error: 2.4989863144651346e-07 | Learning rate: 9.99999970000001e-07
Finished after 33968 iterations
Error: 4.2686863862556586e-05 | Learning rate: 9.999999900000001e-07
Error: 4.246950422058295e-06 | Learning rate: 9.999999800000003e-07
Error: 4.2253252350184315e-07 | Learning rate: 9.99999970000001e-07
Finished after 36244 iterations
Error: 2.5246314222317527e-05 | Learning rate: 9.999999900000001e-07
Error: 2.511776109700652e-06 | Learning rate: 9.999999800000003e-07
Error: 2.4989863144651346e-07 | Learning rate: 9.99999970000001e-07
Finished after 33968 iterations
Error: 3.978073219687879e-05 | Learning rate: 9.999999900000001e-07
Error: 3.957817045017951e-06 | Learning rate: 9.999999800000003e-07
Error: 3.937664104267556e-07 | Learning rate: 9.99999970000001e-07
Finished after 35939 iterations
Error: 2.5246314222317527e-05 | Learning rate: 9.99999

In [48]:
learn_rates = [10**(-5), 10**(-6), 10**(-7), 10**(-8), 10**(-9), 10**(-10)]
decay_speeds = [10**(-5), 10**(-6), 10**(-7), 10**(-8), 10**(-9), 10**(-10)]
min_error = 10**(-6)

train_grad_errors = []
valid_grad_errors = []
grad_runtimes = []
for learn_rate in learn_rates:
    train_err = []
    valid_err = []
    run = []
    for decay_speed in decay_speeds:
        print(f'Testing with learn_rate: {learn_rate} and decay_speed: {decay_speed}')
        hyperparams = {'decay_speed': decay_speed, 'learn_rate': learn_rate, 'min_err': min_error, 'max_iter': 10000000}
        perf = test_closed_vs_gradient(X_train, y_train, X_valid, y_valid, hyperparams)
        train_err.append(perf['train']['grad'])
        valid_err.append(perf['validation']['grad'])
        run.append(perf['runtime']['grad'])
    train_grad_errors.append(train_err)
    valid_grad_errors.append(valid_err)
    grad_runtimes.append(run)

Testing with learn_rate: 1e-05 and decay_speed: 1e-05
Finished after 3437 iterations
Testing with learn_rate: 1e-05 and decay_speed: 1e-06
Finished after 3398 iterations
Testing with learn_rate: 1e-05 and decay_speed: 1e-07
Finished after 3394 iterations
Testing with learn_rate: 1e-05 and decay_speed: 1e-08
Finished after 3394 iterations
Testing with learn_rate: 1e-05 and decay_speed: 1e-09
Finished after 3394 iterations
Testing with learn_rate: 1e-05 and decay_speed: 1e-10
Finished after 3394 iterations
Testing with learn_rate: 1e-06 and decay_speed: 1e-05
Finished after 25853 iterations
Testing with learn_rate: 1e-06 and decay_speed: 1e-06
Finished after 24175 iterations
Testing with learn_rate: 1e-06 and decay_speed: 1e-07
Finished after 24009 iterations
Testing with learn_rate: 1e-06 and decay_speed: 1e-08
Finished after 23992 iterations
Testing with learn_rate: 1e-06 and decay_speed: 1e-09
Finished after 23991 iterations
Testing with learn_rate: 1e-06 and decay_speed: 1e-10
Finish

In [58]:
data = deepcopy(train_grad_errors)
for err, learn_rate in zip(data, learn_rates):
    err.append(learn_rate)
    err.reverse()
table = tabulate(data, headers=['Learn rate/decay speed'] + decay_speeds)
print('MSE for gradient descent on training set:\n')
print(table)

MSE for gradient descent on training set:

  Learn rate/decay speed    1e-05    1e-06    1e-07    1e-08    1e-09    1e-10
------------------------  -------  -------  -------  -------  -------  -------
                   1e-05  1.08468  1.08468  1.08468  1.08468  1.08468  1.08468
                   1e-06  1.08468  1.08468  1.08468  1.08468  1.08468  1.08468
                   1e-07  1.0847   1.0847   1.0847   1.08471  1.08471  1.08484
                   1e-08  1.08685  1.08685  1.08687  1.08702  1.08849  1.09433
                   1e-09  1.10921  1.10922  1.10927  1.10975  1.1141   1.13619
                   1e-10  1.28816  1.28822  1.28886  1.2952   1.35573  1.7013


In [59]:
data = deepcopy(valid_grad_errors)
for err, learn_rate in zip(data, learn_rates):
    err.append(learn_rate)
    err.reverse()
table = tabulate(data, headers=['Learn rate/decay speed'] + decay_speeds)
print('MSE for gradient descent on validation set:\n')
print(table)

MSE for gradient descent on validation set:

  Learn rate/decay speed    1e-05    1e-06    1e-07    1e-08    1e-09    1e-10
------------------------  -------  -------  -------  -------  -------  -------
                   1e-05  1.02033  1.02033  1.02033  1.02033  1.02033  1.02033
                   1e-06  1.02034  1.02034  1.02034  1.02034  1.02035  1.02035
                   1e-07  1.02052  1.02052  1.02052  1.02052  1.02055  1.02091
                   1e-08  1.02362  1.02362  1.02364  1.02381  1.02533  1.03106
                   1e-09  1.04876  1.04876  1.04882  1.04938  1.0544   1.07942
                   1e-10  1.23434  1.23441  1.23505  1.24146  1.30268  1.65364


In [62]:
data = deepcopy(grad_runtimes)
for runtime, learn_rate in zip(data, learn_rates):
    runtime.append(learn_rate)
    runtime.reverse()
table = tabulate(data, headers=['Learn rate/decay speed'] + decay_speeds)
print('Runtime (s) for gradient descent:\n')
print(table)

Runtime (s) for gradient descent:

  Learn rate/decay speed      1e-05      1e-06      1e-07      1e-08      1e-09      1e-10
------------------------  ---------  ---------  ---------  ---------  ---------  ---------
                   1e-05  0.0221374  0.0220525  0.0231824  0.0221987  0.0247433  0.0257566
                   1e-06  0.202772   0.180613   0.166153   0.173438   0.156009   0.201105
                   1e-07  1.06012    0.908098   1.09969    0.993882   1.27826    1.87776
                   1e-08  2.73481    2.69563    2.67626    2.68189    2.21886    0.81861
                   1e-09  1.72633    1.4209     1.56289    1.34611    1.39479    0.843215
                   1e-10  1.92435    2.0231     2.48358    1.62809    1.62576    0.978847
