In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import autograd.numpy as np
from autograd import grad

from GradientDescent import Plain, Stochastic
from utils import analytic_grad_OLS, analytic_grad_Ridge, cost_OLS, cost_Ridge

In [2]:
# Synthetic regression problem: a noise-free quadratic sampled on [0, 10].
degree = 2
n = 100

x = np.linspace(0, 10, n)

# Design matrix with columns x**0, x**1, ..., x**degree
# (column 0 is the constant/intercept column).
X = np.column_stack([x**d for d in range(degree + 1)])

y = 3 * x**2 + 2 * x + 5

# TODO: decide whether a train/test split is wanted at all for this check.
# random_state pins the shuffle so the same split is produced after
# Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [3]:
# TODO: decide whether feature scaling is needed; the disabled code below standardises every column except the intercept.
# scaler = StandardScaler()
# X_train[:,1:] = scaler.fit_transform(X_train[:,1:])
# X_test[:,1:] = scaler.transform(X_test[:,1:])

In [4]:
# Random initial guess for the coefficients, one entry per column of X.
# The optimizers below take beta wrapped in a list.
# Seed the generator first so the starting point (and therefore every
# result printed below) is reproducible across kernel restarts.
np.random.seed(42)
beta = [np.random.randn(degree + 1)]

In [5]:
# Ridge penalty strength, shared by the closed-form solution and the
# Ridge gradient-descent runs below.
lmbda = 0.1

# Closed-form reference solutions of the normal equations, used to check the
# gradient-descent results. The linear systems are solved directly instead of
# forming an explicit matrix inverse, which is less accurate and does
# unnecessary work.
XtX = X_train.T @ X_train
Xty = X_train.T @ y_train

beta_analytic_OLS = np.linalg.solve(XtX, Xty)
# The identity is sized from the training matrix itself so this cell depends
# only on the data it actually fits.
beta_analytic_Ridge = np.linalg.solve(XtX + lmbda * np.eye(X_train.shape[1]), Xty)

In [14]:
# Plain gradient descent with the analytic OLS gradient.
# NOTE(review): the saved output of this cell reports 10000 iterations, not
# the 1000 configured here -- re-run the notebook to refresh it.

optimizer = Plain(lr=0.01, max_iter=1000, momentum=0.3, tuner="adam")
optimizer.set_gradient(analytic_grad_OLS)
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_OLS}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 10000/10000 [00:00<00:00, 95141.94it/s]

analytic solution: [5. 2. 3.]
computed solution: [4.99814174 1.99814174 2.99814174]





In [7]:
# Plain gradient descent with the analytic Ridge gradient.

optimizer = Plain(lr=0.01, max_iter=1000, momentum=0.3, tuner="adam")
# analytic_grad_Ridge is called with the penalty strength and its return
# value is what gets registered as the gradient.
optimizer.set_gradient(analytic_grad_Ridge(lmbda=lmbda))
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_Ridge}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 1000/1000 [00:00<00:00, 88894.39it/s]

analytic solution: [4.95097963 2.01866611 2.99847426]
computed solution: [4.90562551 2.04006184 2.99441998]





In [8]:
# Plain gradient descent with an autograd-derived OLS gradient.

optimizer = Plain(lr=0.01, max_iter=1000, momentum=0.3, tuner="adam")
# Differentiate cost_OLS with respect to its argument at index 1
# (presumably beta -- confirm against utils.cost_OLS).
gradient = grad(cost_OLS, 1)
optimizer.set_gradient(gradient)
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_OLS}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 1000/1000 [00:00<00:00, 22327.35it/s]

analytic solution: [5. 2. 3.]
computed solution: [4.90562551 2.04006184 2.99441998]





In [9]:
# Plain gradient descent with an autograd-derived Ridge gradient.

optimizer = Plain(lr=0.01, max_iter=1000, momentum=0.3, tuner="adam")
# cost_Ridge is called with the penalty strength; its return value is the
# cost function handed to autograd.
cost_ridge = cost_Ridge(lmbda=lmbda)
# Differentiate with respect to the argument at index 1
# (presumably beta -- confirm against utils.cost_Ridge).
gradient = grad(cost_ridge, 1)
optimizer.set_gradient(gradient)
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_Ridge}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 1000/1000 [00:00<00:00, 11074.54it/s]

analytic solution: [4.95097963 2.01866611 2.99847426]
computed solution: [4.90562551 2.04006184 2.99441998]





In [10]:
# Stochastic gradient descent with the analytic OLS gradient.

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.3, tuner="adam", M=10, n_epochs=100)
optimizer.set_gradient(analytic_grad_OLS)
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_OLS}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 100/100 [00:00<00:00, 13341.93it/s]

analytic solution: [5. 2. 3.]
computed solution: [4.90562551 2.04006184 2.99441998]





In [11]:
# Stochastic gradient descent with the analytic Ridge gradient.

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.3, tuner="adam", M=10, n_epochs=100)
# analytic_grad_Ridge is called with the penalty strength and its return
# value is what gets registered as the gradient.
optimizer.set_gradient(analytic_grad_Ridge(lmbda=lmbda))
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_Ridge}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 100/100 [00:00<00:00, 13539.18it/s]

analytic solution: [4.95097963 2.01866611 2.99847426]
computed solution: [4.90562551 2.04006184 2.99441998]





In [12]:
# Stochastic gradient descent with an autograd-derived OLS gradient.

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.3, tuner="adam", M=10, n_epochs=100)
# Differentiate cost_OLS with respect to its argument at index 1
# (presumably beta -- confirm against utils.cost_OLS).
gradient = grad(cost_OLS, 1)
optimizer.set_gradient(gradient)
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_OLS}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 100/100 [00:00<00:00, 1258.90it/s]

analytic solution: [5. 2. 3.]
computed solution: [4.90562551 2.04006184 2.99441998]





In [13]:
# Stochastic gradient descent with an autograd-derived Ridge gradient.

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.3, tuner="adam", M=10, n_epochs=100)
# cost_Ridge is called with the penalty strength; its return value is the
# cost function handed to autograd.
cost_ridge = cost_Ridge(lmbda=lmbda)
# Differentiate with respect to the argument at index 1
# (presumably beta -- confirm against utils.cost_Ridge).
gradient = grad(cost_ridge, 1)
optimizer.set_gradient(gradient)
# Hand the optimizer its own copy of the initial guess so this run neither
# depends on nor alters the shared `beta` reused by the other experiments.
beta_start = [b.copy() for b in beta]
beta_prediction = optimizer.gradient_descent(X_train, beta_start, y_train)

print(f"analytic solution: {beta_analytic_Ridge}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 100/100 [00:00<00:00, 1929.57it/s]

analytic solution: [4.95097963 2.01866611 2.99847426]
computed solution: [4.90562551 2.04006184 2.99441998]



