In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import autograd.numpy as np
from autograd import grad

from GradientDescent import Plain, Stochastic
import utils

In [2]:
n = 100
degree = 2
x = np.linspace(0, 1, n)

X = np.empty((n, degree+1))
X[:,0] = 1
X[:,1] = x
X[:,2] = x**2

y =  3 + 7*x + 5*x**2

X_train, X_test, y_train, y_test = train_test_split(X, y)

scaler = StandardScaler()
X_train[:,1:] = scaler.fit_transform(X_train[:,1:])
X_test[:,1:] = scaler.transform(X_test[:,1:])

analytic_beta = [utils.analytic_beta_OLS(X_train, y_train)]

beta = np.ones(degree+1) * 0.1

In [3]:
lmbda= 0.1

beta_analytic_OLS = np.linalg.inv(X_train.T @ X_train) @ X_train.T @ y_train
beta_analytic_Ridge = np.linalg.inv(X_train.T @ X_train + lmbda * np.eye(len(X[0]))) @ X_train.T @ y_train

In [4]:
# plain with analytic gradient OLS

optimizer = Plain(lr=0.1, max_iter=2000, momentum=0.5, tuner="adam")
optimizer.set_gradient(utils.analytic_grad_OLS)
beta_prediction = optimizer.gradient_descent(X_train, [np.copy(beta)], y_train)

print(f"analytic solution: {beta_analytic_OLS}")
print(f"computed solution: {beta_prediction[0]}")
print(f"cost: {utils.cost_OLS(X_test, beta_prediction, y_test)}")

100%|██████████| 2000/2000 [00:00<00:00, 61292.90it/s]

analytic solution: [8.28916097 2.0324426  1.49393092]
computed solution: [8.07925122 2.21578373 1.67727204]
cost: [np.float64(0.20402075555865423)]





In [12]:
# plain with analytic gradient Ridge

optimizer = Plain(lr=0.01, max_iter=1000, momentum=0.3, tuner="adam")
optimizer.set_gradient(utils.analytic_grad_Ridge(lmbda=lmbda))
beta_prediction = optimizer.gradient_descent(X_train, [np.copy(beta)], y_train)

print(f"analytic solution: {beta_analytic_Ridge}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 1000/1000 [00:00<00:00, 41352.93it/s]

analytic solution: [8.27812347 2.02144069 1.50254132]
computed solution: [8.2836595  2.02661775 1.49855956]





In [None]:
# plain with autograd OLS

# optimizer = Plain(lr=0.01, max_iter=1000, momentum=0.3, tuner="adam")
# gradient = grad(utils.cost_OLS, 1)
# print(utils.cost_OLS(X_train, [np.copy(beta)], y_train))
# print(gradient(X_train, [np.copy(beta)], y_train))
# optimizer.set_gradient(gradient)
# beta_prediction = optimizer.gradient_descent(X_train, [np.copy(beta)], y_train)

# print(f"analytic solution: {beta_analytic_OLS}")
# print(f"computed solution: {beta_prediction[0]}")
# print(f"cost: {utils.cost_OLS(X_test, beta_prediction, y_test)}")



[np.float64(77.93706688384918)]
[array([0., 0., 0.])]


  1%|          | 10/1000 [00:00<00:00, 4714.82it/s]

analytic solution: [8.28916097 2.0324426  1.49393092]
computed solution: [0.1 0.1 0.1]
cost: [np.float64(71.5191106850619)]





In [None]:
# plain with autograd Ridge

# optimizer = Plain(lr=0.01, max_iter=1000, momentum=0.3, tuner="adam")
# cost_ridge = utils.cost_Ridge(lmbda=lmbda)
# gradient = grad(cost_ridge, 1)
# optimizer.set_gradient(gradient)
# beta_prediction = optimizer.gradient_descent(X_train, [np.copy(beta)], y_train)

# print(f"analytic solution: {beta_analytic_Ridge}")
# print(f"computed solution: {beta_prediction[0]}")

  1%|          | 10/1000 [00:00<00:00, 5573.83it/s]

analytic solution: [8.27812347 2.02144069 1.50254132]
computed solution: [0.1 0.1 0.1]





In [20]:
# stochastic with analytic gradient OLS

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.5, tuner="adam", M=4, n_epochs=10)
optimizer.set_gradient(utils.analytic_grad_OLS)
beta_prediction = optimizer.gradient_descent(X_train, [np.copy(beta)], y_train)

print(f"analytic solution: {beta_analytic_OLS}")
print(f"computed solution: {beta_prediction[0]}")
print(f"cost: {utils.cost_OLS(X_test, beta_prediction, y_test)}")

100%|██████████| 10/10 [00:00<00:00, 1975.84it/s]

analytic solution: [8.28916097 2.0324426  1.49393092]
computed solution: [8.72051269 1.68204267 0.55136616]
cost: [np.float64(2.0865344224114293)]





In [None]:
# stochastic with analytic gradient Ridge

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.3, tuner="adam", M=10, n_epochs=100)
optimizer.set_gradient(analytic_grad_Ridge(lmbda=lmbda))
beta_prediction = optimizer.gradient_descent(X_train, beta, y_train)

print(f"analytic solution: {beta_analytic_Ridge}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 100/100 [00:00<00:00, 14056.92it/s]

analytic solution: [4.94269621 2.02145176 2.99827824]
computed solution: [-1515.62642324 -1517.6454077  -1516.46917111]





In [None]:
# stochastic with autograd OLS

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.3, tuner="adam", M=10, n_epochs=100)
gradient = grad(cost_OLS, 1)
optimizer.set_gradient(gradient)
beta_prediction = optimizer.gradient_descent(X_train, beta, y_train)

print(f"analytic solution: {beta_analytic_OLS}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 100/100 [00:00<00:00, 3334.24it/s]

analytic solution: [5. 2. 3.]
computed solution: [-1515.62642324 -1517.6454077  -1516.46917111]





In [None]:
# stochastic with autograd Ridge

optimizer = Stochastic(lr=0.1, lr_schedule="linear", momentum=0.3, tuner="adam", M=10, n_epochs=100)
cost_ridge = cost_Ridge(lmbda=lmbda)
gradient = grad(cost_ridge, 1)
optimizer.set_gradient(gradient)
beta_prediction = optimizer.gradient_descent(X_train, beta, y_train)

print(f"analytic solution: {beta_analytic_Ridge}")
print(f"computed solution: {beta_prediction[0]}")

100%|██████████| 100/100 [00:00<00:00, 1397.58it/s]

analytic solution: [4.94269621 2.02145176 2.99827824]
computed solution: [-1515.62642324 -1517.6454077  -1516.46917111]



