In [1]:
# Training models with Gradient Descent
import numpy as np

# Synthetic linear data set: 100 samples following y = 4 + 3x plus Gaussian noise.
X = np.random.randn(100, 1) * 2
noise = np.random.randn(100, 1)
y = 3 * X + 4 + noise

In [2]:
# Prepend a column of ones so the intercept is learned as the first parameter.
bias_column = np.ones((100, 1))
X_b = np.c_[bias_column, X]

# Normal equation: theta = (X_b^T X_b)^-1 X_b^T y
gram = X_b.T.dot(X_b)
theta_best = np.linalg.inv(gram).dot(X_b.T).dot(y)
theta_best  # the data was generated with intercept 4 and slope 3

array([[ 3.93217582],
       [ 3.03116889]])

In [3]:
# Batch gradient descent: every step uses the MSE gradient over the whole training set.
eta = 0.1  # learning rate
n_iterations = 1000
m = 100  # number of training instances

theta = np.random.randn(2, 1)  # start from a random parameter vector

for iterations in range(n_iterations):
    residuals = X_b.dot(theta) - y
    gradients = 2 / m * X_b.T.dot(residuals)
    theta -= eta * gradients
theta

array([[ 3.93217582],
       [ 3.03116889]])

In [4]:
# Stochastic gradient descent with a simple learning schedule: the step size
# shrinks as training progresses (in the spirit of simulated annealing).
n_epochs = 50
t0 = 5   # learning schedule hyperparameter (numerator)
t1 = 50  # learning schedule hyperparameter (offset added to the step count)

def learning_schedule(t):
    """Return the learning rate for step ``t``, computed as t0 / (t + t1)."""
    shifted_step = t + t1
    return t0 / shifted_step

theta = np.random.randn(2, 1)  # start from a random parameter vector

for epoch in range(n_epochs):
    for i in range(m):
        # Draw one training instance at random (with replacement).
        random_index = np.random.randint(m)
        picked = slice(random_index, random_index + 1)  # slice keeps the arrays 2-D
        xi, yi = X_b[picked], y[picked]
        # Gradient of the squared error on that single instance.
        residual = xi.dot(theta) - yi
        gradients = 2 * xi.T.dot(residual)
        # The step size decays with the total number of updates done so far.
        eta = learning_schedule(epoch * m + i)
        theta -= eta * gradients
theta  # pretty good result

array([[ 3.9314492 ],
       [ 2.98276792]])

In [5]:
# Stochastic gradient descent with sklearn
from sklearn.linear_model import SGDRegressor

# `n_iter` was deprecated in scikit-learn 0.19 and removed in 0.21, so passing it
# raises a TypeError on current releases. `max_iter` is its replacement, and
# `tol=None` switches off early stopping so that exactly 50 epochs are run,
# which is what `n_iter=50` used to mean.
# penalty=None disables regularization; eta0 is the initial learning rate.
sgd_reg = SGDRegressor(max_iter=50, tol=None, penalty=None, eta0=0.1)
sgd_reg.fit(X, y.ravel())  # flatten the (100, 1) column target to 1-D
sgd_reg.intercept_, sgd_reg.coef_  # should be close to the true values 4 and 3



(array([ 3.99621408]), array([ 3.07183848]))

In [6]:
X_b[1:4, :]  # show rows 1-3 of the bias-augmented design matrix

array([[ 1.        , -2.37071429],
       [ 1.        , -3.00778266],
       [ 1.        , -1.64767276]])

In [7]:
# Polynomial regression: build a noisy quadratic data set.
m = 100
X = np.random.rand(m, 1) * 6 - 3  # features drawn uniformly from [-3, 3)
noise = np.random.randn(m, 1)
y = 0.5 * X**2 + X + 2 + noise  # non-linear (quadratic) target


In [8]:
# Use sklearn's PolynomialFeatures to append the squared feature,
# turning the data set from [X] into [X, X^2].
from sklearn.preprocessing import PolynomialFeatures

# include_bias=False: do not add a constant column of ones.
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit(X).transform(X)

In [10]:
# Fit an ordinary linear regression on the expanded features [X, X^2].
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression().fit(X_poly, y)
# The data was generated as 0.5 * X^2 + X + 2, so compare against intercept 2 and coefficients [1, 0.5].
(lin_reg.intercept_, lin_reg.coef_)

(array([ 1.85111268]), array([[ 1.01551605,  0.55363044]]))

In [None]:
# Learning cur