In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# Feature values, laid out as a single-column matrix (one row per observation)
x = np.array([-2, -5, -3, 0, -6, -2, 1, 5, -1, 3]).reshape(-1, 1)
print("X:", x, sep="\n")

# Target values, same single-column layout as x
y = np.array([1, -4, 1, 3, 11, 5, 0, -1, -3, 1]).reshape(-1, 1)
print("\nY:", y, sep="\n")

X:
[[-2]
 [-5]
 [-3]
 [ 0]
 [-6]
 [-2]
 [ 1]
 [ 5]
 [-1]
 [ 3]]

Y:
[[ 1]
 [-4]
 [ 1]
 [ 3]
 [11]
 [ 5]
 [ 0]
 [-1]
 [-3]
 [ 1]]


In [3]:
# Standardize x column-wise: subtract the column mean, then divide by the
# sample standard deviation (ddof=1, i.e. the n-1 denominator).
x_mean = x.mean(axis=0)
x_std = x.std(axis=0, ddof=1)
x_centered = x - x_mean
x_norm = x_centered / x_std

print(f"x mean: {x_mean}")
print(f"x std dev: {x_std}")
print("\nx matrix normalized:", x_norm, sep="\n")

x mean: [-1.]
x std dev: [3.39934634]

x matrix normalized:
[[-0.2941742 ]
 [-1.17669681]
 [-0.58834841]
 [ 0.2941742 ]
 [-1.47087101]
 [-0.2941742 ]
 [ 0.58834841]
 [ 1.76504522]
 [ 0.        ]
 [ 1.17669681]]


In [4]:
# Add our bias
# The design matrix is rebuilt from x, x_mean and x_std instead of from x_norm
# itself. Re-running this cell therefore always yields exactly one leading
# column of ones; the previous self-referential form stacked an extra bias
# column on every re-run, which makes xT @ x singular.
x_standardized = (x - x_mean) / x_std
bias_column = np.ones((x_standardized.shape[0], 1))
x_norm = np.hstack((bias_column, x_standardized))

# Guard against accidental column growth: one bias column plus the features of x.
assert x_norm.shape == (x.shape[0], x.shape[1] + 1), "unexpected design-matrix shape"

print("Normalized x matrix with bias:")
print(x_norm)

Normalized x matrix with bias:
[[ 1.         -0.2941742 ]
 [ 1.         -1.17669681]
 [ 1.         -0.58834841]
 [ 1.          0.2941742 ]
 [ 1.         -1.47087101]
 [ 1.         -0.2941742 ]
 [ 1.          0.58834841]
 [ 1.          1.76504522]
 [ 1.          0.        ]
 [ 1.          1.17669681]]


In [5]:
# Walk through the normal equation one factor at a time, printing each piece.
print("thetas = inverse(xT @ x) @ (xT @ y) -- where x is standardized")

# Transposed design matrix
x_t = x_norm.T
print("\nxT:", x_t, sep="\n")

# xT @ x
gram = x_t @ x_norm
print("\nxT @ x:", gram, sep="\n")

# Explicit inverse of xT @ x, shown because the formula above is written with it
gram_inv = np.linalg.inv(gram)
print("\ninverse(xT @ x):", gram_inv, sep="\n")

# xT @ y
moment = x_t @ y
print("\nxT @ y:", moment, sep="\n")

# Combine the two factors into the closed-form coefficient estimate
thetas = gram_inv @ moment
print("\nthetas:", thetas, sep="\n")

thetas = inverse(xT @ x) @ (xT @ y) -- where x is standardized

xT:
[[ 1.          1.          1.          1.          1.          1.
   1.          1.          1.          1.        ]
 [-0.2941742  -1.17669681 -0.58834841  0.2941742  -1.47087101 -0.2941742
   0.58834841  1.76504522  0.          1.17669681]]

xT @ x:
[[10.  0.]
 [ 0.  9.]]

inverse(xT @ x):
[[0.1        0.        ]
 [0.         0.11111111]]

xT @ y:
[[ 14.        ]
 [-13.53201332]]

thetas:
[[ 1.4       ]
 [-1.50355704]]


In [6]:
# Cross-check with scikit-learn. x_norm already carries a column of ones,
# so the estimator's own intercept term is switched off.
estimator = LinearRegression(fit_intercept=False)
reg = estimator.fit(x_norm, y)

# Transpose the fitted coefficients into a column so they line up visually
# with the thetas printed by the normal-equation cell.
coef_column = np.array(reg.coef_).T
print("thetas via sklearn linear regression:", coef_column, sep="\n")

thetas via sklearn linear regression:
[[ 1.4       ]
 [-1.50355704]]
