In [57]:
from sklearn.datasets import load_boston
from sklearn.metrics import explained_variance_score
import numpy as np

x = load_boston().data
y = load_boston().target

In [58]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=29)

x_train.shape

(379, 13)

In [59]:
def polynom(m, deg):
    n_rows, n_cols = m.shape[0], m.shape[1]
    nm = np.zeros((m.shape[0],n_cols*deg))
    for d in range(deg):
        for c in range(n_cols):
            for r in range(n_rows):
                nm[r][n_cols*d+c] = m[r][c]**(d+1)
    return nm

In [60]:
# scaling
def scaling(X):
    for c in range(X.shape[1]):
        X[:,c] = [(i - np.mean(X[:,c])) / (max(X[:,c]) - min(X[:,c])) for i in X[:,c]]
    return X

# normalize
def normal(X):
    return X / X.max(axis=0)

In [61]:
def predict(X, theta):
    return np.c_[np.ones(len(X)), normal(X)] @ theta

In [62]:
def normalEq(X, y):
    X = np.c_[np.ones(len(X)), normal(X)]
    return (np.linalg.inv(X.T @ X) @ X.T @ y).T

theta = normalEq(x_train, y_train)

print(theta)

preds = predict(x_test, theta)

print(f'\nNorm Eq EVS: {explained_variance_score(preds, y_test)}')

[ 35.98189827  -6.88140442   5.31800698   1.01840583   3.43338847
 -12.89840309  31.08338357  -0.25493095 -16.56781587   7.73387839
  -9.19866233 -21.23376392   4.39090559 -20.92958659]

Norm Eq EVS: 0.717097886997718


$X^{'}X=\begin{bmatrix} 7 & 38.5\\  38.5& 218.75 \end{bmatrix}$

In [63]:
def BGD(X, y, n_iter, eta):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    for i in range(n_iter):
        for c in range(X.shape[1]):
            nX = X[:,c]
            nT = theta[:,c]
            gradients = ((X @ theta.T) - y.T).T @ nX
            theta[:,c] = nT - eta * gradients
    return theta.T

theta = BGD(polynom(x_train, 2), y_train, 1000, 0.001)

print(theta)

preds = predict(polynom(x_test, 2), theta)

print(f'\nBGD EVS: {explained_variance_score(preds, y_test)}')

[[ 21.8931335 ]
 [ -5.43487448]
 [  0.39796692]
 [ -2.42543318]
 [  1.89296495]
 [  0.72446924]
 [ 12.31921992]
 [ -3.8861149 ]
 [ -9.21228673]
 [  6.66478731]
 [ -3.68334728]
 [ -2.10035867]
 [  6.26678892]
 [-26.30110318]
 [ -1.50203824]
 [  4.53380238]
 [  4.13137407]
 [  1.61333946]
 [ -7.28443507]
 [ 19.95960147]
 [  3.17775833]
 [ -2.71401281]
 [  0.06367914]
 [ -4.11859494]
 [ -9.39385649]
 [ -1.10433132]
 [ 10.49937459]]

BGD EVS: 0.7402080039764631


In [64]:
import time

start = time.clock()

def learning_schedule(t):
    return 5 / (t + 50)

def SGD(X, y, n_epoch, eta):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    m = X.shape[0]
    for epoch in range(1, n_epoch):
        for n in range(m):
            for c in range(X.shape[1]):
                r = np.random.randint(0, m-1)
                xi, yi = X[r:r+1], y.T[r:r+1]
                gradients = ((xi @ theta.T) - yi.T).T @ xi[:,c]
#                 eta = eta/epoch
                theta[:,c] = theta[:,c] - (gradients * eta)
    return theta.T

theta = SGD(polynom(x_train, 2), y_train, 50, 0.01)

print(theta)

preds = predict(polynom(x_test, 2), theta)

print(f'\nSGD EVS: {explained_variance_score(preds, y_test)}')

print(time.clock() - start)

[[  8.13744458]
 [ -1.49712867]
 [  2.03728367]
 [ -0.49084929]
 [  4.40733603]
 [  8.95151053]
 [ 22.15881621]
 [  8.03065796]
 [ -6.49531498]
 [  2.02944954]
 [ -4.78797264]
 [  0.87613155]
 [  9.02382867]
 [-16.98927621]
 [ -2.16044495]
 [  3.77697549]
 [  1.80657927]
 [ -1.33602727]
 [ -9.57735991]
 [ 13.93369507]
 [ -7.33688433]
 [ -4.55990506]
 [  5.9088325 ]
 [ -5.74020734]
 [-13.52924355]
 [ -1.30876696]
 [  1.38733344]]

SGD EVS: 0.7162274658139319
4.337443999999998


In [65]:
import time

start = time.clock()

def MBGD(X, y, n_epoch, batch_size, eta):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    m = X.shape[0]
    for epoch in range(n_epoch):
        shuffi = np.random.permutation(m)
        xs, ys = X[shuffi], y.T[shuffi]
        for i in range(0, m, batch_size):
            for c in range(X.shape[1]):
                xi, yi = X[i:i+batch_size], y.T[i:i+batch_size]
                gradients = ((xi @ theta.T) - yi).T @ xi[:,c]
                theta[:,c] = theta[:,c] - (gradients * eta)
    return theta.T
        
theta = MBGD(polynom(x_train, 2), y_train, 500, 64, 0.01)

print(theta)

preds = predict(polynom(x_test, 2), theta)

print(f'\n MBGD EVS: {explained_variance_score(preds, y_test)}')

print(time.clock() - start)

[[ 37.04309579]
 [-16.75859051]
 [ -3.89250989]
 [ -9.40240935]
 [  5.93247627]
 [  2.56530559]
 [ -3.35917434]
 [ -5.92062448]
 [-17.38539747]
 [ 11.06770553]
 [ -5.32112355]
 [  5.24106173]
 [ 12.54123495]
 [-55.40492418]
 [  8.26405065]
 [  7.37683227]
 [ 10.86916509]
 [ -2.60188315]
 [-10.2671643 ]
 [ 24.10659902]
 [  5.92829214]
 [  4.96700177]
 [ -1.7045288 ]
 [ -3.15393348]
 [-13.84472861]
 [ -7.51091762]
 [ 38.73947415]]

 MBGD EVS: 0.755299048453566
0.45688099999999565


In [73]:
import numpy.linalg as LA

def Ridge(X, y, n_epoch, batch_size, eta, alpha):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    m = X.shape[0]
    print(m)
    for epoch in range(n_epoch):
        shuffi = np.random.permutation(m)
        xs, ys = X[shuffi], y.T[shuffi]
        for i in range(0, m, batch_size):
            for c in range(X.shape[1]):
                r = np.random.randint(0, m-1)
                xi, yi = X[i:i+batch_size], y.T[i:i+batch_size]
                gradients = ((xi @ theta.T) - yi).T @ xi[:,c]
                theta[:,c] = theta[:,c] - eta * (gradients + (alpha * LA.norm(theta)**2))
    return theta.T

theta = Ridge(polynom(x_train, 2), y_train, 50, 64, 0.01, 1.0)

print(theta)

preds = predict(polynom(x_test, 2), theta)

print(f'\n Ridge EVS: {explained_variance_score(preds, y_test)}')

379
[[ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]
 [ nan]]




ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [75]:
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

polybig_features = PolynomialFeatures(degree=2, include_bias=False)
std_scaler = StandardScaler()
lin_reg = LinearRegression()
polynomial_regression = Pipeline([
        ("poly_features", polybig_features),
        ("std_scaler", std_scaler),
        ("lin_reg", lin_reg),
    ])
polynomial_regression.fit(x_train, y_train)
preds = polynomial_regression.predict(x_test)

In [78]:
m = Ridge(alpha = 10)

m.fit(polynom(x_train, 2), y_train)

preds = m.predict(polynom(x_test, 2))

print('weights: ')
print(m.coef_)
print('Intercept: ')
print(m.intercept_)

print(f'\n Scikit learn lin reg EVS: {explained_variance_score(y_test, preds)}')

weights: 
[ -4.45795003e-01  -4.20281709e-02  -1.13638279e-01   1.26681724e+00
  -1.43233686e+00  -2.18378528e+00  -3.29550428e-02  -1.37782981e+00
   5.43332742e-01  -3.16984967e-02  -1.81378244e+00   2.67667275e-02
  -1.80841526e+00   4.67544909e-03   7.78662251e-04   3.97695328e-03
   1.26681724e+00  -1.98542113e+00   4.07584210e-01   3.40675265e-04
   4.48095854e-02  -7.28036088e-03   2.27704613e-05   3.08977838e-02
  -3.93823629e-05   3.58743805e-02]
Intercept: 
66.143471914

 Scikit learn lin reg EVS: 0.7704624730353917
