In [1]:
from sklearn.datasets import load_boston
from sklearn.metrics import explained_variance_score
import numpy as np

x = load_boston().data
y = load_boston().target

In [2]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=29)

x_train.shape

(379, 13)

In [3]:
def polynom(m, deg):
    n_rows, n_cols = m.shape[0], m.shape[1]
    nm = np.zeros((m.shape[0],n_cols*deg))
    for d in range(deg):
        for c in range(n_cols):
            for r in range(n_rows):
                nm[r][n_cols*d+c] = m[r][c]**(d+1)
    return nm

In [4]:
# scaling
def scaling(X):
    for c in range(X.shape[1]):
        X[:,c] = [(i - np.mean(X[:,c])) / (max(X[:,c]) - min(X[:,c])) for i in X[:,c]]
    return X

# normalize
def normal(X):
    return X / X.max(axis=0)

In [5]:
def predict(X, theta):
    return np.c_[np.ones(len(X)), normal(X)] @ theta

In [6]:
def normalEq(X, y):
    X = np.c_[np.ones(len(X)), normal(X)]
    return (np.linalg.inv(X.T @ X) @ X.T @ y).T

theta = normalEq(x_train, y_train)

print(theta)

preds = predict(x_test, theta)

print(f'\nNorm Eq EVS: {explained_variance_score(preds, y_test)}')

[ 35.98189827  -6.88140442   5.31800698   1.01840583   3.43338847
 -12.89840309  31.08338357  -0.25493095 -16.56781587   7.73387839
  -9.19866233 -21.23376392   4.39090559 -20.92958659]

Norm Eq EVS: 0.7170978869977083


In [7]:
class Ridge:
    def __init__(self, a):
        self.a = a
    
    def fit(self, X, y):
        X = np.c_[np.ones(len(X)), normal(X)]
        self.theta = (np.linalg.inv(X.T @ X + self.a * np.identity(1)) @ X.T @ y)
        
    def predict(self, X):
        return np.c_[np.ones(len(X)), normal(X)] @ self.theta

m = Ridge(10)

m.fit(x_train, y_train)

preds = m.predict(x_test)
        
# theta = RidgeEq(x_train, y_train, 0.1)

# print(theta)

# preds = predict(x_test, theta)

print(f'\nNorm Eq EVS: {explained_variance_score(preds, y_test)}')


Norm Eq EVS: 0.7181512345558513


$X^{'}X=\begin{bmatrix} 7 & 38.5\\  38.5& 218.75 \end{bmatrix}$

In [8]:
def BGD(X, y, n_iter, eta):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    for i in range(n_iter):
        for c in range(X.shape[1]):
            nX = X[:,c]
            nT = theta[:,c]
            gradients = ((X @ theta.T) - y.T).T @ nX
            theta[:,c] = nT - eta * gradients
    return theta.T

theta = BGD(polynom(x_train, 2), y_train, 1000, 0.001)

print(theta.T)

preds = predict(polynom(x_test, 2), theta)

print(f'\nBGD EVS: {explained_variance_score(preds, y_test)}')

[[ 21.74217009  -5.30632933   0.53430923  -2.35034143   1.67833571
    0.62139854  11.8716317   -3.40822492  -8.99214704   6.47421359
   -3.92826057  -1.6645482    6.58684841 -26.34774889  -1.59086153
    4.41198395   4.06461451   1.82198668  -7.16369361  20.26714673
    2.79609624  -2.93137156   0.16826899  -3.86324929  -9.64207408
   -1.38285068  10.54366763]]

BGD EVS: 0.7408746453745261


In [9]:
import time

start = time.clock()

def learning_schedule(t):
    return 5 / (t + 50)

def SGD(X, y, n_epoch, eta):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    m = X.shape[0]
    for epoch in range(1, n_epoch):
        for n in range(m):
            for c in range(X.shape[1]):
                r = np.random.randint(0, m-1)
                xi, yi = X[r:r+1], y.T[r:r+1]
                gradients = ((xi @ theta.T) - yi.T).T @ xi[:,c]
#                 eta = eta/epoch
                theta[:,c] = theta[:,c] - (gradients * eta)
    return theta.T

theta = SGD(polynom(x_train, 2), y_train, 50, 0.01)

print(theta.T)

preds = predict(polynom(x_test, 2), theta)

print(f'\nSGD EVS: {explained_variance_score(preds, y_test)}')

print(time.clock() - start)

[[ 10.88821029  -3.67920476  -0.02257505  -6.0309697    2.79233587
    4.2464707   17.759932    -4.54678429  -5.66035717   2.84398109
   -2.43593795  -0.65305708   5.88038122 -16.57638232  -2.17180453
    4.41628243   4.8800417    0.35048162  -2.69746067  20.56356765
    3.0188748   -4.29538086   1.83978228  -1.34886525  -9.84168197
   -0.46014786   1.24602146]]

SGD EVS: 0.679973965561697
2.059884


In [10]:
import time

start = time.clock()

class MBGD:
    def __init__(self, n_epoch, batch_size, eta):
        self.n_epoch = n_epoch
        self.batch_size = batch_size
        self.eta = eta
        
    def fit(self, X, y):
        X = np.c_[np.ones(len(X)), normal(X)]
        y = np.array([y.T])
        self.theta = np.random.rand(1, X.shape[1])
        m = X.shape[0]
        for epoch in range(self.n_epoch):
            shuffi = np.random.permutation(m)
            xs, ys = X[shuffi], y.T[shuffi]
            for i in range(0, m, self.batch_size):
                for c in range(X.shape[1]):
                    xi, yi = X[i:i+self.batch_size], y.T[i:i+self.batch_size]
                    gradients = ((xi @ self.theta.T) - yi).T @ xi[:,c]
                    self.theta[:,c] = self.theta[:,c] - (gradients * self.eta)
        self.theta = self.theta.T
        
    def predict(self, X):
        return np.c_[np.ones(len(X)), normal(X)] @ self.theta
        
m = MBGD(500, 64, 0.01)

m.fit(polynom(x_train, 2), y_train)

print(theta.T)

preds = m.predict(polynom(x_test, 2))

print(f'\n MBGD EVS: {explained_variance_score(preds, y_test)}')

print(time.clock() - start)

NameError: name 'theta' is not defined

In [11]:
def Ridge(X, y, n_iter, eta, alpha):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    best = theta
    for i in range(n_iter):
        for c in range(X.shape[1]):
            nX = X[:,c]
            nT = theta[:,c]
            gradients = (((X @ theta.T) - y.T).T @ nX) + 2 * alpha * theta[:,c]
            theta[:,c] = nT - eta * gradients
        b = explained_variance_score(X @ best.T, y.T)
        c = explained_variance_score(X @ theta.T, y.T)
        if c > b:
            best = theta
    return (best.T, theta.T)

theta, l = Ridge(polynom(x_train, 2), y_train, 10000, 0.01, 0.1)

print(theta.T)

preds = predict(polynom(x_test, 2), theta)

predsl = predict(polynom(x_test, 2), l)

print(f'\nRidge Best EVS: {explained_variance_score(preds, y_test)}')
print(f'\nRidge Last EVS: {explained_variance_score(predsl, y_test)}')

[[ 30.42482636 -12.55536164  -1.35015479  -1.73401023   2.87320091
   14.41951094   9.74910917  -2.41877145 -14.61110548  10.64808221
   -5.90384771  -9.52925565   5.03037408 -47.93320259   4.19514921
    6.11356811   3.51499506   0.47791253 -17.18588212  15.17218192
    2.59964845   3.77660859  -2.6726566   -2.39030449  -5.61163604
   -0.78359791  31.21104229]]

Ridge Best EVS: 0.7408145666508517

Ridge Last EVS: 0.7408145666508517


In [12]:
def LASSO(X, y, n_iter, eta, alpha):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    for i in range(n_iter):
        for c in range(X.shape[1]):
            nX = X[:,c]
            nT = theta[:,c]
            gradients = ((X @ theta.T) - y.T).T @ nX + alpha * np.sign(theta[:,c])
            theta[:,c] = nT - eta * gradients
    return theta.T

theta = LASSO(polynom(x_train, 2), y_train, 1000, 0.001, 1)

print(theta.T)

preds = predict(polynom(x_test, 2), theta)

print(f'\nLASSO EVS: {explained_variance_score(preds, y_test)}')

[[ 20.99799322  -5.03747222   0.46050264  -1.81546819   1.36664584
    0.30291227  11.76209688  -2.68353306  -8.59475062   6.27777115
   -3.54172642  -1.5625417    5.95293767 -26.04618397  -1.31904031
    4.41984406   3.44889254   2.13078318  -6.65372168  20.41315838
    2.21489812  -2.88449461  -0.03883526  -3.69426378  -9.53280081
   -0.74353722  10.16485699]]

LASSO EVS: 0.7368465669120126


In [17]:
def Logistic(X, y, n_epoch, batch_size, eta, alpha):
    X = np.c_[np.ones(len(X)), normal(X)]
    y = np.array([y.T])
    theta = np.random.rand(1, X.shape[1])
    m = X.shape[0]
    for epoch in range(n_epoch):
        shuffi = np.random.permutation(m)
        xs, ys = X[shuffi], y.T[shuffi]
        for i in range(0, m, batch_size):
            for c in range(X.shape[1]):
                xi, yi = X[i:i+batch_size], y.T[i:i+batch_size]
                gradients = ((xi @ theta.T) - yi).T @ xi[:,c]
                theta[:,c] = theta[:,c] - (gradients * eta)
    return theta.T

theta = Logistic(polynom(x_train, 2), y_train, 500, 64, 0.001, 1)

print(theta.T)

preds = predict(polynom(x_test, 2), theta)

print(f'\nLogistic EVS: {explained_variance_score(preds, y_test)}')

[[ 13.71905921  -3.76414787   2.12756793  -0.75150958   2.05674768
    1.4973839   15.07223963  -0.93310484  -6.41674131   4.78512963
   -2.22639594  -1.42430276   5.19254321 -18.07414423  -1.44734589
    3.29506719   1.94645032   1.60604217  -5.28141408  20.45313134
    0.37128903  -4.70357522   0.34435075  -4.04671026  -8.47558385
    0.37232364   2.43104434]]

Logistic EVS: 0.7114146615851674


In [118]:
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

polybig_features = PolynomialFeatures(degree=2, include_bias=False)
std_scaler = StandardScaler()
lin_reg = LinearRegression()
polynomial_regression = Pipeline([
        ("poly_features", polybig_features),
        ("std_scaler", std_scaler),
        ("lin_reg", lin_reg),
    ])
polynomial_regression.fit(x_train, y_train)
preds = polynomial_regression.predict(x_test)

In [119]:
m = Ridge(alpha = 10)

m.fit(polynom(x_train, 2), y_train)

preds = m.predict(polynom(x_test, 2))

print('weights: ')
print(m.coef_)
print('Intercept: ')
print(m.intercept_)

print(f'\n Scikit learn ridge reg EVS: {explained_variance_score(y_test, preds)}')

weights: 
[ -4.45795003e-01  -4.20281709e-02  -1.13638279e-01   1.26681724e+00
  -1.43233686e+00  -2.18378528e+00  -3.29550428e-02  -1.37782981e+00
   5.43332742e-01  -3.16984967e-02  -1.81378244e+00   2.67667275e-02
  -1.80841526e+00   4.67544909e-03   7.78662251e-04   3.97695328e-03
   1.26681724e+00  -1.98542113e+00   4.07584210e-01   3.40675265e-04
   4.48095854e-02  -7.28036088e-03   2.27704613e-05   3.08977838e-02
  -3.93823629e-05   3.58743805e-02]
Intercept: 
66.143471914

 Scikit learn ridge reg EVS: 0.7704624730353917
