# Test runs
Below are some test runs of my implementations on basic problems.

### `SGD` class on a basic first-degree polynomial line-fitting problem

In [1]:
# NumPy is used throughout the notebook but is not imported by any earlier cell,
# so import it here to keep "Restart Kernel -> Run All" working.
import numpy as np

# Synthetic data for a straight-line fit: y = 4 + 2*x + noise.
n = 100                    # number of samples
np.random.seed(0)          # seed the global NumPy RNG so the run is reproducible
x = np.random.rand(n,1)    # inputs, uniform on [0, 1), shape (n, 1)
# The noise term is uniform on [0, 0.05), i.e. not zero-mean, so a fitted
# intercept is expected to land slightly above 4.
y = 4 + 2*x + .05*np.random.rand(n,1)
# Design matrix: a column of ones (intercept) followed by x, shape (n, 2).
X = np.c_[np.ones(n), x]

In [2]:
# Closed-form OLS reference solution, beta = (X^T X)^{-1} X^T y,
# evaluated left to right exactly as a single chained expression would be.
gram_inverse = np.linalg.inv(X.T @ X)
beta = gram_inverse @ X.T @ y
print('ols-inversion:\n', beta)

ols-inversion:
 [[4.027904  ]
 [1.99682755]]


In [3]:
from StochasticGradientDescent import SGD

# Fit the line-fitting data (X, y) with SGD on the OLS cost; the printed coefficients
# can be compared with the direct-inversion estimate printed by the previous cell.
sgd = SGD(epochs=10, batches=5, learning_schedule='optimal', cost_function='ols')
beta = sgd.run(X, y)
print('ols-sgd:\n', beta)

# Same data with the ridge cost. `lmd` is presumably the L2 penalty strength
# (TODO confirm against StochasticGradientDescent.SGD.run).
# Note: `sgd` and `beta` are rebound here, so the OLS optimizer/result above are discarded.
sgd = SGD(epochs=20, batches=10, learning_schedule='optimal', cost_function='ridge')
beta = sgd.run(X, y, lmd=.1)
print('ridge-sgd:\n', beta)

ols-sgd:
 [[3.99789401]
 [1.97782246]]
ridge-sgd:
 [[3.72007746]
 [1.86825833]]


### `SGD` class together with implementations of OLS and Ridge in `ols.py` and `ridge.py` 

In [4]:
import ols, ridge
# NOTE(review): the wildcard import hides where names come from. This cell uses
# `randmesh` and `make_design_matrix`, which are presumably provided by `utils` —
# confirm, and consider importing the needed names explicitly.
from utils import *

# Synthetic surface data: a second-degree polynomial in (x, y) with coefficients 1..6.
# Note: `x` and `y` are rebound here; from this point on `y` is an input coordinate,
# not the target of the line-fitting example above.
n = 100
# Assumes the returned x and y broadcast against the (n, 1) noise term below — TODO confirm.
x, y = randmesh(sn=n)
# The noise term is uniform on [0, 0.03) (np.random.rand), i.e. not zero-mean.
z = 1. + 2.*x + 3.*y + 4.*x*x + 5*x*y + 6*y*y + .03*np.random.rand(n,1)
# Design matrix for the fit. The six coefficients recovered by the direct inversion in the
# next cell suggest the columns are [1, x, y, x^2, x*y, y^2] for pn=2 — TODO confirm in utils.
X = make_design_matrix(x, y, pn=2)

In [5]:
# Direct inversion
# Reference solution for the surface fit: form (X^T X)^{-1} X^T first,
# then apply it to the targets z (same left-to-right product order).
normal_eq_operator = np.linalg.inv(X.T @ X) @ X.T
beta = normal_eq_operator @ z
print('ols-inversion:\n', beta)

ols-inversion:
 [[1.02429934]
 [1.98277384]
 [2.97928365]
 [4.00860255]
 [5.01623802]
 [6.00975532]]


In [6]:
# Regular OLS
# SGD with a constant learning rate (eta0=.5) on the OLS cost, evaluated through
# 5-fold cross-validation on the second-order design matrix.
# Relies on `SGD` having been imported by an earlier cell.
sgd = SGD(epochs=50, batches=40, learning_schedule='constant', eta0=.5, cost_function='ols')
# The returned frame (displayed below) has columns pol_order, train_mse and test_mse.
df = ols.run_ols_kfold(X, z, sgd, k=5, polynomial_orders=[2])
# `sgd.beta` is read after the run — presumably the coefficients of the last fit performed
# inside run_ols_kfold (TODO confirm which fit it reflects).
print('ols-sgd:\n', sgd.beta)
df

ols-sgd:
 [[1.07049348]
 [2.10085946]
 [2.90572522]
 [3.96732253]
 [4.93328259]
 [6.12359383]]


Unnamed: 0,pol_order,train_mse,test_mse
2,2.0,0.002434,0.002483


In [7]:
# Ridge
# SGD on the ridge cost with an 'invscaling' learning schedule starting from eta0=.5.
sgd = SGD(epochs=100, batches=40, learning_schedule='invscaling', eta0=.5, cost_function='ridge')
# First run: a single lambda, with the returned frame discarded; it is executed only so that
# `sgd.beta` can be printed for that lambda. 0.003594 matches (rounded) the grid value with the
# lowest test MSE in the sweep table displayed below, so it looks hard-coded from an earlier sweep.
ridge.run_ridge_kfold(X, z, sgd, k=5, polynomial_orders=[2], lambdas=[0.003594])
# NOTE(review): the coefficients printed here are far from the direct-inversion ones
# (intercept ~8.4 vs ~1.0) even though the reported MSEs are small — possibly ridge.py
# fits scaled/centered data; confirm.
print('ridge-sgd:\n', sgd.beta)
# Second run: sweep 10 log-spaced lambdas from 1e-3 to 1e-2, reusing the same `sgd` object.
# The returned frame has columns lambda, train_mse_2 and test_mse_2.
df = ridge.run_ridge_kfold(X, z, sgd, k=5, polynomial_orders=[2], lambdas=np.logspace(-3,-2,10))
df

ridge-sgd:
 [[8.43104528]
 [0.47606812]
 [0.64298641]
 [1.24295542]
 [1.0490371 ]
 [1.9085351 ]]


Unnamed: 0,lambda,train_mse_2,test_mse_2
0,0.001,0.003379,0.004296
1,0.001292,0.003106,0.003967
2,0.001668,0.002809,0.003607
3,0.002154,0.002515,0.00324
4,0.002783,0.002271,0.002917
5,0.003594,0.002161,0.002725
6,0.004642,0.00232,0.002807
7,0.005995,0.00296,0.003382
8,0.007743,0.004397,0.004777
9,0.01,0.007102,0.00747


In [79]:
# Manual forward pass through three chained layers, one layer per cell.
from FeedForwardNeuralNetwork import Layer, FFNN
# Judging by the displayed shapes ((5, 4) input -> (5, 3) output for l1), Layer(a, b)
# presumably means `a` nodes fed by `b` inputs — TODO confirm in FeedForwardNeuralNetwork.
l1 = Layer(3, 4)
l2 = Layer(2, 3)
l3 = Layer(1, 2)
# Random 5 x 4 input array; no seed is set in this cell, so the values depend on the
# state of the global NumPy RNG at this point.
data_set = np.random.rand(5,4)
out = l1.feed_forward(data_set)
out

array([[0.74473969, 0.82651211, 0.81383602],
       [0.78663831, 0.86572883, 0.87880888],
       [0.79186047, 0.8849014 , 0.8659692 ],
       [0.60457609, 0.68459139, 0.63852002],
       [0.8383198 , 0.91986642, 0.91733748]])

In [80]:
# Feed layer 1's output through layer 2 (displayed result is 5 x 2).
# NOTE(review): `out` is both input and result, so this cell is not idempotent —
# re-running it feeds layer 2 its own previous output.
out = l2.feed_forward(out)
out

array([[0.77931861, 0.82033729],
       [0.79180812, 0.83367971],
       [0.79328506, 0.83449461],
       [0.73593727, 0.77343768],
       [0.80426663, 0.84596264]])

In [81]:
# Feed layer 2's output through layer 3 to get the final 5 x 1 output.
# NOTE(review): `out` is both input and result, so this cell is not idempotent —
# it must be run exactly once after the layer-2 cell.
out = l3.feed_forward(out)
out

array([[0.73582821],
       [0.73900315],
       [0.73927181],
       [0.72453045],
       [0.74200502]])

In [91]:
# Start of the manual backward pass: derivative of the cost at the network output.
# The argument 4 presumably is the number of input features (data_set has 4 columns) — TODO confirm.
ffnn = FFNN(4)
# Random targets, one per row of `out`, reshaped to a (5, 1) column.
t = np.random.rand(5).reshape((5,1))
# `out` must still hold the layer-3 output from the forward pass above.
cost_derivatives = ffnn.cost_derivatives(out, t)
cost_derivatives

array([[0.55935576],
       [0.34113841],
       [1.44096402],
       [0.21378992],
       [0.25981859]])

In [92]:
# Back-propagate through the last layer. The displayed pair has shapes (5, 1) and (2, 1);
# presumably d holds the layer's deltas and w its weights — TODO confirm in Layer.back_propagate.
(d,w) = l3.back_propagate(cost_derivatives)
(d,w)

(array([[0.1087304 ],
        [0.06579792],
        [0.27774437],
        [0.04266949],
        [0.049738  ]]),
 array([[0.5488135 ],
        [0.71518937]]))

In [94]:
# Carry the error one layer back: (5, 1) @ (1, 2) -> (5, 2), then back-propagate through layer 2.
# NOTE(review): `d` and `w` are consumed and then rebound, so this cell is not idempotent —
# it must be run exactly once, right after the layer-3 back-propagation cell.
cost_derivatives = d @ w.T
(d,w) = l2.back_propagate(cost_derivatives)
(d,w)

(array([[0.01026258, 0.011461  ],
        [0.00595279, 0.00652496],
        [0.02499604, 0.02743481],
        [0.00455083, 0.00534751],
        [0.00429713, 0.00463539]]),
 array([[0.5488135 , 0.71518937],
        [0.60276338, 0.54488318],
        [0.4236548 , 0.64589411]]))

In [95]:
# Carry the error one more layer back: (5, 2) @ (2, 3) -> (5, 3), then back-propagate through layer 1.
# NOTE(review): `d` and `w` are consumed and then rebound, so this cell is not idempotent —
# it must be run exactly once, right after the layer-2 back-propagation cell.
cost_derivatives = d @ w.T
(d,w) = l1.back_propagate(cost_derivatives)
(d,w)

(array([[0.00262893, 0.00178245, 0.00178026],
        [0.00133156, 0.00083037, 0.00071745],
        [0.00549489, 0.0030571 , 0.00328581],
        [0.00151137, 0.00122146, 0.00124221],
        [0.00076899, 0.0003771 , 0.00036508]]),
 array([[0.5488135 , 0.71518937, 0.60276338],
        [0.54488318, 0.4236548 , 0.64589411],
        [0.43758721, 0.891773  , 0.96366276],
        [0.38344152, 0.79172504, 0.52889492]]))

In [98]:
# Update step for layer 1 with learning rate eta; the layer's input (data_set) is passed in.
# The methods presumably return the updated weights and biases (TODO confirm) — the displayed
# weights differ slightly from the layer-1 weights shown by the back-propagation cell above.
eta = .1
w = l1.update_weights(eta, data_set)
b = l1.update_biases(eta)
(w,b)

(array([[0.54736881, 0.7143417 , 0.60189121],
        [0.54361782, 0.42290217, 0.64513941],
        [0.43632694, 0.89103587, 0.96292387],
        [0.381608  , 0.7906015 , 0.52773645]]),
 array([0.00765285, 0.0085463 , 0.00852184]))

In [100]:
# Update step for layer 2, reusing `eta` from the layer-1 update cell.
# `l1.outputs` is presumably the output stored by layer 1 during the forward pass,
# i.e. the input seen by layer 2 — TODO confirm.
w = l2.update_weights(eta, l1.outputs)
b = l2.update_biases(eta)
(w,b)

(array([[0.54496623, 0.7109382 ],
        [0.59848108, 0.54015084],
        [0.41944711, 0.64124552]]),
 array([0.00499406, 0.00445963]))

In [101]:
# Update step for layer 3, reusing `eta` from the layer-1 update cell.
# `l2.outputs` is presumably the output stored by layer 2 during the forward pass,
# i.e. the input seen by layer 3 — TODO confirm.
w = l3.update_weights(eta, l2.outputs)
b = l3.update_biases(eta)
(w,b)

(array([[0.50595649],
        [0.67009888]]),
 array([-0.04446802]))

In [318]:
# Line-fitting data for the FFNN regression test: y = 4 + 2*x + noise, with x uniform on [0, n).
# Note: n, x, y and X from the earlier examples are rebound here.
n = 100
np.random.seed(0)
x = np.random.rand(n,1)*n
# The noise term is uniform on [0, 0.03), i.e. not zero-mean.
y = 4 + 2*x + .03*np.random.rand(n,1)
# Design matrix: a column of ones followed by the (unscaled) x values.
X = np.c_[np.ones(n), x]
# Scale the targets by their maximum so they lie in (0, 1]. The maximum is taken over the
# whole data set, i.e. before the train/test split below.
y = y / y.max()
# NOTE(review): `skl` is not imported in any visible cell — presumably it is `sklearn`
# (with `model_selection` loaded) exposed through `from utils import *`; confirm, or import
# it explicitly. No `random_state` is passed, so the split presumably depends on the
# global NumPy RNG state seeded above — confirm.
X_train, X_test, y_train, y_test = skl.model_selection.train_test_split(X, y, train_size=.7)

In [324]:
# Train a small network on the line-fitting data and report R^2 on the train and test splits.
# FFNN(2) presumably declares 2 input features (X has a ones column and x) — TODO confirm;
# the two add_layer calls then add a 2-node layer and a 1-node output layer (presumably).
ffnn = FFNN(2)
ffnn.add_layer(2)
ffnn.add_layer(1)
# `train` presumably returns the network's predictions for X_train, since its result is
# scored against y_train below — TODO confirm. Note the large learning rate (eta=30).
out = ffnn.train(X_train, y_train, max_iter=50000, eta=30)
# NOTE(review): `r2` is not defined in any visible cell — presumably provided by
# `from utils import *`; confirm.
r2_train = r2(y_train, out)
out = ffnn.test(X_test)
r2_test = r2(y_test, out)
(r2_train, r2_test)

(0.9463707309629964, 0.929431036692402)