In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Raw, unprocessed tables read straight from disk.
# The training table uses its first CSV column as the index; the macro table
# keeps the default integer index.
df_macro = pd.read_csv(
    "/datasets/sberbank-russian-housing-market/macro.csv"
)
X_train = pd.read_csv(
    "/datasets/sberbank-russian-housing-market/train.csv",
    index_col=0,
)

In [None]:
# Dropping NAs and processing ONLY for the forward stepwise selection.
#
# Written as one non-mutating chain so the cell can be re-run safely. The
# earlier in-place version broke on a second run: reset_index() inserted a
# stray "index" column and drop(columns=["id"]) then raised KeyError.
X_train = (
    X_train
    .dropna()                          # keep only fully observed rows
    .reset_index(drop=True)            # discard the old ("id") index, renumber 0..n-1
    .select_dtypes(include=np.number)  # numeric columns only
)

In [None]:
# Separate the target from the predictors. Selecting with a list keeps the
# target as a one-column DataFrame rather than a Series.
y_train = X_train.loc[:, ["price_doc"]]
X_train = X_train.drop(columns="price_doc")

In [None]:
# Small three-feature subsample with the matching target rows.
# The draw is seeded so the same rows come back on every run, and the sample
# size is capped at the number of available rows (DataFrame.sample raises if
# asked for more rows than exist when sampling without replacement).
SAMPLE_SIZE = 5000
SAMPLE_SEED = 0

X_train_sample = (
    X_train[["full_sq", "life_sq", "floor"]]
    .fillna(0)
    .sample(n=min(SAMPLE_SIZE, len(X_train)), random_state=SAMPLE_SEED)
)
# Look the targets up by index label so they stay aligned with the sampled rows.
y_train_sample = y_train.loc[X_train_sample.index]

In [3]:
# Cleaned train / validation / test splits plus z-scored copies.
# Every split is standardised with the TRAINING mean and standard deviation,
# so validation and test statistics never enter the scaling.
def _read_split(path):
    """Read one saved split and drop its "Unnamed: 0" column."""
    return pd.read_csv(path).drop(columns="Unnamed: 0")


X_train = _read_split("../Data/df_train_vvfinal")
y_train = _read_split("../Data/y_train_vfinal")

X_val = _read_split("../Data/df_val_vvfinal")
# Hand-set build_year for validation row 591 (the reason is not recorded here).
X_val.at[591, "build_year"] = 2009
y_val = _read_split("../Data/y_val_vfinal")

X_test = _read_split("../Data/df_test_vvfinal")
y_test = _read_split("../Data/y_test_vfinal")

# Training-set statistics, computed once and reused for all three splits.
X_center, X_scale = X_train.mean(), X_train.std()
y_center, y_scale = y_train.mean(), y_train.std()

X_train_normalize = (X_train - X_center) / X_scale
y_train_normalize = (y_train - y_center) / y_scale

X_val_normalize = (X_val - X_center) / X_scale
y_val_normalize = (y_val - y_center) / y_scale

X_test_normalize = (X_test - X_center) / X_scale
y_test_normalize = (y_test - y_center) / y_scale

## Neural Network

In [22]:
class NeuralNetwork():
    """Fully connected feed-forward network for regression, written with NumPy.

    Hidden layers apply a configurable activation ("relu" or "sigmoid"); the
    output layer is linear. Parameters are updated by gradient descent on the
    halved mean squared error of the training data (optionally a random
    mini-batch per step), while the loss on a validation set drives the
    learning-rate schedule and the stopping rule.
    """

    def fit(self, X, y, n_hidden, nodes, activations, lr, validation_X, validation_y, batch_size = 0):
        """Initialise the parameters and train until a stopping rule fires.

        Parameters
        ----------
        X, y : objects exposing ``.values`` (e.g. pandas DataFrames)
            Training features and targets; both are indexed as 2-D arrays.
        n_hidden : int
            Number of hidden layers; must equal ``len(nodes) - 2``.
        nodes : list of int
            Layer widths, input layer first and output layer last.
        activations : list of str
            One entry per element of ``nodes``. Only the hidden-layer entries
            (``activations[1:-1]``) are used.
        lr : float
            Initial learning rate.
        validation_X, validation_y : objects exposing ``.values``
            Validation data on which the reported loss is computed.
        batch_size : int, default 0
            Number of rows drawn per update; 0 uses the full training set.

        Returns
        -------
        (losses, mses) : tuple of two lists
            Validation "l2" loss and validation MSE recorded before each update.

        Raises
        ------
        ValueError
            If ``n_hidden`` is inconsistent with ``nodes``.
        """
        if len(nodes) != n_hidden + 2:
            # Weights are sized from `nodes` and biases from `n_hidden`; a
            # mismatch would otherwise surface later as an opaque IndexError.
            raise ValueError("n_hidden must equal len(nodes) - 2")

        self._lr = lr
        self._X = X.values
        self._y = y.values
        self._n_hidden = n_hidden
        self._nodes = nodes
        self._weights = self._generate_weights()
        self._biases = self._generate_bias()
        self._activations = activations
        self._forward_inputs = []
        self._val_X = validation_X.values
        self._val_y = validation_y.values
        self._batch_size = batch_size

        return self._train()

    def predict(self, X):
        """Run a forward pass on ``X`` with the current parameters.

        ``X`` holds one observation per row. The result is the linear output
        of the last layer, one row per observation.
        """
        hidden_activations = self._activations[1:-1]
        last = len(self._weights) - 1

        pred = X
        for idx, layer in enumerate(self._weights):
            weight_output = (pred @ layer) + self._biases[idx].T
            if idx == last:
                # Output layer is linear: no activation.
                pred = weight_output
            else:
                pred = self._activation(data = weight_output, activation = hidden_activations[idx])

        return pred

    def _activation(self, data, activation = "relu"):
        """Apply the named activation element-wise and return a float array.

        The input is converted to float first. The previous row-by-row
        implementation took the output dtype from the first row, so a first
        row with no positive entries produced an integer buffer and silently
        truncated every later row.
        """
        data = np.asarray(data, dtype = float)
        if activation == "relu":
            return np.maximum(data, 0.0)
        if activation == "sigmoid":
            return 1.0 / (1.0 + np.exp(-data))
        raise ValueError("Unsupported activation: " + repr(activation))

    def _der_activation(self, points, activation = "relu"):
        """Element-wise derivative of the named activation at the pre-activation ``points``."""
        points = np.asarray(points, dtype = float)
        if activation == "relu":
            # 0 where the unit was inactive (<= 0), 1 elsewhere.
            return np.where(points <= 0, 0, 1)
        if activation == "sigmoid":
            s = 1.0 / (1.0 + np.exp(-points))
            return s * (1.0 - s)
        raise ValueError("Unsupported activation: " + repr(activation))

    def _loss_function(self, ypred, loss = "l2"):
        """Loss of ``ypred`` against the validation targets.

        "mse" is the mean squared error; "l2" is half of it.
        """
        y = self._val_y
        if loss == "mse":
            return ((ypred - y) ** 2).mean()
        if loss == "l2":
            return (((ypred - y) ** 2)/2).mean()
        raise ValueError("Unsupported loss: " + repr(loss))

    def _loss_jacobian(self, ypred, loss = "l2"):
        """Gradient of the "l2" training loss with respect to ``ypred``.

        The targets are those of the current mini-batch when batching is
        enabled, otherwise the full training targets.
        """
        if self._batch_size > 0:
            y = self._batchy
        else:
            y = self._y
        if loss == "l2":
            return (ypred - y)/(len(ypred))
        raise ValueError("Unsupported loss: " + repr(loss))

    def _generate_weights(self):
        """One weight matrix per pair of consecutive layers, drawn from N(0, 0.1**2)."""
        nodes = self._nodes
        return [0.1 * np.random.randn(nodes[idx - 1], nodes[idx])
                for idx in range(1, len(nodes))]

    def _generate_bias(self):
        """One zero column vector per non-input layer, shaped (layer width, 1)."""
        nodes = self._nodes
        return [np.zeros((nodes[i + 1], 1)) for i in range(self._n_hidden + 1)]

    def _forward_propagation(self):
        """Forward pass over the training data (or current mini-batch).

        Caches, for every layer, the pair (layer input, pre-activation) that
        back-propagation needs; the pre-activation is None for the linear
        output layer. Returns the network output as an array.

        Shape walk-through, supposing 2 observations, a previous layer of
        3 nodes and a current layer of 2 nodes:

        prev shape (2,3)
        prev = ob1 [prev_node_1 val, prev_node_2 val, prev_node_3 val]
               ob2 [prev_node_1 val, prev_node_2 val, prev_node_3 val]

        layer shape (3,2)
        layer = [weight for current_node_1 for prev_node_1, weight for current_node_2 for prev_node_1]
                [weight for current_node_1 for prev_node_2, weight for current_node_2 for prev_node_2]
                [weight for current_node_1 for prev_node_3, weight for current_node_2 for prev_node_3]

        output shape (2,2) # since 2 observations and 2 nodes in the current layer
        output = ob1 [current_node_1 val, current_node_2 val]
                 ob2 [current_node_1 val, current_node_2 val]

        The bias of the current layer is (2,1) since it has 2 nodes. Its
        transpose is added to every row of the output, then the activation
        function is applied and the result becomes `prev` for the next layer.
        """
        if self._batch_size > 0:
            prev = self._batchX
        else:
            prev = self._X
        hidden_activations = self._activations[1:-1]
        last = len(self._weights) - 1

        # Start from a clean cache so that a forward pass that was never
        # followed by a backward pass cannot leave stale entries behind.
        self._forward_inputs = []

        for idx, layer in enumerate(self._weights):
            weight_output = (prev @ layer) + self._biases[idx].T
            if idx == last:
                self._forward_inputs.append((prev, None))
                prev = weight_output
            else:
                self._forward_inputs.append((prev, weight_output))
                prev = self._activation(data = weight_output, activation = hidden_activations[idx])

        return prev

    def _backward_propagation(self, ypred):
        """Back-propagate the loss gradient and take one gradient-descent step.

        ``ypred`` is the output of the matching `_forward_propagation` call.
        Layers are visited last to first. For each hidden layer the running
        gradient is multiplied by the derivative of that layer's configured
        activation.
        """
        hidden_activations = self._activations[1:-1]
        last = len(self._forward_inputs) - 1

        j = self._loss_jacobian(ypred)

        for i in range(last, -1, -1):
            layer_input, pre_activation = self._forward_inputs[i]

            if i != last:
                # Activation is applied on every layer except the last.
                der_acti = self._der_activation(pre_activation, hidden_activations[i])
                j = np.multiply(j, der_acti)

            jw = layer_input.T.dot(j)

            # Multiplying by a column of ones sums the gradient over the
            # observations, giving a (layer width, 1) bias gradient.
            b = np.ones((j.shape[0], 1))
            jb = j.T.dot(b)

            # Propagate to the previous layer BEFORE this layer's weights change.
            j = j.dot(self._weights[i].T)

            self._weights[i] -= self._lr * jw
            self._biases[i] -= self._lr * jb

        self._forward_inputs = []

    def _train(self):
        """Gradient-descent loop with validation-driven learning-rate control.

        Each iteration optionally draws a random mini-batch, runs a forward
        pass, evaluates the validation loss, and then updates the parameters.

        Schedule and stopping rules:
        * Two increases of the validation loss without an intervening
          decrease halve the learning rate.
        * More than 20 increases since the last new minimum stop training.
        * The learning rate is multiplied by 0.9 every 26 updates.
        * Training stops once the learning rate falls below 1e-5.

        Returns the validation losses and MSEs recorded before each update;
        the iteration that triggers the stop is not recorded.
        """
        min_loss = old_loss = np.inf
        losses = []
        mses = []
        tol = 0.00001
        terminate_count = anneal_count = step_count = 0
        n_rows = self._X.shape[0]
        while True:
            if self._batch_size > 0:
                # A fresh random subset of rows, without replacement.
                batch_index = np.random.permutation(n_rows)[:self._batch_size]
                self._batchX = self._X[batch_index,:]
                self._batchy = self._y[batch_index,:]

            batched_out = self._forward_propagation()
            validation_out = self.predict(self._val_X)

            loss = self._loss_function(validation_out)
            mse = self._loss_function(validation_out, loss = "mse")
            print("\nloss:")
            print(loss)
            print("mse:")
            print(mse)
            if loss <= min_loss:
                min_loss = loss
                terminate_count = anneal_count = 0
            if loss <= old_loss:
                anneal_count = 0
            else:
                terminate_count += 1
                anneal_count += 1
                print("INCREASE IN LOSS")
                if anneal_count >= 2:
                    anneal_count = 0
                    self._lr = self._lr / 2
                    print("Decreasing learning rate. New rate is " + str(self._lr))
                if terminate_count > 20:
                    break
            if step_count > 25:
                self._lr = self._lr * 0.9
                print("Annealing learning rate. New rate is " + str(self._lr))
                step_count = 0
            if self._lr < tol:
                break

            losses.append(loss)
            mses.append(mse)
            self._backward_propagation(batched_out)
            old_loss = loss
            step_count += 1

        return losses, mses

In [None]:
# Baseline run: a single hidden layer of 50 units, mini-batches of 1000 rows.
LEARNING_RATE = 0.5
OUTPUT_SIZE = 1
INPUT_SIZE = X_train_normalize.shape[1]

nodes = [INPUT_SIZE, 50, OUTPUT_SIZE]
activations = ["relu"] * len(nodes)

nn = NeuralNetwork()

# fit() returns the per-iteration loss and MSE histories.
losses, mses = nn.fit(
    X=X_train_normalize,
    y=y_train_normalize,
    n_hidden=len(nodes) - 2,
    nodes=nodes,
    activations=activations,
    lr=LEARNING_RATE,
    validation_X=X_val_normalize,
    validation_y=y_val_normalize,
    batch_size=1000,
)

In [None]:
# Loss and MSE histories from the most recent fit, one point per iteration.
iterations = range(len(mses))
plt.plot(iterations, mses, label="mse", color="red", linewidth=2)
plt.plot(iterations, losses, label="loss", color="blue", linewidth=2, linestyle="dashed")
plt.legend()

### GRID SEARCH

In [None]:
# GRID SEARCH
#
# For every (batch size, width, depth) combination, train N_REPEATS networks
# from fresh random weights, record the best loss / MSE of each run, and print
# the averages over the repeats.
INPUT_SIZE = X_train_normalize.shape[1]
OUTPUT_SIZE = 1
LEARNING_RATE = 0.5
N_REPEATS = 5

batch_size_param = [64, 128, 512, 1024, 2048, 5000, 10000]
width_param = [34, 50, 75, 100]
depth_param = [1, 5, 7, 8, 10]

for batch in batch_size_param:
    for width in width_param:
        for depth in depth_param:

            # The architecture is the same for every repeat of this combination.
            nodes = [INPUT_SIZE] + [width] * depth + [OUTPUT_SIZE]
            activations = ["relu"] * len(nodes)

            min_losses = []
            min_mses = []

            for i in range(N_REPEATS):
                nn = NeuralNetwork()

                losses, mses = nn.fit(X = X_train_normalize,
                       y = y_train_normalize,
                       n_hidden = len(nodes) - 2,
                       nodes = nodes,
                       activations = activations,
                       lr = LEARNING_RATE,
                       validation_X = X_val_normalize,
                       validation_y = y_val_normalize,
                       batch_size = batch)

                print((batch, width, depth))
                print(min(losses), min(mses))
                min_losses.append(min(losses))
                min_mses.append(min(mses))

                plt.figure()
                plt.plot(range(0,len(mses)), mses, color='red', linewidth=2, label="mse")
                plt.plot(range(0,len(mses)), losses, color='blue', linewidth=2, linestyle='dashed', label="loss")
                plt.title(str((batch, width, depth)))
                plt.legend()
                # Render now so hundreds of figures are not kept open until
                # the whole search has finished.
                plt.show()

            # Average of the per-run minima. The previous line called an
            # undefined `mean` and averaged the last run's full MSE history
            # instead of `min_mses`.
            print(np.mean(min_losses), np.mean(min_mses))

In [None]:
# 1x34 (batch = 0)
# One hidden layer of 34 units; batch = 0 trains on the full training set each step.
depth, width, batch = 1, 34, 0

LEARNING_RATE = 0.5
OUTPUT_SIZE = 1
INPUT_SIZE = X_train_normalize.shape[1]

nodes = [INPUT_SIZE] + [width] * depth + [OUTPUT_SIZE]
activations = ["relu"] * len(nodes)

nn = NeuralNetwork()
losses, mses = nn.fit(
    X=X_train_normalize,
    y=y_train_normalize,
    n_hidden=len(nodes) - 2,
    nodes=nodes,
    activations=activations,
    lr=LEARNING_RATE,
    validation_X=X_val_normalize,
    validation_y=y_val_normalize,
    batch_size=batch,
)

# RESULTS (recorded from an earlier run)
## loss = 0.186
## mse = 0.373

In [19]:
# 5x34 (batch = 0)
# Five hidden layers of 34 units each, full training set per step, with a
# starting learning rate of 0.75.
batch = 0
depth = 5
width = 34

INPUT_SIZE = X_train_normalize.shape[1]
OUTPUT_SIZE = 1
LEARNING_RATE = 0.75

hidden_widths = [width] * depth
nodes = [INPUT_SIZE] + hidden_widths + [OUTPUT_SIZE]
activations = ["relu"] * len(nodes)

nn = NeuralNetwork()
losses, mses = nn.fit(
    X=X_train_normalize,
    y=y_train_normalize,
    n_hidden=len(nodes) - 2,
    nodes=nodes,
    activations=activations,
    lr=LEARNING_RATE,
    validation_X=X_val_normalize,
    validation_y=y_val_normalize,
    batch_size=batch,
)

# RESULTS (recorded from an earlier run)
## loss = 0.179
## mse = 0.359


loss:
0.44545745594455444
mse:
0.8909149118891089

loss:
0.44476697627248707
mse:
0.8895339525449741

loss:
0.4441709209086317
mse:
0.8883418418172634

loss:
0.4436311719464357
mse:
0.8872623438928714

loss:
0.44310508053157993
mse:
0.8862101610631599

loss:
0.44253799022408846
mse:
0.8850759804481769

loss:
0.4418881615280752
mse:
0.8837763230561504

loss:
0.44112366846375145
mse:
0.8822473369275029

loss:
0.4402101149591218
mse:
0.8804202299182436

loss:
0.4390900336408204
mse:
0.8781800672816408

loss:
0.43769689786521915
mse:
0.8753937957304383

loss:
0.4360285389182484
mse:
0.8720570778364968

loss:
0.43397542786908494
mse:
0.8679508557381699

loss:
0.43139078577247086
mse:
0.8627815715449417

loss:
0.428067997034054
mse:
0.856135994068108

loss:
0.4237332772143261
mse:
0.8474665544286522

loss:
0.41800016378300536
mse:
0.8360003275660107

loss:
0.41028517708408135
mse:
0.8205703541681627

loss:
0.39982574573785934
mse:
0.7996514914757187

loss:
0.38560256610850463
mse:
0.7712051


loss:
0.21127712942794866
mse:
0.4225542588558973

loss:
0.21111748714587722
mse:
0.42223497429175444

loss:
0.21095610483555127
mse:
0.42191220967110254

loss:
0.21079823323984953
mse:
0.42159646647969906

loss:
0.2106376892661368
mse:
0.4212753785322736
Annealing learning rate. New rate is 0.09964518750000002

loss:
0.2104966832531492
mse:
0.4209933665062984

loss:
0.2103570164500275
mse:
0.420714032900055

loss:
0.21021546807368546
mse:
0.4204309361473709

loss:
0.21007416361754522
mse:
0.42014832723509044

loss:
0.20993077814283642
mse:
0.41986155628567284

loss:
0.2097895173742767
mse:
0.4195790347485534

loss:
0.20964520463094705
mse:
0.4192904092618941

loss:
0.20950473903986333
mse:
0.41900947807972666

loss:
0.2093634246442959
mse:
0.4187268492885918

loss:
0.2092212210436784
mse:
0.4184424420873568

loss:
0.20907664164891154
mse:
0.4181532832978231

loss:
0.2089359337503472
mse:
0.4178718675006944

loss:
0.20879335884606337
mse:
0.41758671769212674

loss:
0.20865416400258477


loss:
0.1941634974894417
mse:
0.3883269949788834

loss:
0.19408627850635507
mse:
0.38817255701271014

loss:
0.1940108156003381
mse:
0.3880216312006762

loss:
0.19393574928641352
mse:
0.38787149857282704

loss:
0.1938605748362419
mse:
0.3877211496724838

loss:
0.19378431696804874
mse:
0.3875686339360975

loss:
0.19370869971338542
mse:
0.38741739942677084
Annealing learning rate. New rate is 0.05295553809018752

loss:
0.19364125393228473
mse:
0.38728250786456947

loss:
0.19357340233840287
mse:
0.38714680467680573

loss:
0.1935060561034699
mse:
0.3870121122069398

loss:
0.1934384329200525
mse:
0.386876865840105

loss:
0.1933694031705529
mse:
0.3867388063411058

loss:
0.19330128958276144
mse:
0.3866025791655229

loss:
0.19323335399305447
mse:
0.38646670798610894

loss:
0.19316516872311715
mse:
0.3863303374462343

loss:
0.19309724734873984
mse:
0.3861944946974797

loss:
0.1930296864429985
mse:
0.386059372885997

loss:
0.19296284676047978
mse:
0.38592569352095957

loss:
0.19289579404517035



loss:
0.1863290937970435
mse:
0.372658187594087

loss:
0.1862934270343991
mse:
0.3725868540687982

loss:
0.1862586536015478
mse:
0.3725173072030956

loss:
0.18622404328397366
mse:
0.3724480865679473

loss:
0.1861886989267535
mse:
0.372377397853507

loss:
0.1861545029097941
mse:
0.3723090058195882

loss:
0.18611942286759356
mse:
0.3722388457351871

loss:
0.1860855561612046
mse:
0.3721711123224092

loss:
0.18605059480491062
mse:
0.37210118960982125
Annealing learning rate. New rate is 0.02814274411818735

loss:
0.18601971735241485
mse:
0.3720394347048297

loss:
0.1859896489214941
mse:
0.3719792978429882

loss:
0.18595913376473247
mse:
0.37191826752946494

loss:
0.1859296009899523
mse:
0.3718592019799046

loss:
0.18589956492820786
mse:
0.3717991298564157

loss:
0.1858702581559567
mse:
0.3717405163119134

loss:
0.1858402106652051
mse:
0.3716804213304102

loss:
0.1858114621990456
mse:
0.3716229243980912

loss:
0.1857824726353406
mse:
0.3715649452706812

loss:
0.18575349922134457
mse:
0.371


loss:
0.18252576163008064
mse:
0.36505152326016127

loss:
0.18250812571833308
mse:
0.36501625143666616

loss:
0.18249276049562826
mse:
0.36498552099125653

loss:
0.18247491569261767
mse:
0.36494983138523535

loss:
0.18245750913849113
mse:
0.36491501827698225

loss:
0.18244160467061105
mse:
0.3648832093412221

loss:
0.18242425065772833
mse:
0.36484850131545665

loss:
0.182408225639888
mse:
0.364816451279776

loss:
0.18239063609047415
mse:
0.3647812721809483

loss:
0.18237501193262523
mse:
0.36475002386525046
Annealing learning rate. New rate is 0.014956208076913608

loss:
0.18235906922881745
mse:
0.3647181384576349

loss:
0.18234492840764652
mse:
0.36468985681529303

loss:
0.18232872261622843
mse:
0.36465744523245686

loss:
0.18231465986730644
mse:
0.3646293197346129

loss:
0.18229866487020496
mse:
0.3645973297404099

loss:
0.1822828263926682
mse:
0.3645656527853364

loss:
0.18226875471370899
mse:
0.36453750942741797

loss:
0.18225266003901294
mse:
0.3645053200780259

loss:
0.182238300


loss:
0.1806727958049441
mse:
0.3613455916098882

loss:
0.18066460090649966
mse:
0.3613292018129993

loss:
0.18065663020219602
mse:
0.36131326040439204

loss:
0.18064709091791725
mse:
0.3612941818358345

loss:
0.18063924900799608
mse:
0.36127849801599216

loss:
0.1806300083338487
mse:
0.3612600166676974

loss:
0.18062194272538842
mse:
0.36124388545077685

loss:
0.18061248238500752
mse:
0.36122496477001503

loss:
0.18060434249160182
mse:
0.36120868498320363

loss:
0.1805965093710542
mse:
0.3611930187421084

loss:
0.18058699434651176
mse:
0.36117398869302353

loss:
0.1805791109690015
mse:
0.361158221938003
Annealing learning rate. New rate is 0.007948342176603044

loss:
0.180570570454229
mse:
0.361141140908458

loss:
0.18056339464316803
mse:
0.36112678928633607

loss:
0.1805549768053751
mse:
0.3611099536107502

loss:
0.18054762386676732
mse:
0.36109524773353463

loss:
0.180540311388236
mse:
0.361080622776472

loss:
0.180532109192383
mse:
0.361064218384766

loss:
0.18052494537448993
mse:

KeyboardInterrupt: 

In [None]:
# 7x34 (batch = 0)
# Seven hidden layers of 34 units each, full training set per step, with a
# starting learning rate of 3.
width, depth = 34, 7
batch = 0

LEARNING_RATE = 3
INPUT_SIZE = X_train_normalize.shape[1]
OUTPUT_SIZE = 1

nodes = [INPUT_SIZE] + depth * [width] + [OUTPUT_SIZE]
activations = len(nodes) * ["relu"]

nn = NeuralNetwork()
losses, mses = nn.fit(
    X=X_train_normalize,
    y=y_train_normalize,
    n_hidden=len(nodes) - 2,
    nodes=nodes,
    activations=activations,
    lr=LEARNING_RATE,
    validation_X=X_val_normalize,
    validation_y=y_val_normalize,
    batch_size=batch,
)

# RESULTS (recorded from an earlier run)
## loss = 0.212
## mse = 0.424


loss:
0.446054319495708
mse:
0.892108638991416

loss:
0.4456410835021251
mse:
0.8912821670042502

loss:
0.44515602252676706
mse:
0.8903120450535341

loss:
0.4443780603470162
mse:
0.8887561206940324

loss:
0.4434748554042438
mse:
0.8869497108084876

loss:
0.44260926185499694
mse:
0.8852185237099939

loss:
0.44506370377343796
mse:
0.8901274075468759
INCREASE IN LOSS

loss:
0.5106281003278244
mse:
1.0212562006556487
INCREASE IN LOSS
Decreasing learning rate. New rate is 1.5

loss:
0.49122993432443507
mse:
0.9824598686488701

loss:
0.47066555198933563
mse:
0.9413311039786713

loss:
0.4561918224683817
mse:
0.9123836449367634

loss:
0.45197270692723357
mse:
0.9039454138544671

loss:
0.44690473802775
mse:
0.8938094760555

loss:
0.4458490382541645
mse:
0.891698076508329

loss:
0.44349918258658444
mse:
0.8869983651731689

loss:
0.44207789075976645
mse:
0.8841557815195329

loss:
0.44008399303463624
mse:
0.8801679860692725

loss:
0.4392426061347652
mse:
0.8784852122695304

loss:
0.43925696933665

In [13]:
# 8x34 (batch = 0)
# Eight hidden layers of 34 units each, full training set per step, with a
# starting learning rate of 3.
batch, width, depth = 0, 34, 8

OUTPUT_SIZE = 1
INPUT_SIZE = X_train_normalize.shape[1]
LEARNING_RATE = 3

nodes = [INPUT_SIZE]
nodes += [width] * depth
nodes += [OUTPUT_SIZE]
activations = ["relu"] * len(nodes)

nn = NeuralNetwork()
losses, mses = nn.fit(
    X=X_train_normalize,
    y=y_train_normalize,
    n_hidden=len(nodes) - 2,
    nodes=nodes,
    activations=activations,
    lr=LEARNING_RATE,
    validation_X=X_val_normalize,
    validation_y=y_val_normalize,
    batch_size=batch,
)

# RESULTS (recorded from an earlier run)
## loss = 0.192
## mse = 0.383


loss:
0.446029208327168
mse:
0.892058416654336

loss:
0.44604037019737613
mse:
0.8920807403947523
INCREASE IN LOSS

loss:
0.44597707225131206
mse:
0.8919541445026241

loss:
0.4467118600236807
mse:
0.8934237200473614
INCREASE IN LOSS

loss:
0.4488302893938686
mse:
0.8976605787877372
INCREASE IN LOSS
Decreasing learning rate. New rate is 1.5

loss:
0.44783336850520583
mse:
0.8956667370104117

loss:
0.4462584863518524
mse:
0.8925169727037048

loss:
0.4462630286534122
mse:
0.8925260573068244
INCREASE IN LOSS

loss:
0.44598275095472023
mse:
0.8919655019094405

loss:
0.4460482869556994
mse:
0.8920965739113988
INCREASE IN LOSS

loss:
0.4459765588431891
mse:
0.8919531176863782

loss:
0.4459985322939996
mse:
0.8919970645879992
INCREASE IN LOSS

loss:
0.4459818544139639
mse:
0.8919637088279277

loss:
0.44598930443655627
mse:
0.8919786088731125
INCREASE IN LOSS

loss:
0.4459831253621205
mse:
0.891966250724241

loss:
0.44598454508593816
mse:
0.8919690901718763
INCREASE IN LOSS

loss:
0.4459813517


loss:
0.26304762958571143
mse:
0.5260952591714229

loss:
0.2590589705330074
mse:
0.5181179410660148

loss:
0.2559424358010722
mse:
0.5118848716021444

loss:
0.2546471763731104
mse:
0.5092943527462208

loss:
0.2529749967191195
mse:
0.505949993438239

loss:
0.25203365633979746
mse:
0.5040673126795949

loss:
0.2506417662526683
mse:
0.5012835325053366
Annealing learning rate. New rate is 0.19929037500000005

loss:
0.24992215684467692
mse:
0.49984431368935384

loss:
0.24880110816632386
mse:
0.49760221633264773

loss:
0.24810345611728055
mse:
0.4962069122345611

loss:
0.24706570510960266
mse:
0.4941314102192053

loss:
0.2463978857858518
mse:
0.4927957715717036

loss:
0.24542409079436894
mse:
0.4908481815887379

loss:
0.24472966942130944
mse:
0.4894593388426189

loss:
0.24384637989416635
mse:
0.4876927597883327

loss:
0.24326414249145364
mse:
0.4865282849829073

loss:
0.24242629334015467
mse:
0.48485258668030934

loss:
0.24198374506816217
mse:
0.48396749013632434

loss:
0.2411741920724754
ms


loss:
0.21523492801481522
mse:
0.43046985602963045

loss:
0.21509910291349923
mse:
0.43019820582699847

loss:
0.21496666798115488
mse:
0.42993333596230976

loss:
0.21483334284769054
mse:
0.4296666856953811

loss:
0.21469925873805637
mse:
0.42939851747611274

loss:
0.21456920241606275
mse:
0.4291384048321255

loss:
0.21443799435090352
mse:
0.42887598870180704

loss:
0.21430942870801298
mse:
0.42861885741602596

loss:
0.21417969063441258
mse:
0.42835938126882517

loss:
0.2140505482642916
mse:
0.4281010965285832

loss:
0.21391984524969562
mse:
0.42783969049939125
Annealing learning rate. New rate is 0.05295553809018752

loss:
0.21380442738141425
mse:
0.4276088547628285

loss:
0.2136909582376938
mse:
0.4273819164753876

loss:
0.21357508733136316
mse:
0.4271501746627263

loss:
0.21346425212228515
mse:
0.4269285042445703

loss:
0.21335064589688152
mse:
0.42670129179376304

loss:
0.21323915613908864
mse:
0.4264783122781773

loss:
0.2131266102286886
mse:
0.4262532204573772

loss:
0.2130141700


loss:
0.2025807280364383
mse:
0.4051614560728766

loss:
0.202531071863119
mse:
0.405062143726238

loss:
0.20247994390901117
mse:
0.40495988781802233

loss:
0.202428807596
mse:
0.404857615192

loss:
0.20237801556165108
mse:
0.40475603112330216

loss:
0.20232691534159628
mse:
0.40465383068319255

loss:
0.20227553707484916
mse:
0.4045510741496983

loss:
0.20222496675531243
mse:
0.40444993351062486

loss:
0.20217353535241167
mse:
0.40434707070482334

loss:
0.20212318454554626
mse:
0.4042463690910925

loss:
0.2020720371960275
mse:
0.404144074392055

loss:
0.20202009156502276
mse:
0.4040401831300455

loss:
0.20196898794505566
mse:
0.4039379758901113
Annealing learning rate. New rate is 0.02814274411818735

loss:
0.20192256589291335
mse:
0.4038451317858267

loss:
0.2018750282775991
mse:
0.4037500565551982

loss:
0.20182797596395058
mse:
0.40365595192790116

loss:
0.20178047351677783
mse:
0.40356094703355566

loss:
0.20173241054283997
mse:
0.40346482108567994

loss:
0.20168537444390383
mse:
0


loss:
0.19689555515918664
mse:
0.39379111031837327

loss:
0.196871792661308
mse:
0.393743585322616

loss:
0.19684763427965224
mse:
0.3936952685593045

loss:
0.19682338615755207
mse:
0.39364677231510414

loss:
0.1967993860677864
mse:
0.3935987721355728

loss:
0.19677511592103672
mse:
0.39355023184207344

loss:
0.19675072161115212
mse:
0.39350144322230424

loss:
0.19672632901066112
mse:
0.39345265802132223

loss:
0.1967034113396772
mse:
0.3934068226793544

loss:
0.19667971229074388
mse:
0.39335942458148776

loss:
0.1966562865885365
mse:
0.393312573177073

loss:
0.19663265220334705
mse:
0.3932653044066941

loss:
0.19660883999526968
mse:
0.39321767999053936

loss:
0.19658484192108985
mse:
0.3931696838421797
Annealing learning rate. New rate is 0.014956208076913608

loss:
0.19656306512419394
mse:
0.3931261302483879

loss:
0.19654144842829963
mse:
0.39308289685659925

loss:
0.19652006821767976
mse:
0.3930401364353595

loss:
0.19649870629478663
mse:
0.39299741258957327

loss:
0.1964773621776


loss:
0.19439243800927208
mse:
0.38878487601854417

loss:
0.19438161896417233
mse:
0.38876323792834466

loss:
0.19437093036406197
mse:
0.38874186072812394

loss:
0.1943601342976826
mse:
0.3887202685953652

loss:
0.19434930766983907
mse:
0.38869861533967814

loss:
0.1943389762485927
mse:
0.3886779524971854

loss:
0.1943283093913574
mse:
0.3886566187827148

loss:
0.19431736899829447
mse:
0.38863473799658893

loss:
0.19430666462130958
mse:
0.38861332924261915

loss:
0.19429567327151492
mse:
0.38859134654302985

loss:
0.19428481263707573
mse:
0.38856962527415145

loss:
0.1942740893960928
mse:
0.3885481787921856

loss:
0.19426343221038225
mse:
0.3885268644207645

loss:
0.1942529779304203
mse:
0.3885059558608406

loss:
0.19424239201207125
mse:
0.3884847840241425

loss:
0.19423199691189796
mse:
0.38846399382379593

loss:
0.19422140105564617
mse:
0.38844280211129234
Annealing learning rate. New rate is 0.007948342176603044

loss:
0.19421186360586395
mse:
0.3884237272117279

loss:
0.1942025669


loss:
0.19317665667328102
mse:
0.38635331334656203

loss:
0.19317153931729236
mse:
0.3863430786345847

loss:
0.19316638436404854
mse:
0.3863327687280971

loss:
0.19316114867721607
mse:
0.38632229735443213

loss:
0.1931560672599848
mse:
0.3863121345199696

loss:
0.19315086222966693
mse:
0.38630172445933386

loss:
0.19314565910649611
mse:
0.38629131821299223

loss:
0.19314056469253463
mse:
0.38628112938506926

loss:
0.19313536570184747
mse:
0.38627073140369494

loss:
0.1931301277525525
mse:
0.386260255505105

loss:
0.1931250347345132
mse:
0.3862500694690264

loss:
0.19311986347157736
mse:
0.3862397269431547

loss:
0.19311467238351776
mse:
0.3862293447670355

loss:
0.1931095827111414
mse:
0.3862191654222828

loss:
0.1931045688780269
mse:
0.3862091377560538

loss:
0.1930994962063064
mse:
0.3861989924126128

loss:
0.19309459453112238
mse:
0.38618918906224475

loss:
0.19308965679980075
mse:
0.3861793135996015

loss:
0.19308472913938227
mse:
0.38616945827876453
Annealing learning rate. New r


loss:
0.1926190629920726
mse:
0.3852381259841452

loss:
0.1926167426730931
mse:
0.3852334853461862

loss:
0.19261433299668415
mse:
0.3852286659933683

loss:
0.1926119567495638
mse:
0.3852239134991276

loss:
0.19260958085159396
mse:
0.38521916170318793

loss:
0.19260723098368807
mse:
0.38521446196737613

loss:
0.1926048373726862
mse:
0.3852096747453724

loss:
0.19260246118695146
mse:
0.3852049223739029

loss:
0.19260013423925926
mse:
0.3852002684785185

loss:
0.19259772106265566
mse:
0.3851954421253113

loss:
0.19259533993269287
mse:
0.38519067986538574

loss:
0.19259294470091587
mse:
0.38518588940183174

loss:
0.19259058309212798
mse:
0.38518116618425596

loss:
0.19258817666410852
mse:
0.38517635332821704

loss:
0.19258576044175568
mse:
0.38517152088351136

loss:
0.1925834035334543
mse:
0.3851668070669086

loss:
0.19258095810746206
mse:
0.3851619162149241

loss:
0.19257854939863364
mse:
0.3851570987972673

loss:
0.19257611981256692
mse:
0.38515223962513384

loss:
0.19257373453744833
m


loss:
0.19232793879949223
mse:
0.38465587759898445

loss:
0.19232659772618851
mse:
0.38465319545237703

loss:
0.19232524594115855
mse:
0.3846504918823171

loss:
0.19232391135541058
mse:
0.38464782271082115

loss:
0.1923226031942999
mse:
0.3846452063885998

loss:
0.19232124886330806
mse:
0.38464249772661613

loss:
0.19231988802020705
mse:
0.3846397760404141

loss:
0.19231857377512135
mse:
0.3846371475502427

loss:
0.19231722754821828
mse:
0.38463445509643657

loss:
0.19231585991319528
mse:
0.38463171982639055

loss:
0.19231451373425293
mse:
0.38462902746850586

loss:
0.19231319811031605
mse:
0.3846263962206321

loss:
0.19231185020236005
mse:
0.3846237004047201

loss:
0.1923104833723555
mse:
0.384620966744711

loss:
0.19230916594856035
mse:
0.3846183318971207

loss:
0.19230781972157077
mse:
0.38461563944314153

loss:
0.1923064729182115
mse:
0.384612945836423

loss:
0.19230511103357606
mse:
0.3846102220671521

loss:
0.19230379029305922
mse:
0.38460758058611844

loss:
0.19230244076757144


KeyboardInterrupt: 

In [None]:
# 10x34 (batch = 0)
# Ten hidden layers of 34 units each, full training set per step, with a
# starting learning rate of 5.
depth = 10
width = 34
batch = 0

LEARNING_RATE = 5
OUTPUT_SIZE = 1
INPUT_SIZE = X_train_normalize.shape[1]

nodes = [INPUT_SIZE] + [width] * depth + [OUTPUT_SIZE]
activations = ["relu"] * len(nodes)

nn = NeuralNetwork()
losses, mses = nn.fit(
    X=X_train_normalize,
    y=y_train_normalize,
    n_hidden=len(nodes) - 2,
    nodes=nodes,
    activations=activations,
    lr=LEARNING_RATE,
    validation_X=X_val_normalize,
    validation_y=y_val_normalize,
    batch_size=batch,
)

# RESULTS (recorded from an earlier run)
## loss = 0.446
## mse = 0.892

In [None]:
def solve_for_B(x, y, n):
    """Least-squares coefficients obtained by solving the normal equations.

    x : DataFrame of predictors with ``n`` rows (no intercept column).
    y : targets, row-aligned with ``x``.
    n : number of rows in ``x``.

    A column of ones is prepended to ``x``, so the first coefficient returned
    is the intercept. Solves (X'X) b = X'y with ``np.linalg.solve``.
    """
    intercept = np.ones((n, 1))
    design = np.concatenate((intercept, x.to_numpy()), axis=1)
    gram = np.matmul(design.T, design)
    moment = design.T @ y
    return np.linalg.solve(gram, moment)

def get_predicted_values(beta, design_matrix, n):
    """Fitted values for ``design_matrix`` under the coefficients ``beta``.

    A column of ``n`` ones is prepended to ``design_matrix`` so that the first
    entry of ``beta`` acts as the intercept.
    """
    intercept = np.ones((n, 1))
    with_intercept = np.concatenate((intercept, design_matrix), axis = 1)
    return np.matmul(with_intercept, beta)

def BIC(predictions, actuals, d, n):
    """Penalised residual sum of squares: RSS + d * log(n).

    predictions, actuals : array-likes of the same shape.
    d : number of predictors.
    n : number of observations.

    NOTE(review): this is not the textbook n*log(RSS/n) + d*log(n) form;
    confirm that the raw RSS is intended here.
    """
    residual_ss = np.square(actuals - predictions).sum()
    complexity_penalty = d * np.log(n)
    return residual_ss + complexity_penalty

def RSquaredAdj(predictions, actuals, d, n):
    """Adjusted R-squared, with R-squared taken as explained SS / total SS.

    d : number of predictors.
    n : number of observations.
    """
    centre = actuals.mean()
    total_ss = np.square(actuals - centre).sum()
    explained_ss = np.square(predictions - centre).sum()
    r2 = explained_ss / total_ss
    return 1 - (1 - r2) * (n - 1) / (n - d - 1)

def RMSE(predictions, actuals, n):
    """Root mean squared error: sqrt(sum of squared errors / n)."""
    squared_error = np.square(actuals - predictions)
    return np.sqrt(squared_error.sum() / n)

def train_model(design_matrix, dependent_variable_series):
    """Fit a linear model and score it on the same data it was fitted to.

    design_matrix : DataFrame of predictors (no intercept column).
    dependent_variable_series : targets, row-aligned with ``design_matrix``.

    Returns (beta, BIC, RMSE, residuals), where residuals are
    predicted minus actual.
    """
    n, d = design_matrix.shape
    beta = solve_for_B(design_matrix, dependent_variable_series, n)
    predicted_values = get_predicted_values(beta, design_matrix, n)
    residuals = predicted_values - dependent_variable_series
    calculated_BIC = BIC(predicted_values, dependent_variable_series, d = d, n = n)
    calculated_RMSE = RMSE(predicted_values, dependent_variable_series, n = n)
    return beta, calculated_BIC, calculated_RMSE, residuals

In [None]:
def train_crossv_model(df, y):
    """Randomly partition the rows of ``df`` and ``y`` into four aligned folds.

    df : DataFrame of predictors.
    y : DataFrame (or Series) of targets with the same number of rows.

    Returns an 8-tuple (X_1, y_1, X_2, y_2, X_3, y_3, X_4, y_4). The first
    three folds hold int(0.25 * m) rows each; the fourth takes the remainder.

    Rows are shuffled by POSITION. The previous version permuted the index
    labels and passed them to ``.iloc``, which is only correct when the index
    happens to be 0..m-1.
    """
    m = len(df.index)
    perm = np.random.permutation(m)
    fold_size = int(.25 * m)
    bounds = [0, fold_size, fold_size * 2, fold_size * 3, m]

    folds = []
    for start, stop in zip(bounds[:-1], bounds[1:]):
        rows = perm[start:stop]
        # The same positions are taken from both frames, keeping them aligned.
        folds.append(df.iloc[rows])
        folds.append(y.iloc[rows])
    return tuple(folds)


def get_average_RMSE_for_one_model(df, y):
    """Average RMSE and BIC over four leave-one-fold-out fits.

    The rows are split into four random folds. For each fold in turn, a linear
    model is fitted on the other three and its RMSE / BIC collected.

    Returns (average RMSE, average BIC).

    NOTE(review): train_model scores each fit on the rows it was fitted to;
    the left-out fold is never evaluated, so these are in-sample figures.
    Confirm whether held-out scoring was intended.
    """
    folds = train_crossv_model(df, y)
    X_folds, y_folds = folds[0::2], folds[1::2]
    n_folds = len(X_folds)

    rmse_values = []
    bic_values = []
    for held_out in range(n_folds):
        kept = [k for k in range(n_folds) if k != held_out]
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        X_fit = pd.concat([X_folds[k] for k in kept])
        y_fit = pd.concat([y_folds[k] for k in kept])
        _, fold_bic, fold_rmse, _ = train_model(X_fit, y_fit)
        rmse_values.append(fold_rmse)
        bic_values.append(fold_bic)

    return sum(rmse_values) / n_folds, sum(bic_values) / n_folds


In [None]:
# forward cross validated stepwise
#
# Greedy forward selection: at each step, try every remaining column on top of
# the columns already chosen and permanently add the one with the lowest
# average RMSE. Runs for at most N_STEPS steps.
N_STEPS = 50

train_data = X_train.copy()   # candidate columns not selected yet
df_with_col = pd.DataFrame()  # columns selected so far (plus the one under trial)

col_added = []
rmses = []

for i in range(N_STEPS):

    if train_data.shape[1] == 0:
        # Fewer candidate columns than N_STEPS: nothing left to try.
        print("No candidate columns left")
        break

    min_col = None
    min_RMSE = np.inf
    min_bic = np.inf

    for col in train_data.columns:

        # Temporarily add the candidate, score the model, then remove it again.
        df_with_col[col] = train_data[col]

        new_RMSE, new_bic = get_average_RMSE_for_one_model(df_with_col, y_train)

        # The scores come back as one-element, label-indexed Series when the
        # target is a one-column DataFrame. Take the single value by position
        # rather than with [0], which is a deprecated label lookup there.
        candidate_RMSE = float(np.ravel(new_RMSE)[0])
        candidate_bic = float(np.ravel(new_bic)[0])

        if candidate_RMSE <= min_RMSE:
            min_bic = candidate_bic
            min_RMSE = candidate_RMSE
            min_col = col

        df_with_col = df_with_col.drop(columns=[col])

    if min_col is not None:
        print(min_col)
        df_with_col[min_col] = train_data[min_col]
        train_data = train_data.drop(columns=[min_col])
        rmses.append(min_RMSE)
        print("bic: " + str(min_bic))
        print("rmse: " + str(min_RMSE))
        col_added.append(min_col)
        print("")
    else:
        # No candidate produced a comparable RMSE (e.g. every score was NaN);
        # repeating the same search would not change that.
        print("Failed #2")
        break


In [None]:
# RMSEs
# One point per completed selection step. The x-axis is sized from `rmses`
# itself, so the plot still works when fewer than 50 steps were completed.
plt.plot(range(len(rmses)), rmses)
plt.xlabel("selection step")
plt.ylabel("average RMSE");

In [None]:
# Columns chosen by the forward stepwise search, in the order they were added.
col_added