In [37]:
import numpy as np
import pandas as pd

In [38]:
X_train = pd.read_csv("/datasets/sberbank-russian-housing-market/train.csv")
df_macro = pd.read_csv("/datasets/sberbank-russian-housing-market/macro.csv")

In [39]:
y_train = X_train[["price_doc"]]
X_train = X_train.drop("price_doc", axis = 1)

In [67]:
X_train_sample = X_train[["full_sq", "life_sq","floor"]].fillna(0).sample(5000)
y_train_sample = y_train.loc[X_train_sample.index]

In [68]:
y_train_sample = y_train_sample/1000000

In [69]:
y_train_sample

Unnamed: 0,price_doc
16826,6.500000
28971,4.657415
6980,5.851250
5029,6.650000
1370,18.327100
...,...
29178,3.713450
29789,9.100000
16673,8.308714
30185,8.972619


## Neural Network

In [80]:
class NeuralNetwork():
    def fit(self, X, y, n_hidden, nodes, activations, lr):
        self._lr = lr
        self._X = X.values
        self._y = y.values
        self._n_hidden = n_hidden
        self._nodes = nodes
        self._weights = self._generate_weights()
        self._biases = self._generate_bias()
        self._activations = activations
        self._forward_inputs = []
        
        self._train()
        
    def _activation(self, data, activation = "relu"):
        if activation == "relu":
            def relu(data):
                return np.array([max(0,i) for i in data]).reshape(data.shape)
            return np.apply_along_axis(relu, 1, data)
        if activation == "sigmoid":
            def sigmoid(data):
                return (1/(1 + np.exp(-data))).reshape(data.shape)
            return np.apply_along_axis(sigmoid, 1, data)
    
    def _der_activation(self, points, activation = "relu"):
        if activation == "relu":
            def d_relu(point):
                return np.array([0 if y <= 0 else 1 for y in point])
            return np.apply_along_axis(d_relu, 1, points)
        if activation == "sigmoid":
            ## todo
            return
    
    def _loss_function(self, ypred, loss = "l2"):
        if loss == "mse":
            return ((ypred - self._y) ** 2).mean()
        if loss == "l2":
            return (((ypred - self._y) ** 2)/2)
    
    def _loss_jacobian(self, ypred, loss = "l2"):
        if loss == "l2":
            return (ypred - self._y)/(len(ypred))
    
    def _generate_weights(self):
        hidden_weights = []
        nodes = self._nodes
        for idx in range(1,len(nodes)):
            hidden_weights.append(0.01 * np.random.randn(nodes[idx -1], nodes[idx]))

        return hidden_weights
    
    def _generate_bias(self):
        hidden_layers = []
        nodes = self._nodes
        for i in range(self._n_hidden + 1):
            hidden_layers.append(np.zeros((nodes[i + 1], 1)))
        return hidden_layers
    
    def _forward_propagation(self):
        """
        Suppose 2 observations
        
        Suppose previous layer is 3 nodes
        Suppose current layer is 2 nodes
        
        prev shape (2,3)
        prev = ob1 [prev_node_1 val, prev_node_2 val, prev_node_3 val]
               ob2 [prev_node_1 val, prev_node_2 val, prev_node_3 val]
               
        layer shape (3,2)
        layer = [weight for current_node_1 for prev_node_1, weight for current_node_2 for prev_node_1]
                [weight for current_node_1 for prev_node_2, weight for current_node_2 for prev_node_2]
                [weight for current_node_1 for prev_node_3, weight for current_node_2 for prev_node_3]
                
        output shape (2,2) # since 2 observations and 2 layers
        output = ob1 [current_node_1 val, current_node_2 val]
                 ob2 [current_node_1 val, current_node_2 val]
                 
        Then for bias in current layer it is (2,1) since 2 nodes in current layer
        
        So for each row in output we add the bias row wise and apply the activation function to each row
        
        prev <- ouput
        
        Move onto next layer...
        """
        prev = self._X
        weights = self._weights
        biases = self._biases
        activations = self._activations[1:-1]
    
        for idx, layer in enumerate(weights):
            if idx == (len(weights) - 1):
                self._forward_inputs.append((prev, None))
                prev = (prev @ layer) + biases[idx].T,
            else:
                weight_output = (prev @ layer) + biases[idx].T
                self._forward_inputs.append((prev, weight_output))
                prev = self._activation(data = weight_output, activation = activations[idx])

        return prev
    
    def _backward_propagation(self, ypred):
        j = self._loss_jacobian(ypred)
        #print("\nj\n")
        #print(j)
                
        for i in range(len(self._forward_inputs)-1, -1, -1):
            if i != (len(self._forward_inputs) - 1):
                # activation func on all layers except the last
                der_acti = self._der_activation(self._forward_inputs[i][1])
                j = np.multiply(j,der_acti)

            x = self._forward_inputs[i][0]
            #print("\nx:")
            #print(x)
            jw = x.T.dot(j)
            #print("\nweights before:")
            #print(self._weights[i])
            self._weights[i] -= self._lr * jw
            #print("\nweights after:")
            #print(self._weights[i])
            # todo: update bias
            j = j.dot(self._weights[i].T)
            
        self._forward_inputs = []
        
    
    def _train(self):
        for i in range(0, 100):
            out = self._forward_propagation()
            loss = self._loss_function(out[0])
            mse = self._loss_function(out[0], loss = "mse")
            print("\nloss:")
            print(self._loss_function(out[0]).mean())
            print("nmse:")
            print(mse)
            #print("\npredictions\n")
            #print(out)
            self._backward_propagation(out[0])

In [81]:
INPUT_SIZE = X_train_sample.shape[1]
OUTPUT_SIZE = 1
LEARNING_RATE = 0.001
nodes = [INPUT_SIZE,50,OUTPUT_SIZE]
activations = ["relu" for i in range(len(nodes))]

nn = NeuralNetwork()

nn.fit(X = X_train_sample,
       y = y_train_sample,
       n_hidden = len(nodes) - 2,
       nodes = nodes,
       activations = activations,
       lr = LEARNING_RATE)


loss:
37.68895810752857
nmse:
75.37791621505714

loss:
35.92765305090365
nmse:
71.8553061018073

loss:
32.64316179440443
nmse:
65.28632358880886

loss:
27.39337281629731
nmse:
54.78674563259462

loss:
23.286256232158642
nmse:
46.572512464317285

loss:
22.013669995923006
nmse:
44.02733999184601

loss:
21.21967247536317
nmse:
42.43934495072634

loss:
20.431768908347312
nmse:
40.863537816694624

loss:
19.64663463553195
nmse:
39.2932692710639

loss:
18.870810330410716
nmse:
37.74162066082143

loss:
18.11700855348271
nmse:
36.23401710696542

loss:
17.395778307104813
nmse:
34.791556614209625

loss:
16.728202629564638
nmse:
33.456405259129276

loss:
16.132053090240756
nmse:
32.26410618048151

loss:
15.63997621709854
nmse:
31.27995243419708

loss:
15.238226845308311
nmse:
30.476453690616623

loss:
14.912118820783714
nmse:
29.82423764156743

loss:
14.658356209590307
nmse:
29.316712419180615

loss:
14.477137667626126
nmse:
28.95427533525225

loss:
14.350366373369415
nmse:
28.70073274673883

los