In [58]:
import numpy as np
import pandas as pd

In [59]:
# Load train.csv and macro.csv from the sibling Data directory.
X_train = pd.read_csv(filepath_or_buffer="../Data/train.csv")
df_macro = pd.read_csv(filepath_or_buffer="../Data/macro.csv")

In [60]:
# Split the "price_doc" target column off the feature table.
# X_train is rebound without the target, so re-running this cell on its own
# fails with a KeyError; reload the data first.
y_train = X_train.loc[:, ["price_doc"]]
X_train = X_train.drop(columns=["price_doc"])

In [61]:
# Draw a 5000-row subsample of three columns, with missing values replaced by 0.
# random_state pins the draw so the same rows are selected on every run.
X_train_sample = (
    X_train[["full_sq", "life_sq", "floor"]]
    .fillna(0)
    .sample(n=5000, random_state=42)
)
# Pick the target rows that carry the same index labels as the sampled features.
y_train_sample = y_train.loc[X_train_sample.index]

In [62]:
# Rescale the sampled target by a factor of one million.
# The name is rebound, so every re-run of this cell divides again.
y_train_sample = y_train_sample.div(10 ** 6)

In [126]:
# Load the two pre-processed training tables (both files are read as CSV).
df_train = pd.read_csv(filepath_or_buffer="../Data/df_train_without_dummies")
df_train_dummies = pd.read_csv(filepath_or_buffer="../Data/df_train_dummies")

In [127]:
# Combine both tables; with no keys given, pandas performs an inner join on
# every column name the two frames have in common.
df_full = pd.merge(df_train, df_train_dummies)

In [128]:
# Names of the columns selected from df_full as model inputs (order is preserved).
col_added = [
    "num_room",
    "full_sq",
    "sadovoe_km",
    "deposits_value",
    "state_4.0",
    "bulvar_ring_km",
    "sport_count_3000",
    "unemployment",
    "floor",
    "big_market_km",
    "build_count_before_1920",
    "life_sq",
    "big_church_count_2000",
    "sport_count_500",
    "mosque_count_5000",
    "cpi",
    "cafe_count_1000_price_high",
    "state_2.0",
    "prom_part_3000",
    "invest_fixed_capital_per_cap",
    "nuclear_reactor_km",
    "green_part_2000",
    "cafe_count_5000_price_2500",
    "material_4.0",
    "cafe_count_500_price_high",
    "cafe_avg_price_5000",
    "max_floor",
    "mkad_km",
    "material_6.0",
    "cafe_count_2000",
    "big_church_count_3000",
    "office_sqm_1500",
    "university_km",
    "0_17_female",
    "cafe_count_2000_price_4000",
    "leisure_count_500",
    "industrial_km",
    "school_quota",
    "0_17_male",
    "church_count_3000",
    "students_state_oneshift",
    "pop_total_inc",
    "heating_share",
    "gdp_deflator",
    "ttk_km",
    "school_education_centers_raion",
    "prom_part_1500",
    "office_count_5000",
    "0_13_all",
    "fitness_km",
]

In [129]:
# Keep only the columns listed in col_added, in that order.
X_train = df_full.loc[:, col_added]

In [168]:
# Load the final feature table and discard its "Unnamed: 0" column.
X_train = pd.read_csv("../Data/df_train_final").drop(columns=["Unnamed: 0"])

In [169]:
# Load the final target table and discard its "Unnamed: 0" column.
y_train = pd.read_csv("../Data/y_train_final").drop(columns=["Unnamed: 0"])

In [170]:
# Standardise every feature column: subtract its mean, divide by its standard deviation.
X_train_normalize = X_train.sub(X_train.mean()).div(X_train.std())

In [171]:
# Standardise the target the same way: subtract the mean, divide by the standard deviation.
y_train_normalize = y_train.sub(y_train.mean()).div(y_train.std())

In [172]:
# Bare last expression: the notebook renders the standardised target frame as the cell output.
y_train_normalize

Unnamed: 0,price_doc
0,-0.265169
1,-0.234011
2,-0.296328
3,1.240843
4,0.409940
...,...
27419,0.056806
27420,3.712780
27421,-0.032317
27422,1.323933


## Neural Network

In [189]:
class NeuralNetwork():
    """Fully connected feed-forward network for regression, trained by gradient descent.

    Layer sizes are given by ``nodes`` (input size, hidden sizes, output size).
    Hidden layers apply the activation named in ``activations[1:-1]``; the
    output layer is linear. Weights are updated in place after every forward
    pass, and the learning rate is halved whenever the loss fails to improve
    on the best loss seen so far.
    """

    def fit(self, X, y, n_hidden, nodes, activations, lr, batch_size = 0, max_iter = None):
        """Initialise the parameters and run the training loop.

        Parameters
        ----------
        X : array-like, shape (n_samples, nodes[0])
            Feature matrix (a DataFrame or anything ``np.asarray`` accepts).
        y : array-like, shape (n_samples, nodes[-1]) or (n_samples,)
            Targets; a 1-D input is reshaped into a single column.
        n_hidden : int
            Number of hidden layers; must equal ``len(nodes) - 2``.
        nodes : list of int
            Size of every layer, input and output included.
        activations : list of str
            One name per layer ("relu" or "sigmoid"); only ``activations[1:-1]``
            is used, one entry per hidden layer.
        lr : float
            Initial learning rate.
        batch_size : int, default 0
            Rows drawn at random for each step; 0 means use the full data set.
        max_iter : int or None, default None
            Optional cap on the number of update steps. ``None`` keeps the
            original behaviour of running until the learning rate drops
            below the tolerance.

        Raises
        ------
        ValueError
            If ``nodes``, ``n_hidden`` and ``activations`` are inconsistent.
        """
        features = np.asarray(X, dtype=float)
        targets = np.asarray(y, dtype=float)
        if targets.ndim == 1:
            # Batch slicing below indexes two axes, so keep targets 2-D.
            targets = targets.reshape(-1, 1)

        if len(nodes) != n_hidden + 2:
            raise ValueError("nodes must contain n_hidden + 2 entries (input, hidden..., output)")
        if len(activations[1:-1]) < n_hidden:
            raise ValueError("activations[1:-1] must name an activation for every hidden layer")

        self._lr = lr
        self._X = features
        self._y = targets
        self._n_hidden = n_hidden
        self._nodes = nodes
        self._weights = self._generate_weights()
        self._biases = self._generate_bias()
        self._activations = activations
        self._forward_inputs = []
        self._batch_size = batch_size

        self._train(max_iter)

    def predict(self, X):
        """Return the network output for ``X`` without touching training state."""
        return self._forward(np.asarray(X, dtype=float), record = False)

    def _activation(self, data, activation = "relu"):
        """Apply the named activation element-wise to ``data``.

        Vectorised on purpose: building the result row by row with
        ``np.apply_along_axis`` takes the output dtype from the first row, so
        an all-zero (integer) first ReLU row silently truncated later rows.
        """
        if activation == "relu":
            return np.maximum(data, 0)
        if activation == "sigmoid":
            return 1/(1 + np.exp(-data))
        raise ValueError("unknown activation: " + str(activation))

    def _der_activation(self, points, activation = "relu"):
        """Derivative of the named activation evaluated at pre-activation ``points``."""
        if activation == "relu":
            # 0 where the unit was inactive (input <= 0), 1 elsewhere.
            return np.where(points <= 0, 0, 1)
        if activation == "sigmoid":
            sig = 1/(1 + np.exp(-points))
            return sig * (1 - sig)
        raise ValueError("unknown activation: " + str(activation))

    def _current_targets(self):
        """Targets matching the rows used by the latest forward pass."""
        if self._batch_size > 0:
            return self._batchy
        return self._y

    def _loss_function(self, ypred, loss = "l2"):
        """Mean loss of ``ypred`` against the current targets ("l2" is half the "mse")."""
        y = self._current_targets()
        if loss == "mse":
            return ((ypred - y) ** 2).mean()
        if loss == "l2":
            return (((ypred - y) ** 2)/2).mean()
        raise ValueError("unknown loss: " + str(loss))

    def _loss_jacobian(self, ypred, loss = "l2"):
        """Gradient of the loss with respect to ``ypred`` (averaged over rows)."""
        y = self._current_targets()
        if loss == "l2":
            return (ypred - y)/(len(ypred))
        if loss == "mse":
            return 2 * (ypred - y)/(len(ypred))
        raise ValueError("unknown loss: " + str(loss))

    def _generate_weights(self):
        """One (fan_in, fan_out) matrix per pair of consecutive layers, small random values."""
        nodes = self._nodes
        return [0.01 * np.random.randn(fan_in, fan_out)
                for fan_in, fan_out in zip(nodes[:-1], nodes[1:])]

    def _generate_bias(self):
        """One zero column vector of shape (layer_size, 1) per non-input layer."""
        return [np.zeros((size, 1)) for size in self._nodes[1:]]

    def _forward_propagation(self):
        """
        Forward pass over the current batch (or the full data when batch_size is 0),
        recording every layer's input and pre-activation for back-propagation.

        Suppose 2 observations

        Suppose previous layer is 3 nodes
        Suppose current layer is 2 nodes

        prev shape (2,3)
        prev = ob1 [prev_node_1 val, prev_node_2 val, prev_node_3 val]
               ob2 [prev_node_1 val, prev_node_2 val, prev_node_3 val]

        layer shape (3,2)
        layer = [weight for current_node_1 for prev_node_1, weight for current_node_2 for prev_node_1]
                [weight for current_node_1 for prev_node_2, weight for current_node_2 for prev_node_2]
                [weight for current_node_1 for prev_node_3, weight for current_node_2 for prev_node_3]

        output shape (2,2) # since 2 observations and 2 nodes
        output = ob1 [current_node_1 val, current_node_2 val]
                 ob2 [current_node_1 val, current_node_2 val]

        Then for bias in current layer it is (2,1) since 2 nodes in current layer

        So for each row in output we add the bias row wise and apply the activation function to each row

        prev <- output

        Move onto next layer...

        Returns the output-layer values as an array of shape (n_rows, nodes[-1]).
        """
        if self._batch_size > 0:
            inputs = self._batchX
        else:
            inputs = self._X
        return self._forward(inputs, record = True)

    def _forward(self, inputs, record):
        """Run ``inputs`` through every layer; optionally record per-layer state."""
        hidden_activations = self._activations[1:-1]
        last = len(self._weights) - 1
        prev = inputs

        for idx, (layer, bias) in enumerate(zip(self._weights, self._biases)):
            weight_output = (prev @ layer) + bias.T
            if idx == last:
                # Output layer is linear: no activation, nothing to differentiate later.
                if record:
                    self._forward_inputs.append((prev, None))
                prev = weight_output
            else:
                if record:
                    self._forward_inputs.append((prev, weight_output))
                prev = self._activation(data = weight_output, activation = hidden_activations[idx])

        return prev

    def _backward_propagation(self, ypred):
        """Back-propagate the loss gradient and apply one gradient-descent step.

        The weight and bias arrays held by the instance are updated in place;
        the same lists are returned for convenience. The state recorded by
        the forward pass is cleared afterwards.
        """
        j = self._loss_jacobian(ypred)
        hidden_activations = self._activations[1:-1]
        last = len(self._forward_inputs) - 1

        for i in range(last, -1, -1):
            layer_input, pre_activation = self._forward_inputs[i]
            if i != last:
                # activation func on all layers except the last
                der_acti = self._der_activation(pre_activation, hidden_activations[i])
                j = np.multiply(j, der_acti)

            jw = layer_input.T.dot(j)
            # Bias gradient: sum over the rows of the batch, kept as a column vector.
            jb = j.sum(axis = 0).reshape(-1, 1)

            # Propagate through the weights BEFORE they are updated.
            j = j.dot(self._weights[i].T)

            self._weights[i] -= self._lr * jw
            self._biases[i] -= self._lr * jb

        self._forward_inputs = []
        return self._weights, self._biases

    def _train(self, max_iter = None):
        """Repeat forward/backward passes until the learning rate falls below the tolerance.

        Each step prints the l2 loss and the mse. When the loss is not lower
        than the best loss seen so far, the learning rate is halved; training
        stops once it drops below 0.0001 (or after ``max_iter`` updates when
        a cap is given).

        NOTE(review): with batch_size > 0 consecutive losses come from
        different random batches, so the schedule also reacts to sampling
        noise — confirm this is intended.
        """
        old_loss = np.inf
        tol = 0.0001
        n_updates = 0
        while max_iter is None or n_updates < max_iter:
            if self._batch_size > 0:
                n_rows = self._X.shape[0]
                batch_index = np.random.permutation(n_rows)[:self._batch_size]
                self._batchX = self._X[batch_index,:]
                self._batchy = self._y[batch_index,:]
            out = self._forward_propagation()
            loss = self._loss_function(out)
            mse = self._loss_function(out, loss = "mse")
            print("\nloss:")
            print(loss)
            print("mse:")
            print(mse)
            if loss < old_loss:
                old_loss = loss
            else:
                self._lr = self._lr/2
                print("Decreasing learning rate. New rate is " + str(self._lr))
                if self._lr < tol:
                    # Drop the state recorded by the pass that will not be back-propagated.
                    self._forward_inputs = []
                    break
            self._backward_propagation(out)
            n_updates += 1

In [193]:
# Seed NumPy's global RNG: the network draws its initial weights and its
# mini-batches from np.random, so without a seed every run differs.
SEED = 42
np.random.seed(SEED)

INPUT_SIZE = X_train_normalize.shape[1]
OUTPUT_SIZE = 1
LEARNING_RATE = 0.1
# Layer sizes: input, three hidden layers of 100 units, single output.
nodes = [INPUT_SIZE,100,100,100,OUTPUT_SIZE]
# One name per layer; the network only reads activations[1:-1] (hidden layers).
activations = ["relu"] * len(nodes)

nn = NeuralNetwork()

nn.fit(X = X_train_normalize,
       y = y_train_normalize,
       n_hidden = len(nodes) - 2,
       nodes = nodes,
       activations = activations,
       lr = LEARNING_RATE,
       batch_size = 100)


loss:
2.116094374941181
mse:
4.232188749882362

loss:
0.3644519756410809
mse:
0.7289039512821618

loss:
0.6943607985896049
mse:
1.3887215971792097
Decreasing learning rate. New rate is 0.05

loss:
0.7120339062155824
mse:
1.4240678124311648
Decreasing learning rate. New rate is 0.025

loss:
0.32840028988837644
mse:
0.6568005797767529

loss:
0.4989303882415925
mse:
0.997860776483185
Decreasing learning rate. New rate is 0.0125

loss:
0.6819586194147176
mse:
1.3639172388294352
Decreasing learning rate. New rate is 0.00625

loss:
0.8440459638201975
mse:
1.688091927640395
Decreasing learning rate. New rate is 0.003125

loss:
0.5613308911327994
mse:
1.1226617822655989
Decreasing learning rate. New rate is 0.0015625

loss:
0.6057394189386764
mse:
1.2114788378773529
Decreasing learning rate. New rate is 0.00078125

loss:
0.44979797989118014
mse:
0.8995959597823603
Decreasing learning rate. New rate is 0.000390625

loss:
0.24427040649245096
mse:
0.48854081298490193

loss:
0.690945230987479
mse