In [1]:
import pandas as pd
import numpy as np 
from util import get_split_cols, get_split_frame


In [2]:
train = pd.read_csv("/data/project2/train.csv")

test = pd.read_csv("/data/project2/test.csv")

In [3]:
num_train_cols, cat_train_cols = get_split_cols(train)
num_test_cols, cat_test_cols = get_split_cols(test)

In [4]:
assert(len(num_train_cols)==len(num_test_cols))
assert(len(cat_train_cols)==len(cat_test_cols))

In [5]:
num_train_df, cat_train_df = get_split_frame(train)
num_test_df, cat_test_df = get_split_frame(test)

In [6]:
y = num_train_df['Total Household Income']

In [21]:
class Neuron(object):
    def __init__(self, weights):
        self.output = None
        self.delta = None
        self.weights = weights

class NeuralNetwork(object):
    def __init__(self, num_inputs, hidden_nodes, num_outputs, eta=.005, activation='sigmoid'):
        self.outputs = None
        self.hidden_index = 0
        self.output_index = 1
        self.sum_error = 0
        self.num_hidden = hidden_nodes
        self.num_outputs = num_outputs
        self.hidden_layer_section = self.create_randomized_layer_section(num_inputs, hidden_nodes)
        self.output_layer_section = self.create_randomized_layer_section(hidden_nodes, num_outputs -1)
        self.network = [self.hidden_layer_section, self.output_layer_section]
        self.activation_type = activation
        self.eta = eta
        
    def create_randomized_layer_section(self, prev_nodes, num_nodes):
        return [Neuron(np.random.rand(prev_nodes+1)) for _ in range(num_nodes + 1)]
    
    def activation(self, neuron, prev_x):
        z = self.calc_z(neuron, prev_x)
        if self.activation == 'sigmoid':
            return 1 / (1 - exp(-z))
        elif self.activation == 'tanh':
            return np.tanh(z)
        elif self.activation == 'relu':
            if z > 0: 
                return z
            else:
                return 0 
        else:
            return -1 
        
    def d_activation(self, value):
        if self.activation == 'sigmoid':
            return value * (1 - value)
        elif self.activation == 'tanh':
            return 1 - value ** 2 
        elif self.activation == 'relu':
            if value > 0: 
                return 1
            else:
                return 0 
        else:
            return -1 
        
    def calc_z(self, neuron, prev_x):
        return sum([x * w for x,w in zip(prev_x, neuron.weights)])
    
        
    def calc_output(self, neuron, prev_x, i):
        if i == self.output_index:
            return self.calc_z(neuron, prev_x)
        return self.activation(neuron, prev_x)
    
    def output_delta(self, neuron, expected):
        return expected - neuron.output
        
    def hidden_delta(self, neuron, n_idx, next_neuron):
        return next_neuron.weights[n_idx] * next_neuron.delta * self.d_activation(neuron.output) 
        
    def forward(self, x):
        for i, layer in enumerate(self.network):
            for n in layer:
                if i == 0:
                    prev_layer = x
                else: 
                    prev_layer = [x.output for x in self.network[i-1]]
                n.output = self.calc_output(n, prev_layer, i)
                
        # print(self.network[1][0].output)
        
        
    def back_prop(self, y):
        for layer_idx, layer in reversed(list(enumerate(self.network))):
            if layer_idx == self.output_index:
                for n in layer:
                    #output layer deltas
                    n.delta = self.output_delta(n, y) #NEED n index 
            else: 
                next_layer = self.network[layer_idx+1]
                for n_i, n in enumerate(layer):
                    #hidden deltas 
                    n.delta = 0
                    for next_neuron in next_layer: 
                        n.delta += self.hidden_delta(n, n_i, next_neuron) 
        self.sum_error += (self.network[self.output_index][0].output - y) ** 2
    
    def update(self):
        for layer_index, layer in enumerate(self.network):
            for neuron in layer:
                #print(self.eta, neuron.delta)
                neuron.weights -= (self.eta * neuron.delta)    
                
                
    def train(self, x, expected, epoch):
        for e in range(epoch):
            self.sum_error = 0 
            for row, y in zip(x, expected): 
                self.forward(row)
                self.back_prop(y)
                self.update()
            print(self.sum_error)  

In [22]:
nn = NeuralNetwork(2, 2, 1, .5)
nn.forward([1, 1])
nn.back_prop( 1)
nn.train([[5,1]], [1], 20)

6.02106061437
1.50526515359
0.376316288398
0.0940790720996
0.0235197680249
0.00587994200622
0.00146998550156
0.000367496375389
9.18740938473e-05
2.29685234618e-05
5.74213086545e-06
1.43553271636e-06
3.58883179091e-07
8.97207947727e-08
2.24301986932e-08
5.60754967328e-09
1.40188741831e-09
3.5047185458e-10
8.76179636451e-11
2.19044909113e-11


In [None]:
nn = NeuralNetwork(len(num_train_cols), 100, 1, .5)
x_cols = num_train_cols
#x_cols.remove("Total Household Income")
x = num_train_df[x_cols]
nn.train(num_train_df.as_matrix(), y.as_matrix(), 5)



nan
nan
nan
