In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import math
import time
import sys
%matplotlib inline

In [2]:
class Network(object):
    """Fully connected feed-forward network with tanh units.

    Every layer carries an extra bias node (constant 1) stored at index 0 of
    its column vector, so the weight matrix between a layer of ``p`` nodes and
    the next layer of ``q`` nodes has shape ``(q, p + 1)``.

    The sensitivities computed in ``backpropagation`` use ``1 - x**2`` as the
    derivative of the activation, i.e. they assume that the module-level
    ``activation_function`` is tanh.
    """

    def __init__(self, layer_node):
        """Create a network with random (standard normal) weights.

        layer_node -- list with the number of nodes in each layer, input
                      layer first.  Entries are converted to int so that
                      sizes read back from a float column are accepted.
        """
        layer_node = [int(node_count) for node_count in layer_node]
        self.num_layer = len(layer_node)
        self.layer_node = layer_node
        self.input_node = []
        self.weights = [np.random.randn(next_node, previous_node + 1)
                        for next_node, previous_node in zip(layer_node[1:], layer_node[:-1])]

    @staticmethod
    def _with_bias(column):
        """Return ``column`` (shape (n, 1)) with the bias node 1 prepended."""
        return np.append([[1]], column, axis=0)

    def _feedforward(self, input_with_bias):
        """Return the list of layer outputs (each with its bias node), input first."""
        x = [input_with_bias]
        for weight in self.weights:
            x.append(self._with_bias(activation_function(np.dot(weight, x[-1]))))
        return x

    def _as_samples(self, data):
        """Convert a frame/array to a 2-D float array with one sample per row.

        Each row must hold the inputs followed by the targets.
        Raises ValueError when the row width does not match the network.
        """
        samples = np.asarray(data, dtype=float)
        expected_width = self.layer_node[0] + self.layer_node[-1]
        if samples.ndim != 2 or samples.shape[1] != expected_width:
            raise ValueError("Each sample needs {0} values (inputs followed by targets), got shape {1}"
                             .format(expected_width, samples.shape))
        return samples

    def add_input_node(self, input_vector):
        """Load ``input_vector`` as the current network input.

        input_vector -- the input values, normally of shape (n, 1) with n
                        equal to the size of the input layer.
        Raises ValueError on a length mismatch; the previously loaded input
        is left untouched in that case.
        """
        column = np.asarray(input_vector, dtype=float).reshape(-1, 1)
        if column.shape[0] != self.layer_node[0]:
            raise ValueError("Input vector length invalid: given len {1}, need len {0}"
                             .format(self.layer_node[0], column.shape[0]))
        self.input_node = self._with_bias(column)  # add bias node

    def forward_propagation(self):
        """Return the sign of the first output node for the loaded input.

        Raises ValueError when no input has been loaded with add_input_node().
        """
        if len(self.input_node) == 0:
            raise ValueError("No input loaded: call add_input_node() first")
        output_value = self._feedforward(self.input_node)[-1]
        return np.sign(output_value[1, 0])  # index 0 is the bias node

    def backpropagation(self, input_vector, output_vector):
        """Run one forward and one backward pass for a single sample.

        input_vector  -- inputs, shape (n_in, 1)
        output_vector -- targets, shape (n_out, 1)

        Returns ``(x, sensitivity)``: ``x[l]`` is the output of layer ``l``
        including its bias node, and ``sensitivity[l]`` is the derivative of
        the squared error with respect to the pre-activation values of layer
        ``l + 1``.
        """
        input_column = np.asarray(input_vector, dtype=float).reshape(-1, 1)
        target_column = np.asarray(output_vector, dtype=float).reshape(-1, 1)

        # feedforward, keeping the output of every layer
        x = self._feedforward(self._with_bias(input_column))
        sensitivity = [None] * len(self.weights)

        # output layer: d/ds (x - y)^2 with x = tanh(s); the bias node is omitted
        output = x[-1][1:]
        sensitivity[-1] = 2.0 * (output - target_column) * (1 - np.square(output))

        # backpropagation, compute sensitivity backward
        for layer_index in range(2, self.num_layer):
            w = self.weights[-layer_index + 1]
            hidden = x[-layer_index][1:]
            # row 0 of w.T belongs to the bias node, which has no sensitivity
            sensitivity[-layer_index] = (1 - np.square(hidden)) * np.dot(w.T, sensitivity[-layer_index + 1])[1:]
        return x, sensitivity

    def SGD(self, training_data, testing_data, eta, epochs, print_message=True, visualize=True):
        """Train with per-sample gradient descent and evaluate after every epoch.

        training_data, testing_data -- frames/arrays whose rows hold the
                                       inputs followed by the target(s)
        eta           -- learning rate
        epochs        -- number of passes over ``training_data``; samples are
                         visited in row order on every pass
        print_message -- print the final error count and ratio
        visualize     -- plot the results through the module-level
                         ``visualization`` helper

        Returns ``(error_ratio_list, predicted_result, result_df)``:
        the test error ratio after each epoch, the predictions of the last
        evaluation, and the frame returned by ``visualization`` (None when
        nothing was plotted).  With ``epochs == 0`` nothing is evaluated,
        printed or plotted.
        """
        n_in = self.layer_node[0]
        training_samples = self._as_samples(training_data)

        error_ratio_list = []
        error_count = 0
        predicted_result = []
        result_df = None

        for _ in range(epochs):
            for sample in training_samples:
                x, sensitivity = self.backpropagation(sample[:n_in].reshape(-1, 1),
                                                      sample[n_in:].reshape(-1, 1))
                # x and sensitivity were both computed before any weight of
                # this step changed, so the layers can be updated in any order
                for l in range(len(self.weights)):
                    self.weights[l] = self.weights[l] - eta * np.dot(sensitivity[l], np.transpose(x[l]))

            error_ratio, error_count, predicted_result = self.evaluate(testing_data)
            error_ratio_list.append(error_ratio)

        if print_message and error_ratio_list:
            print("Error count = {0}/{1}".format(error_count, len(predicted_result)))
            print("Error ratio =  {0}".format(error_ratio_list[-1]))

        if visualize and error_ratio_list:
            result_df = visualization(testing_data, error_ratio_list, predicted_result)

        return error_ratio_list, predicted_result, result_df

    def evaluate(self, testing_data):
        """Classify every row of ``testing_data`` and count the mistakes.

        A prediction is the sign of the first output node and is compared
        with the first target value of the row.

        Returns ``(error_ratio, error_count, predicted_result)`` where
        ``error_count`` is a plain int.  For empty data the ratio is NaN.
        """
        n_in = self.layer_node[0]
        samples = self._as_samples(testing_data)

        error_count = 0
        predicted_result = []
        for sample in samples:
            self.add_input_node(sample[:n_in])
            predicted_value = self.forward_propagation()
            predicted_result.append(predicted_value)
            if predicted_value != sample[n_in]:
                error_count += 1

        if len(samples) == 0:
            return float('nan'), error_count, predicted_result
        return float(error_count) / len(samples), error_count, predicted_result

In [3]:
def visualization(df, MSE, predicted_result):
    """Plot the error curve and the true/predicted class scatter plots.

    df               -- frame with columns ``x``, ``y`` and ``Class``
    MSE              -- sequence of per-epoch values drawn in the first
                        figure (the callers in this notebook pass error
                        ratios, hence the "Error Ratio Plot" title)
    predicted_result -- one predicted class per row of ``df``

    Returns a copy of ``df`` with an added ``Predicted`` column; ``df``
    itself is not modified.
    """
    result_df = df.copy()
    result_df['Predicted'] = predicted_result

    plt.figure(1)
    plt.plot(MSE)
    plt.title("Error Ratio Plot")

    # Labels index into this array: 0 and 1 -> blue, -1 (last entry) -> red.
    colormap = np.array(['b', 'b', 'r'])
    # Labels are floats (they come from np.sign); NumPy only accepts integer
    # indices, so convert explicitly.
    true_colors = colormap[np.asarray(result_df['Class']).astype(int)]
    predicted_colors = colormap[np.asarray(result_df['Predicted']).astype(int)]

    plt.figure(2)
    plt.scatter(result_df['x'], result_df['y'], c=true_colors, s=40)
    plt.title("True Plot")

    plt.figure(3)
    plt.scatter(result_df['x'], result_df['y'], c=predicted_colors, s=40)
    plt.title("Predicted Plot")

    plt.show()

    return result_df

In [4]:
def activation_function(z):
    """Apply the hyperbolic tangent element-wise to ``z`` and return the result."""
    squashed = np.tanh(z)
    return squashed

#### Create dataframe

In [5]:
def str_to_float(df):
    """Return a new frame with every cell converted to float.

    Cells written with a decimal comma (``"1,5"``) are parsed as ``1.5``.
    Cells that are already numeric are converted as well instead of failing
    on the missing ``str.replace``.  ``df`` itself is not modified.
    """
    def _to_float(value):
        return float(str(value).replace(',', '.'))

    # Column-wise Series.map is used instead of DataFrame.applymap, which is
    # deprecated in recent pandas releases.
    return df.apply(lambda column: column.map(_to_float).astype(float))

In [6]:
def z_score(df, columns=('x', 'y')):
    """Return a copy of ``df`` with the given columns standardised.

    Each listed column is shifted to zero mean and divided by its population
    standard deviation (``ddof=0``).  A column without any spread is only
    centred (all zeros) instead of being turned into NaN by a 0/0 division.

    df      -- input frame; it is not modified
    columns -- names of the columns to standardise (default: ``x`` and ``y``)
    """
    new_df = df.copy()
    for name in columns:
        centered = new_df[name] - new_df[name].mean()
        spread = new_df[name].std(ddof=0)
        if spread > 0:
            new_df[name] = centered / spread
        else:
            new_df[name] = centered * 0.0
    return new_df

In [8]:
def create_df(file_link=None, file_name=None):
    """Load the space-separated data file and prepare it for training.

    The file is read without a header as the three columns ``x``, ``y`` and
    ``Value``.  All cells are converted to float with ``str_to_float``, the
    ``x``/``y`` columns are standardised with ``z_score`` and ``Value`` is
    replaced by ``Class = sign(Value - 0.5)``.

    file_link -- path of the file (original positional parameter)
    file_name -- alias for ``file_link``; the notebook calls the function
                 with this keyword
    Raises TypeError when neither path is given.
    """
    path = file_name if file_name is not None else file_link
    if path is None:
        raise TypeError("create_df() needs the path of the data file")

    df = pd.read_csv(path, sep=' ', header=None)
    df.columns = ['x', 'y', 'Value']
    df = str_to_float(df)
    df = z_score(df)
    df['Class'] = np.sign(df['Value'] - .5)
    # axis is passed by keyword: the positional form was removed in pandas 2
    df = df.drop('Value', axis=1)
    return df

In [11]:
# Build the working frame from the raw file: float parsing, z-scored x/y and a signed Class column.
# NOTE(review): this call relies on create_df accepting a `file_name` keyword.
df = create_df(file_name="donneespb4.txt")

In [12]:
# Train a 2-30-1 network on the whole data set (learning rate 0.01, 200 epochs),
# evaluating on the same data, and report the elapsed wall-clock time.
# time.time() is used because time.clock() no longer exists on Python >= 3.8.
net = Network([2,30,1])
t0 = time.time()
MSE_list, predicted_result, result_df = net.SGD(df,df,0.01,200)
print("Processing time: {0} seconds".format((time.time() - t0)))

KeyboardInterrupt: 

#### 10-fold Cross-validation

In [52]:
from sklearn.cross_validation import KFold

In [53]:
def kfold_neural_net(neural_net, n_folds, data, eta, epochs, print_message=True):
    """Estimate the error ratio of a network layout by k-fold cross-validation.

    The rows of ``data`` are split, in order and without shuffling, into
    ``n_folds`` consecutive folds; the first ``len(data) % n_folds`` folds
    get one extra row.  For every fold a fresh ``Network`` is trained on the
    remaining rows and evaluated on the fold.

    The folds are built with NumPy, so the function does not depend on the
    scikit-learn ``KFold`` API (whose ``sklearn.cross_validation`` module
    and ``n``/``n_folds`` arguments no longer exist in current releases).

    neural_net    -- list of layer sizes handed to ``Network``
    n_folds       -- number of folds, between 2 and the number of rows
    data          -- frame whose rows hold the inputs followed by the target
    eta, epochs   -- learning rate and number of epochs for every fold
    print_message -- print the per-fold results and the final average

    Returns the average of the final error ratios of all folds.
    Raises ValueError for an invalid ``n_folds``.
    """
    n_samples = data.shape[0]
    n_folds = int(n_folds)
    if not 2 <= n_folds <= n_samples:
        raise ValueError("n_folds={0} must be between 2 and the number of samples ({1})"
                         .format(n_folds, n_samples))

    positions = np.arange(n_samples)
    cumulative_error_ratio = 0.0
    for counter, test_index in enumerate(np.array_split(positions, n_folds), 1):
        # every fold is a contiguous run, so the training rows are whatever
        # lies before and after it
        train_index = np.concatenate([positions[:test_index[0]], positions[test_index[-1] + 1:]])

        net = Network(neural_net)
        train_set = data.iloc[train_index, :]
        test_set = data.iloc[test_index, :]
        if print_message:
            print("----Iteration {0}----".format(counter))
        error_ratio_list, predicted_result, result_df = net.SGD(
            training_data=train_set, testing_data=test_set,
            eta=eta, epochs=epochs, print_message=print_message, visualize=False)
        cumulative_error_ratio += error_ratio_list[-1]
        if print_message:
            print("")

    average_error_ratio = cumulative_error_ratio / float(n_folds)
    if print_message:
        print("Average error ratio =  {0}".format(average_error_ratio))
    return average_error_ratio

In [11]:
# 10-fold cross-validation of a 2-30-1 network (learning rate 0.01, 5 epochs per fold);
# the value displayed by the cell is the returned average error ratio.
kfold_neural_net(neural_net=[2,30,1], n_folds=10,data=df,eta=0.01,epochs=5)

----Iteration 1----
Error count = 15/300
Error ratio =  0.05

----Iteration 2----
Error count = 15/300
Error ratio =  0.05

----Iteration 3----
Error count = 23/300
Error ratio =  0.0766666666667

----Iteration 4----
Error count = 40/300
Error ratio =  0.133333333333

----Iteration 5----
Error count = 10/300
Error ratio =  0.0333333333333

----Iteration 6----
Error count = 8/300
Error ratio =  0.0266666666667

----Iteration 7----
Error count = 13/300
Error ratio =  0.0433333333333

----Iteration 8----
Error count = 5/300
Error ratio =  0.0166666666667

----Iteration 9----
Error count = 42/300
Error ratio =  0.14

----Iteration 10----
Error count = 40/300
Error ratio =  0.133333333333

Average error ratio =  0.0703333333333


0.07033333333333333

#### Parameter selection

In [45]:
def create_parameter_df(max_learning_rate, min_learning_rate, max_neuron_num, min_neuron_num,
                        learning_rate_steps=10, neuron_step=10):
    """Build the grid of hyper-parameter combinations to test.

    Learning rates are ``learning_rate_steps`` evenly spaced values from
    ``min_learning_rate`` to ``max_learning_rate`` (both included).  Hidden
    layer sizes are ``range(min_neuron_num, max_neuron_num, neuron_step)``
    without non-positive sizes, so a range starting at 0 loses its 0 while a
    range starting at a positive size keeps its first value.

    Returns a frame with one row per (size, learning rate) pair, sizes
    varying slowest, in the columns ``Neuron_number``, ``Learning_rate`` and
    an unfilled ``Error ratio``.
    """
    learning_rate = np.linspace(min_learning_rate, max_learning_rate, learning_rate_steps)
    neuron_num = [num for num in range(min_neuron_num, max_neuron_num, neuron_step) if num > 0]

    # every size is repeated once per learning rate ...
    neuron_num_list = [num for num in neuron_num for _ in learning_rate]
    # ... and the whole list of learning rates is repeated once per size
    learning_rate_lst = list(learning_rate) * len(neuron_num)

    parameter_df = pd.DataFrame(columns=['Neuron_number', 'Learning_rate', 'Error ratio'])
    parameter_df['Neuron_number'] = neuron_num_list
    parameter_df['Learning_rate'] = learning_rate_lst

    return parameter_df

In [46]:
# Candidate grid: learning rates from 0.001 to 0.1 combined with hidden-layer sizes below 55 in steps of 10.
parameter_df = create_parameter_df(max_learning_rate=0.1, min_learning_rate=0.001, max_neuron_num=55, min_neuron_num=0)

In [47]:
# Preview the first rows of the parameter grid.
parameter_df.head()

Unnamed: 0,Neuron_number,Learning_rate,Error ratio
0,10,0.001,
1,10,0.012,
2,10,0.023,
3,10,0.034,
4,10,0.045,


In [48]:
# (rows, columns) of the parameter grid.
parameter_df.shape

(50, 3)

In [54]:
def parameter_testing(df, parameter_df, n_folds=10, epochs=20):
    """Cross-validate every row of the parameter grid and report the best one.

    For each (``Neuron_number``, ``Learning_rate``) row a 2-N-1 network is
    scored with ``kfold_neural_net``; progress and every new best result are
    printed as the search runs.  The artificial 0.2 s pauses of the earlier
    version are gone; stdout is still flushed after each step.

    df           -- data handed to ``kfold_neural_net``
    parameter_df -- grid with the columns ``Neuron_number`` and
                    ``Learning_rate``; it is not modified
    n_folds      -- folds per cross-validation (default 10)
    epochs       -- training epochs per fold (default 20)

    Returns ``(new_parameter_df, best_row)``: a copy of the grid whose
    ``Error ratio`` column holds the measured averages, and the row of that
    copy with the lowest error ratio (None for an empty grid).
    """
    new_parameter_df = parameter_df.copy()
    total = new_parameter_df.shape[0]

    error_ratios = []
    min_error_ratio = float('inf')
    best_position = None
    best_neuron_number = None
    best_learning_rate = None

    # Read the two columns directly: iterating over rows would mix the
    # column dtypes and only hand out copies that cannot store the result.
    candidates = zip(new_parameter_df['Neuron_number'], new_parameter_df['Learning_rate'])
    for position, (neuron_number, learning_rate) in enumerate(candidates):
        print("Calculating ({0}/{1})...".format(position + 1, total))
        print("")
        sys.stdout.flush()

        neuron_number = int(neuron_number)  # a layer size must be an integer
        learning_rate = float(learning_rate)
        layers = [2, neuron_number, 1]
        average_error_ratio = kfold_neural_net(neural_net=layers, n_folds=n_folds, data=df,
                                               eta=learning_rate, epochs=epochs,
                                               print_message=False)
        error_ratios.append(average_error_ratio)

        if average_error_ratio < min_error_ratio:
            min_error_ratio = average_error_ratio
            best_position = position
            best_neuron_number = neuron_number
            best_learning_rate = learning_rate
            print("New best error ratio:  {0}".format(average_error_ratio))
            print("   Number of neurons in the hidden layer:  {0}".format(neuron_number))
            print("   Learning rate:  {0}".format(learning_rate))
            print("")
        sys.stdout.flush()

    new_parameter_df['Error ratio'] = error_ratios

    if best_position is None:
        print("No parameter combination was evaluated.")
        return new_parameter_df, None

    best_row = new_parameter_df.iloc[best_position]
    print("----------------------------------------------------------")
    print("Best error ratio:  {0}".format(min_error_ratio))
    print("Number of neurons in the hidden layer:  {0}".format(best_neuron_number))
    print("Learning rate:  {0}".format(best_learning_rate))
    print("----------------------------------------------------------")

    return new_parameter_df, best_row

In [55]:
# Run the grid search and report the elapsed wall-clock time in minutes.
# time.time() is used because time.clock() no longer exists on Python >= 3.8.
t0 = time.time()
param_df, best_param = parameter_testing(df=df, parameter_df=parameter_df)
print("Processing time: {0} minutes".format((time.time() - t0)/60))

Calculating (1/50)...

New best error ratio:  0.170333333333
   Number of neurons in the hidden layer:  10
   Learning rate:  0.001

Calculating (2/50)...

New best error ratio:  0.0316666666667
   Number of neurons in the hidden layer:  10
   Learning rate:  0.012

Calculating (3/50)...

New best error ratio:  0.0223333333333
   Number of neurons in the hidden layer:  10
   Learning rate:  0.023

Calculating (4/50)...

Calculating (5/50)...

Calculating (6/50)...

Calculating (7/50)...

Calculating (8/50)...

Calculating (9/50)...

Calculating (10/50)...

Calculating (11/50)...

Calculating (12/50)...

New best error ratio:  0.013
   Number of neurons in the hidden layer:  20
   Learning rate:  0.012

Calculating (13/50)...

Calculating (14/50)...

Calculating (15/50)...

Calculating (16/50)...

Calculating (17/50)...

Calculating (18/50)...

Calculating (19/50)...

Calculating (20/50)...

Calculating (21/50)...

Calculating (22/50)...

New best error ratio:  0.0123333333333
   Number