In [319]:
import numpy as np
import pandas as pd

In [320]:
class NeuralNetwork:

    def __init__(self, n_inputs, n_hiddens, n_outputs, lr=0.1):
        self.n_outputs = n_outputs
        self.lr = 0.1

        self.network = list()
        hidden_layer = [{'weights': np.random.randn(n_inputs+1)} for i in range(n_hiddens)]
        self.network.append(hidden_layer)
        output_layer = [{'weights': np.random.randn(n_hiddens+1)} for i in range(n_outputs)]
        self.network.append(output_layer)

        self.num_layers = len(self.network)
    
    def sigmoid_function(self, x):
        return 1.0 / (1.0 + np.exp(-x))
    
    def activate_function(self, weights, inputs):
        activation = weights[-1]
        for i in range(len(inputs)):
            activation += weights[i] * inputs[i]
        return  self.sigmoid_function(activation)

    def forward(self):
        inputs = self.x
        for layer in self.network:
            layer_output = list()
            for neuron in layer:
                neuron['output'] = self.activate_function(neuron['weights'], inputs)
                layer_output.append(neuron['output'])
            inputs = layer_output
        return inputs

    def __call__(self, x):
        # one sample
        # label 必须是 one-hot 编码
        self.x = x[:-1]
        self.labels = [0 for i in range(self.n_outputs)]
        self.labels[x[-1]] = 1
        self.output = self.forward()
        return self.output

    def criterion(self):
        # return error
        return 0.5 * np.sum((np.array(self.output) - np.array(self.labels))**2)
    
    def error_backpropagate(self):
        for i in reversed(range(self.num_layers)):
            layer = self.network[i]
            errors = list()
            if i != self.num_layers -1 :
                for j in range(len(layer)):
                    error = 0.0
                    for neuron in self.network[i+1]:
                        error += (neuron['weights'][j] * neuron['delta'])
                    errors.append(error)
            else:
                errors = np.array(self.labels) - np.array(self.output)
            for j in range(len(layer)):
                neuron = layer[j]
                neuron['delta'] = errors[j] * neuron['output'] * ( 1 - neuron['output'] )
    
    def step(self):
        self.error_backpropagate()
        for i in range(len(self.network)):
            inputs = self.x
            if i != 0:
                inputs = [neuron['output'] for neuron in self.network[i - 1]]
            inputs.append(0)
            for neuron in self.network[i]:
                neuron['weights'] += self.lr * neuron['delta'] * np.array(inputs)
                # 更新bias
                neuron['weights'][-1] += self.lr * neuron['delta']

In [321]:
seeds_dataset = pd.read_csv('./seeds_dataset.csv', index_col=0)
seeds_dataset

Unnamed: 0_level_0,perimeter P,compactness C,length of kernel,width of kernel,asymmetry coefficient,length of kernel groove,varieties
area A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.440982,0.502066,0.570780,0.486486,0.486101,0.189302,0.345150,0
0.405099,0.446281,0.662432,0.368806,0.501069,0.032883,0.215165,0
0.349386,0.347107,0.879310,0.220721,0.503920,0.251453,0.150665,0
0.306893,0.316116,0.793103,0.239302,0.533856,0.194243,0.140817,0
0.524079,0.533058,0.864791,0.427365,0.664291,0.076701,0.322994,0
...,...,...,...,...,...,...,...
0.151086,0.163223,0.637024,0.134009,0.250178,0.372635,0.172821,2
0.060434,0.097107,0.390200,0.135698,0.117605,0.462872,0.238306,2
0.246459,0.258264,0.727768,0.189752,0.429081,0.981667,0.264402,2
0.118036,0.165289,0.399274,0.155405,0.146828,0.368344,0.258493,2


In [322]:
def cross_validation_split(dataset, n_fold):
    data_split = list()
    for i in range(n_fold):
        train_data = dataset.sample(frac=0.8, random_state=i)
        test_data = dataset[~dataset.index.isin(train_data.index)]
        data_split.append([train_data, test_data])
    return data_split
data_split = cross_validation_split(seeds_dataset, 5)
max_epoch = 100
for idx, (train_data, valid_data) in enumerate(data_split):
    net = NeuralNetwork(7,5,3)
    for epcoh in range(max_epoch):
        for x in train_data.itertuples():
            x = list(x)
            output = net(x)
            loss = net.criterion()
            net.step()
    total = 0
    correct = 0
    for x in valid_data.itertuples():
        x = list(x)
        outputs = net(x)
        predicted = outputs.index(max(outputs))
        correct += x[-1] == predicted
        total += 1
    print('seeds dataset, k_fold: {}, accurancy: {}%'.format(idx+1, correct*100/total))

seeds dataset, k_fold: 1, accurancy: 88.57142857142857%
seeds dataset, k_fold: 2, accurancy: 86.11111111111111%
seeds dataset, k_fold: 3, accurancy: 86.48648648648648%
seeds dataset, k_fold: 4, accurancy: 90.9090909090909%
seeds dataset, k_fold: 5, accurancy: 97.05882352941177%


In [323]:
iris_dataset = pd.read_csv('./iris_data.csv', index_col=0)
iris_dataset

Unnamed: 0_level_0,sepal width in cm,petal length in cm,petal width in cm,varieties
sepal length in cm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.222222,0.625000,0.067797,0.041667,0
0.166667,0.416667,0.067797,0.041667,0
0.111111,0.500000,0.050847,0.041667,0
0.083333,0.458333,0.084746,0.041667,0
0.194444,0.666667,0.067797,0.041667,0
...,...,...,...,...
0.666667,0.416667,0.711864,0.916667,2
0.555556,0.208333,0.677966,0.750000,2
0.611111,0.416667,0.711864,0.791667,2
0.527778,0.583333,0.745763,0.916667,2


In [324]:
data_split = cross_validation_split(iris_dataset, 5)
max_epoch = 100
for idx, (train_data, valid_data) in enumerate(data_split):
    net = NeuralNetwork(4,5,3)
    for epcoh in range(max_epoch):
        for x in train_data.itertuples():
            x = list(x)
            output = net(x)
            loss = net.criterion()
            net.step()
    total = 0
    correct = 0
    for x in valid_data.itertuples():
        x = list(x)
        outputs = net(x)
        predicted = outputs.index(max(outputs))
        correct += x[-1] == predicted
        total += 1
    print('iris dataset, k_fold: {}, accurancy: {}%'.format(idx+1, correct*100/total))

iris dataset, k_fold: 1, accurancy: 100.0%
iris dataset, k_fold: 2, accurancy: 100.0%
iris dataset, k_fold: 3, accurancy: 100.0%
iris dataset, k_fold: 4, accurancy: 100.0%
iris dataset, k_fold: 5, accurancy: 100.0%
