In [1]:
%matplotlib inline

In [2]:
import numpy as np
import cPickle as pickle
import matplotlib.pyplot as plt

In [3]:
def get_key_map(limit=10):
    """Map every possible sum or product of two operands to a class index.

    Both operands range over 0 .. limit - 1. Every distinct value of
    ``i + j`` or ``i * j`` becomes a key; its value is the key's position
    in ascending order, so the mapping is identical on every interpreter
    (the previous version inherited whatever order the dict iterated in,
    which differs between Python versions).

    Args:
        limit: exclusive upper bound of both operands (default 10, which
            yields 40 distinct keys).

    Returns:
        dict mapping int value -> int index in [0, number_of_keys).
    """
    values = set()
    for i in range(limit):
        for j in range(limit):
            values.add(i + j)
            values.add(i * j)

    # Sorting pins the index of each key independently of hash/insertion order.
    return {value: index for index, value in enumerate(sorted(values))}

In [4]:
#data is a list of int
def OneHotKey(data, key_map):
    """One-hot encode a sequence of integer labels.

    Args:
        data: sequence of labels. Each item may be a plain number or a
            one-element array (e.g. a row of an (n, 1) label matrix); it is
            converted to ``int`` before the lookup.
        key_map: dict mapping each possible label to its column index.

    Returns:
        Float array of shape (len(data), len(key_map)) with exactly one 1
        per row, in the column ``key_map[label]``.

    Raises:
        KeyError: if a label is not present in ``key_map``.
        ValueError: if an item of ``data`` holds more than one value.
    """
    # .item() extracts the scalar explicitly instead of relying on implicit
    # int() conversion of a one-element array.
    columns = np.array(
        [key_map[int(np.asarray(value).item())] for value in data],
        dtype=np.intp,
    )
    result = np.zeros([len(columns), len(key_map)])
    result[np.arange(len(columns)), columns] = 1
    return result

In [21]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The direct formula overflows in ``exp(-x)`` for large negative ``x``
    and emits a RuntimeWarning. Using the identity
    ``sigmoid(x) = exp(-log(1 + exp(-x)))`` with ``np.logaddexp`` keeps
    every intermediate value finite.

    Args:
        x: scalar or array of any shape.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

In [35]:
#1. define the structure
#2. given training data and training labels, train the neural network.
#3. given test datas, predict the output
class neural_network:
    """Fully connected feed-forward network with sigmoid activations.

    Typical use:
      1. ``def_structure`` to choose the layer sizes and initialise parameters,
      2. ``fit`` to train with mini-batch gradient descent,
      3. ``predict`` / ``evaluate`` on new data.

    Per-layer parameters are kept in plain Python lists (one array per layer)
    because the layers generally have different shapes; packing them into a
    single ``np.array`` yields a ragged object array, which fails for some
    structures and is rejected by recent NumPy releases.
    """

    def __init__(self):
        self.structure = []    # number of nodes per layer, input layer first
        self.weights = []      # weights[i]: matrix of shape (structure[i + 1], structure[i])
        self.biases = []       # biases[i]: column vector of shape (structure[i + 1], 1)
        self.outputs = []      # outputs[i]: activation of layer i from the last forward(); outputs[0] is the input
        self.gradients_w = []  # accumulated weight gradients, same shapes as self.weights
        self.gradients_b = []  # accumulated bias gradients, same shapes as self.biases
        self.alpha = 0.1       # the learning rate

    def def_structure(self, structure):
        """Define the layer sizes and (re)initialise all parameters.

        ``structure`` is a list of node counts, e.g. ``[2, 3, 2]`` means two
        input nodes, three hidden nodes and two output nodes. Weights and
        biases are drawn from a standard normal distribution. Calling this
        again discards the previous parameters.
        """
        self.structure = structure
        self.weights = []
        self.biases = []
        self.gradients_w = []
        self.gradients_b = []
        self.outputs = [np.zeros([structure[0], 1])]
        for i in range(len(structure) - 1):
            n_in, n_out = structure[i], structure[i + 1]
            # Weights are drawn before biases for each layer, in layer order.
            self.weights.append(np.random.randn(n_out, n_in))
            self.gradients_w.append(np.zeros([n_out, n_in]))
            self.biases.append(np.random.randn(n_out, 1))
            self.gradients_b.append(np.zeros([n_out, 1]))
            self.outputs.append(np.zeros([n_out, 1]))

    def forward(self, data):
        """Run one sample through the network and record every layer's output.

        ``data`` is flattened to a column vector. Returns the activation of
        the last layer as a column vector.
        """
        activation = np.asarray(data).reshape(-1, 1)
        self.outputs[0] = activation
        for layer, (w, b) in enumerate(zip(self.weights, self.biases), 1):
            activation = sigmoid(np.dot(w, activation) + b)
            self.outputs[layer] = activation
        return activation

    def backpropogate(self, labels):
        """Add the gradients for the most recent forward() pass to the accumulators.

        ``labels`` is the target vector for the sample that was just passed
        through ``forward``. The output-layer error is ``output - label``;
        it is propagated backwards through the sigmoid derivative
        ``a * (1 - a)`` of each hidden layer.
        """
        labels = np.asarray(labels).reshape(-1, 1)

        # Output layer error.
        delta = self.outputs[-1] - labels
        self.gradients_w[-1] += np.dot(delta, self.outputs[-2].T)
        self.gradients_b[-1] += delta

        # Errors of the remaining layers, from the last hidden layer down to the first.
        for i in range(len(self.structure) - 2, 0, -1):
            # weights[i] connects layer i to layer i + 1, so its transpose
            # carries the error of layer i + 1 back to layer i.
            node_grad = np.dot(self.weights[i].T, delta)
            delta = node_grad * self.outputs[i] * (1 - self.outputs[i])
            self.gradients_w[i - 1] += np.dot(delta, self.outputs[i - 1].T)
            self.gradients_b[i - 1] += delta

    def train_mini_batch(self, batch_datas, batch_labels):
        """Perform one gradient-descent step on a mini batch.

        Gradients are accumulated over every (data, label) pair of the batch,
        averaged, scaled by ``self.alpha`` and subtracted from the parameters.
        An empty batch is a no-op.
        """
        batch_size = len(batch_datas)
        if batch_size == 0:
            return

        # Start from fresh zero arrays so the accumulators keep the parameter shapes.
        self.gradients_w = [np.zeros_like(w) for w in self.weights]
        self.gradients_b = [np.zeros_like(b) for b in self.biases]

        for data, label in zip(batch_datas, batch_labels):
            self.forward(data)
            self.backpropogate(label)

        for i in range(len(self.weights)):
            self.weights[i] = self.weights[i] - self.alpha * self.gradients_w[i] / batch_size
            self.biases[i] = self.biases[i] - self.alpha * self.gradients_b[i] / batch_size

    def fit(self, train_datas, train_labels, epochs = 10, batch_size = 64, learning_rate = 0.1):
        """Train the network with shuffled mini batches.

        Args:
            train_datas: array of shape (n, m): n items with m features each.
            train_labels: array with n rows, one target vector per item.
            epochs: number of passes over the training set.
            batch_size: maximum number of items per mini batch (must be >= 1).
            learning_rate: step size stored in ``self.alpha``.

        Prints the epoch number and the training error after every epoch.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            # A non-positive batch size would never advance through the data.
            raise ValueError("batch_size must be >= 1")

        train_datas = np.asarray(train_datas)
        train_labels = np.asarray(train_labels)
        self.alpha = learning_rate
        data_size = len(train_datas)
        for epoch in range(epochs):
            index = np.arange(0, data_size)
            np.random.shuffle(index)
            print("epoch : {}\n".format(epoch))
            for start_data in range(0, data_size, batch_size):
                # Slicing clips at data_size, so the last batch may be shorter.
                batch = index[start_data: start_data + batch_size]
                self.train_mini_batch(train_datas[batch], train_labels[batch])
            print("training error : {}".format(self.evaluate(train_datas, train_labels)))

    def predict(self, data):
        """Return the network output (column vector) for a single sample."""
        return self.forward(data)

    def evaluate(self, datas, labels):
        """Return the mean absolute error per sample.

        For every sample the absolute differences between prediction and
        label are summed over the output nodes; the sums are averaged over
        all samples. The result is an array of shape (1,).
        """
        error = 0
        for data, label in zip(datas, labels):
            label = np.asarray(label).reshape(-1, 1)
            # axis=0 sums over the output nodes and keeps the (1,) result shape.
            error = error + np.sum(np.abs(self.predict(data) - label), axis=0)
        return error / len(datas)

In [6]:
# Load the pickled training samples and labels.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# Binary mode is what pickle expects, and the with-blocks close the handles
# even if loading fails.
with open("sample_train_x", "rb") as sample_x_file:
    sample_x = pickle.load(sample_x_file)
with open("sample_train_y", "rb") as sample_y_file:
    sample_y = pickle.load(sample_y_file)

In [7]:
# Lay the samples out as rows of 4096 values and the labels as rows of one value.
sample_x = np.reshape(np.array(sample_x), (-1, 4096))
sample_y = np.reshape(np.array(sample_y), (-1, 1))

In [8]:
# Sanity check after the reshape: one row per sample, 4096 values per row.
sample_x.shape

(1000, 4096)

In [9]:
# Sanity check after the reshape: one row per sample, a single label value per row.
sample_y.shape

(1000, 1)

In [11]:
# Build the label -> column-index mapping used for one-hot encoding the labels.
key_map = get_key_map()

In [12]:
# Replace the integer labels with one-hot rows (one column per key in key_map).
# NOTE(review): this rebinds sample_y, so the cell is not idempotent — re-run the
# load/reshape cells before executing it a second time.
sample_y = OneHotKey(sample_y, key_map)

In [36]:
# Seed NumPy's global RNG so that the random weight initialisation and the
# per-epoch shuffling done by fit() are reproducible across runs.
np.random.seed(0)

# Input width follows the sample rows (4096) and output width the number of
# label classes in key_map (40); 1024 is the hidden layer size.
nn = neural_network()
nn.def_structure([sample_x.shape[1], 1024, len(key_map)])

In [None]:
# Train for 100 epochs with the default batch size and learning rate.
nn.fit(sample_x, sample_y, epochs=100)

epoch : 0



  from ipykernel import kernelapp as app


training error : [ 1.98280334]
epoch : 1

training error : [ 1.95716651]
epoch : 2

training error : [ 1.94487433]
epoch : 3

training error : [ 1.80882133]
epoch : 4

training error : [ 1.89133213]
epoch : 5

training error : [ 1.83292845]
epoch : 6

training error : [ 1.81897761]
epoch : 7

training error : [ 1.82564209]
epoch : 8

training error : [ 1.90898327]
epoch : 9

training error : [ 1.80616153]
epoch : 10

training error : [ 1.79750555]
epoch : 11

training error : [ 1.7447687]
epoch : 12

training error : [ 1.82698941]
epoch : 13

training error : [ 1.80526511]
epoch : 14

training error : [ 1.71016166]
epoch : 15

training error : [ 1.80182959]
epoch : 16

training error : [ 1.77617176]
epoch : 17

training error : [ 1.70297323]
epoch : 18

training error : [ 1.77098051]
epoch : 19

training error : [ 1.67368498]
epoch : 20

training error : [ 1.73960669]
epoch : 21

training error : [ 1.61811733]
epoch : 23

training error : [ 1.78934953]
epoch : 24

training error : [ 1.

In [38]:
# Persist the trained weights. Binary mode is what pickle expects, and the
# with-block closes the file even if dumping fails.
with open("weights", "wb") as weights_file:
    pickle.dump(nn.weights, weights_file)

In [43]:
# Index of the most strongly activated output node for the first training sample.
np.argmax(nn.predict(sample_x[0]))

  from ipykernel import kernelapp as app


8

In [44]:
# Index of the hot entry in the first sample's one-hot label, to compare with the predicted index.
np.argmax(sample_y[0])

8