In [109]:
# %load mnist_loader.py
"""
mnist_loader
~~~~~~~~~~~~
A library to load the MNIST image data.  For details of the data
structures that are returned, see the doc strings for ``load_data``
and ``load_data_wrapper``.  In practice, ``load_data_wrapper`` is the
function usually called by our neural network code.
"""

#### Libraries
# Standard library
import pickle
import gzip

# Third-party libraries
import numpy as np

def load_data(path='./data/mnist.pkl.gz'):
    """Load the gzipped, pickled MNIST data set and return its three splits.

    Parameters
    ----------
    path : str, optional
        Location of the gzip-compressed pickle file.  Defaults to
        ``'./data/mnist.pkl.gz'``, the path this loader has always used.

    Returns
    -------
    tuple
        ``(training_data, validation_data, test_data)``, exactly as stored
        in the pickle.  For the standard ``mnist.pkl.gz`` file each element
        is itself a 2-tuple ``(images, labels)``:

        * ``images`` is a numpy ndarray with one row per image, each row
          holding the 28 * 28 = 784 pixel values of that image;
        * ``labels`` is a numpy ndarray holding the digit value (0...9)
          of the corresponding image.

        The training split holds 50,000 images; the validation and test
        splits hold 10,000 images each.

    This is a nice data format, but for use in neural networks it is
    helpful to modify the format of the training data a little.  That is
    done in the wrapper function ``load_data_wrapper()``.

    Raises
    ------
    OSError
        If ``path`` cannot be opened or is not valid gzip data.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the file, so
    # only point ``path`` at a data file obtained from a trusted source.
    # The ``with`` block guarantees the file is closed even if unpickling fails.
    with gzip.open(path, 'rb') as f:
        # encoding="latin1" is needed to read pickles written by Python 2.
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
    return (training_data, validation_data, test_data)

def load_data_wrapper():
    """Return ``(training_data, validation_data, test_data)`` in the format
    used by the network code.  Based on ``load_data``.

    All three return values are real lists (not one-shot ``zip``
    iterators), so they can be measured with ``len`` and iterated any
    number of times.

    ``training_data`` is a list of 2-tuples ``(x, y)``.  ``x`` is a
    784-dimensional numpy.ndarray containing the input image.  ``y`` is a
    10-dimensional numpy.ndarray representing the unit vector
    corresponding to the correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists of 2-tuples
    ``(x, y)``.  In each case, ``x`` is a 784-dimensional numpy.ndarray
    containing the input image, and ``y`` is the corresponding
    classification, i.e., the digit value (integer) corresponding to
    ``x``.

    The inputs are passed through unchanged, i.e. they stay flat vectors
    and are not reshaped into column vectors.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    # Training labels become one-hot vectors; validation/test labels stay digits.
    training_results = [vectorized_result(y) for y in tr_d[1]]
    # Iterating a 2-D ndarray yields its rows, so zip pairs image i with label i.
    training_data = list(zip(tr_d[0], training_results))
    validation_data = list(zip(va_d[0], va_d[1]))
    test_data = list(zip(te_d[0], te_d[1]))
    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """One-hot encode a digit.

    Builds a length-10 vector of zeros and sets the entry at index ``j``
    to 1.0.  The network code uses this to turn a digit label (0...9)
    into the desired output activation vector.
    """
    one_hot = np.zeros(10)
    one_hot[j] = 1.0
    return one_hot

In [303]:
import numpy as np
import random
import time
class Network3(object):
    """Fully connected feed-forward network with sigmoid activations,
    trained by mini-batch stochastic gradient descent.

    A whole mini-batch is pushed through the network as one matrix with
    one column per sample, so both the forward and the backward pass are
    plain matrix products.

    Attributes
    ----------
    sizes : list of int
        Number of neurons in each layer, input layer first.
    num_layers : int
        ``len(sizes)``.
    bias : list of ndarray
        One ``(n_out, 1)`` column vector per non-input layer.
    weights : list of ndarray
        One ``(n_out, n_in)`` matrix per pair of consecutive layers.
    """

    def __init__(self, sizes):
        """Create a network with standard-normal random weights and biases.

        ``sizes`` lists the layer widths, e.g. ``[784, 30, 10]``.
        """
        self.sizes = sizes
        self.num_layers = len(sizes)
        self.bias = [np.random.randn(x, 1) for x in sizes[1:]]
        self.weights = [np.random.randn(x, y) for x, y in zip(sizes[1:], sizes[:-1])]

    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Parameters
        ----------
        training_data : iterable of (x, y)
            ``x`` is a flat input vector and ``y`` the desired output
            vector.  The iterable is copied into a list, so the caller's
            object is not shuffled.
        epochs : int
            Number of passes over the training data.
        mini_batch_size : int
            Number of samples per gradient step.
        eta : float
            Learning rate.
        test_data : iterable of (x, label), optional
            If given and non-empty, the number of correctly classified
            samples is printed after every epoch.

        The wall-clock training time of every epoch is printed as well.
        """
        # Compare against None rather than relying on truthiness, which
        # raises for ndarray input and says nothing useful for iterators.
        if test_data is not None:
            test_data = list(test_data)
            n_test = len(test_data)

        training_data = list(training_data)
        n = len(training_data)

        for i in range(epochs):
            start_time = time.time()

            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)

            end_time = time.time()
            print("time:{}".format(end_time - start_time))
            # An empty list is falsy, so evaluation is skipped when there
            # is nothing to evaluate on.
            if test_data:
                print("Epoch {} : {} / {}".format(i, self.evaluate(test_data), n_test))
            else:
                print("Epoch {} complete".format(i))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        ``back_pro`` already averages the gradients over the batch, so
        they are only scaled by the learning rate ``eta`` here.
        """
        nabla_w, nabla_b = self.back_pro(mini_batch)
        self.weights = [w - eta * nw for w, nw in zip(self.weights, nabla_w)]
        self.bias = [b - eta * nb for b, nb in zip(self.bias, nabla_b)]

    def back_pro(self, mini_batch):
        """Backpropagate a whole mini-batch at once.

        Parameters
        ----------
        mini_batch : sequence of (x, y)
            ``x`` and ``y`` must be flat 1-D vectors; they are stacked
            into matrices with one column per sample.

        Returns
        -------
        (nabla_w, nabla_b) : tuple of lists
            Gradients averaged over the mini-batch, layer by layer, with
            the same shapes as ``self.weights`` and ``self.bias``.
        """
        m = len(mini_batch)

        x, y = zip(*mini_batch)
        x = np.array(x).T  # (n_inputs, m): one column per sample
        y = np.array(y).T  # (n_outputs, m)

        # Feed-forward pass, remembering the activation of every layer.
        activation = x
        activations = [x]
        for w, b in zip(self.weights, self.bias):
            # b has shape (n_out, 1) and broadcasts across the m columns.
            activation = sigmoid(np.dot(w, activation) + b)
            activations.append(activation)

        nabla_b = [None] * len(self.bias)
        nabla_w = [None] * len(self.weights)

        # Output-layer error, one column per sample.
        delta = self.delta_L(y, activations[-1])
        nabla_b[-1] = np.sum(delta, axis=1).reshape(self.bias[-1].shape) / m
        # delta . a^T sums the per-sample outer products delta_i a_i^T in a
        # single matrix product, replacing an explicit loop over samples.
        nabla_w[-1] = np.dot(delta, activations[-2].T) / m

        for l in range(2, self.num_layers):
            layer_out = activations[-l]
            # sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)), and sigmoid(z) is
            # exactly the stored activation of this layer.
            delta = np.dot(self.weights[-l + 1].T, delta) * (layer_out * (1 - layer_out))
            nabla_b[-l] = np.sum(delta, axis=1).reshape(self.bias[-l].shape) / m
            nabla_w[-l] = np.dot(delta, activations[-l - 1].T) / m

        return (nabla_w, nabla_b)

    def delta_L(self, y, activation_L):
        """Output-layer error for sigmoid activations with the
        cross-entropy cost function: ``activation_L - y``."""
        return activation_L - y

    def feedward(self, a):
        """Feed-forward pass: return the output-layer activations for ``a``.

        ``a`` holds one input per column, shape ``(n_inputs, m)``.
        """
        for w, b in zip(self.weights, self.bias):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def evaluate(self, x):
        """Return how many samples in ``x`` the network classifies correctly.

        ``x`` is a sequence of ``(input, label)`` pairs with flat input
        vectors.  The predicted class of a sample is the index of its
        largest output activation, and it counts as correct when that
        index equals ``label``.
        """
        a, y = zip(*x)
        a = np.array(a).T
        y = np.array(y)

        test_results = np.argmax(self.feedward(a), axis=0)
        # np.sum counts the matches in C instead of a Python-level loop.
        return int(np.sum(y == test_results))
    
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def sigmoid_prime(x):
    """Derivative of the logistic function: ``sigmoid(x) * (1 - sigmoid(x))``."""
    s = sigmoid(x)
    return s * (1 - s)

In [304]:
# Seed both random number generators so the run is repeatable: numpy draws
# the initial weights and biases, `random` shuffles the mini-batches.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

training_data, validation_data, test_data = load_data_wrapper()

# Materialise every split as a list.  A zip iterator can be consumed only
# once, so a second training run in a later cell would otherwise see empty
# data and silently skip the per-epoch evaluation.
training_data = list(training_data)
validation_data = list(validation_data)
test_data = list(test_data)

# 784 input pixels -> 30 hidden neurons -> 10 output classes.
net3 = Network3([784,30,10])

# 30 epochs, mini-batches of 10 samples, learning rate 3.0.
net3.SGD(training_data, 30, 10, 3.0, test_data=test_data)

time:2.66577410697937
Epoch 0 : 9028 / 10000
time:2.633054256439209
Epoch 1 : 8977 / 10000


KeyboardInterrupt: 

In [134]:
# Outer product of two vectors: a (2, 1) column times a (1, 2) row gives a
# (2, 2) matrix.  This is how one sample's weight gradient is formed.
array1 = np.array([1, 2]).reshape(-1, 1)
array2 = np.array([3, 4]).reshape(-1, 1)

print(array1.shape)
print(array2.T.shape)

matrix = array1.dot(array2.T)

print(matrix)

(2, 1)
(1, 2)
[[3 4]
 [6 8]]


In [147]:
# Stacking a list of equally shaped 2-D arrays yields one 3-D array whose
# first axis indexes the original arrays.
array1 = np.array([[1, 2], [3, 4]])
array2 = np.array([[5, 6], [7, 8]])

arrays = np.array([array1, array2])

arrays.shape

(2, 2, 2)

In [235]:
# Summing along axis=1 collapses the columns, leaving one total per row.
array1 = np.array([[1, 2], [3, 4]])
print(array1)

array1.sum(axis=1)

[[1 2]
 [3 4]]


array([3, 7])

In [279]:
# A (2, 1) column broadcasts across the columns of a (2, 3) matrix, scaling
# each row by its own factor.  argmax over axis=0 then picks, per column, the
# row index holding the largest value.
array1 = np.array([[5, 2, 3], [4, 7, 1]])
array2 = np.array([1, 2]).reshape(-1, 1)
print(array1)

matrix = array2 * array1

print(matrix)
print("\n")
print(array1.argmax(axis=0))

[[5 2 3]
 [4 7 1]]
[[ 5  2  3]
 [ 8 14  2]]


[0 1 0]


In [286]:
# Element-wise comparison gives a boolean array; summing it counts the
# matching positions (True counts as 1).
array1 = np.array([1, 2, 43, 5])
array2 = np.array([1, 2, 43, 1])

is_equal = array1 == array2
print(is_equal)

print(sum(is_equal))

[ True  True  True False]
3
