In [19]:
import numpy as np

In [20]:
class NN:
    """Fully connected feed-forward network with sigmoid activations.

    Samples are laid out column-wise: the code multiplies ``w @ A`` with
    ``w`` of shape ``(n_out, n_in)``, so ``X`` must be ``(dim[0], m)`` and
    ``y`` must be ``(dim[-1], m)`` for ``m`` samples. Training is full-batch
    gradient descent; the output-layer error is ``a - y``, which is the
    gradient of the cross-entropy loss for a sigmoid output unit.
    """

    def __init__(self, dim):
        """Create a network with standard-normal random weights and biases.

        Args:
            dim: Sequence of layer sizes, input layer first (e.g. ``[2, 3, 1]``).
        """
        self.layers = len(dim)
        self.dimensions = dim
        # weight[k] has shape (units in layer k+1, units in layer k).
        self.weight = [np.random.randn(n_out, n_in)
                       for n_out, n_in in zip(dim[1:], dim[:-1])]
        # bias[k] is a column vector so it broadcasts across the sample axis.
        self.bias = [np.random.randn(n_out, 1) for n_out in dim[1:]]

    def sigmoid(self, z):
        """The sigmoid function."""
        return 1.0/(1.0+np.exp(-z))

    def sigmoid_prime(self, z):
        """Derivative of the sigmoid function."""
        s = self.sigmoid(z)  # evaluate once instead of twice
        return s*(1-s)

    def feedforward(self, X):
        """Propagate ``X`` through every layer and return the output activation.

        Args:
            X: Array of shape ``(dim[0], m)``.

        Returns:
            Array of shape ``(dim[-1], m)``.
        """
        print(":::::: Feedforward ::::::")
        A = X
        for w, b in zip(self.weight, self.bias):
            A = self.sigmoid(np.dot(w, A)+b)
        return A

    def backprop(self, X, y):
        """Compute batch-averaged gradients of the loss for every layer.

        Args:
            X: Inputs, shape ``(dim[0], m)``.
            y: Targets, shape ``(dim[-1], m)``.

        Returns:
            ``(nabla_b, nabla_w)``: two lists with the same shapes as
            ``self.bias`` and ``self.weight``. Each entry is already averaged
            over the ``m`` samples (multiplied by ``1/m``).
        """
        m = X.shape[1]
        nabla_b = [np.zeros(b.shape) for b in self.bias]
        nabla_w = [np.zeros(w.shape) for w in self.weight]

        # Forward pass, remembering every pre-activation and activation.
        activation = X
        activations = [X]
        zs = []
        for b, w in zip(self.bias, self.weight):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)

        # Output layer: with cross-entropy loss the sigmoid_prime factor
        # cancels, leaving just (a - y). For mean squared error this would be
        # (a - y) * sigmoid_prime(zs[-1]) instead.
        delta = activations[-1] - y
        nabla_b[-1] = (1/m) * np.sum(delta, axis=1, keepdims=True)
        nabla_w[-1] = (1/m) * np.dot(delta, activations[-2].transpose())

        # Hidden layers, walking backwards: l=2 is the last hidden layer.
        for l in range(2, self.layers):
            sp = self.sigmoid_prime(zs[-l])
            delta = np.dot(self.weight[-l+1].transpose(), delta) * sp
            nabla_b[-l] = (1/m) * np.sum(delta, axis=1, keepdims=True)
            nabla_w[-l] = (1/m) * np.dot(delta, activations[-l-1].transpose())

        return (nabla_b, nabla_w)

    def update_weights(self, X, y, eta):
        """Apply one full-batch gradient-descent step.

        Args:
            X: Inputs, shape ``(dim[0], m)``.
            y: Targets, shape ``(dim[-1], m)``.
            eta: Learning rate.
        """
        nabla_b, nabla_w = self.backprop(X, y)
        # backprop() already averages over the batch, so the step is plain
        # eta * gradient. Dividing by the batch size a second time would
        # silently shrink the learning rate to eta / m.
        self.weight = [w-eta*nw for w, nw in zip(self.weight, nabla_w)]
        self.bias = [b-eta*nb for b, nb in zip(self.bias, nabla_b)]

    def train(self, X, y, epochs=10, eta=0.01):
        """Run ``epochs`` full-batch updates, printing the epoch number each time."""
        for i in range(epochs):
            self.update_weights(X, y, eta)
            print("Epoch: ",(i+1))

    def predict(self, ip):
        """Return the network output for ``ip`` (same contract as feedforward)."""
        return self.feedforward(ip)

    def display(self):
        """Print every weight matrix, then every bias vector, with shapes."""
        print("Weights\n")
        for x in self.weight:
            print(x,x.shape,"\n")
        print("Bias\n")
        for y in self.bias:
            print(y, y.shape,"\n")

    def display_2(self):
        """Print bias and weight together, one layer at a time."""
        for b,w in zip(self.bias, self.weight):
            print("Bias: ",b,b.shape,"\n", "Weight: ", w, w.shape)


In [18]:
# Load the XOR training set from disk.
# NOTE(review): pickle.load can run arbitrary code while unpickling, so
# xor_dataset.dat must come from a trusted source.
import pickle

with open("xor_dataset.dat", "rb") as dataset_file:
    xor_data = pickle.load(dataset_file)

# Index 0 is used as the inputs and index 1 as the targets.
training_X, training_Y = xor_data[0], xor_data[1]
print("Training Data Loaded")
print(training_X.shape)
print(training_Y.shape)

Training Data Loaded
(800, 2)
(800, 1)


In [17]:
# Randomize Data
# Add uniform [0, 1) noise to the inputs. The noisy array is rebuilt from the
# raw loaded data (xor_data[0]) instead of from training_X itself, so running
# this cell again replaces the noise rather than stacking more on top.
r = np.random.rand(*xor_data[0].shape)
training_X = xor_data[0] + r
print(training_X.shape)

(40, 2)


In [None]:
# Train a 2-2-1 network. NN expects one sample per column, hence the
# transposes of the row-per-sample training arrays.
net = NN(dim=[2, 2, 1])
net.train(training_X.T, training_Y.T, eta=0.05, epochs=100)

In [42]:
# Predict: run one hand-picked sample through the trained network.
test_data = np.array([[0.0255, 0.25]])
# Transpose so the sample becomes a single column, as feedforward expects.
print(net.feedforward(test_data.T))

:::::: Feedforward ::::::
[[ 0.1911246]]


In [None]:
# Build a deeper 2-3-2-1 network and print its parameters layer by layer.
cl = NN(dim=[2, 3, 2, 1])
cl.display_2()

In [None]:
# Print each training input next to its target.
# The targets are stored in training_Y (capital Y); the previous lowercase
# name was never defined and raised NameError.
for i,j in zip(training_X,training_Y):
    print("X: ",i,"\ny:",j)

In [12]:
# Scratch check: summing over axis=1 with keepdims=True leaves a column
# vector of shape (rows, 1) rather than a flat array.
a = np.arange(1, 7).reshape(2, 3)
print(a)
s = a.sum(axis=1, keepdims=True)
print("Sum\n",s.shape)
print(a.shape)

[[1 2 3]
 [4 5 6]]
Sum
 (2, 1)
(2, 3)


In [1]:
# Smoke test for the class once it is packaged as the MyNN module:
# build a 2-3-1 network and show its randomly initialised weights.
import MyNN
import numpy as np

nn = MyNN.NN([2, 3, 1])
print(nn.weight)

[array([[-1.02499797, -2.23708689],
       [-0.51879056,  0.2669871 ],
       [-0.04187466, -0.67949619]]), array([[ 1.47762289,  0.28373874,  1.12427594]])]
