In [9]:
from __future__ import division
import numpy as np

In [10]:
class Tensor(object):
    """A value node belonging to a computation graph.

    The constructor stores its three arguments unchanged:

    * ``initial_value`` -- the wrapped value.
    * ``op`` -- the operation object associated with this tensor (may be
      ``None``).
    * ``graph`` -- the object whose ``add``/``neg``/``sub``/``mul``/``div``
      methods implement the arithmetic operators defined below.

    Every operator simply forwards to the matching ``graph`` method and
    returns whatever that method returns.
    """

    def __init__(self, initial_value, op, graph):
        self.graph = graph
        self.op = op
        self.initial_value = initial_value

    # ---- unary -----------------------------------------------------------
    def __neg__(self):
        """``-self`` -> ``graph.neg(self)``."""
        owner = self.graph
        return owner.neg(self)

    # ---- binary, tensor on the left --------------------------------------
    def __add__(self, other):
        """``self + other`` -> ``graph.add(self, other)``."""
        owner = self.graph
        return owner.add(self, other)

    def __sub__(self, other):
        """``self - other`` -> ``graph.sub(self, other)``."""
        owner = self.graph
        return owner.sub(self, other)

    def __mul__(self, other):
        """``self * other`` -> ``graph.mul(self, other)``."""
        owner = self.graph
        return owner.mul(self, other)

    def __truediv__(self, other):
        """``self / other`` (true division) -> ``graph.div(self, other)``."""
        owner = self.graph
        return owner.div(self, other)

    # ---- binary, tensor on the right (reflected operands) -----------------
    # Python calls these when the left operand does not handle the operator,
    # e.g. ``2 * tensor``. See
    # https://docs.python.org/2/reference/datamodel.html?highlight=__radd__#object.__radd__
    def __radd__(self, other):
        """``other + self`` -> ``graph.add(other, self)``."""
        owner = self.graph
        return owner.add(other, self)

    def __rsub__(self, other):
        """``other - self`` -> ``graph.sub(other, self)``."""
        owner = self.graph
        return owner.sub(other, self)

    def __rmul__(self, other):
        """``other * self`` -> ``graph.mul(other, self)``."""
        owner = self.graph
        return owner.mul(other, self)

    def __rtruediv__(self, other):
        """``other / self`` (true division) -> ``graph.div(other, self)``."""
        owner = self.graph
        return owner.div(other, self)

In [13]:
class BaseOp(object):
    """Common base class for graph operations.

    Construction stores three attributes:

    * ``inputs`` -- each element of the ``inputs`` argument passed through
      ``graph.convert``, in order.
    * ``output`` -- the result of ``graph.tensor(op=self)``.
    * ``graph`` -- the graph that was passed in.

    Subclasses are expected to override ``compute`` and ``gradient``; the
    versions here only raise ``NotImplementedError``.
    """

    def __init__(self, inputs, graph):
        converted = []
        for raw_input in inputs:
            converted.append(graph.convert(raw_input))
        self.inputs = converted
        self.output = graph.tensor(op=self)
        self.graph = graph

    def compute(self, sess, *args):
        """Evaluate the operation. Must be overridden."""
        raise NotImplementedError()

    def gradient(self, grad):
        """Return one gradient per input. Must be overridden."""
        raise NotImplementedError()
'''
class myParent( object ):
    def __init__( self, customParam ):
        self.parentNumber = 5
        self.customParam = customParam

class Child( myParent ):
    def __init__( self, customParam ):
        myParent.__init__( self, customParam )
        self.childNumber = 4        
'''        
        
class AddOp(BaseOp):
    """
    `AddOp` adds a tensor to another tensor.

    After construction:

    * ``output`` is a tensor holding the sum, built via ``graph.convert``
      (so it carries no op).
    * ``output_tensor_with_op`` is a tensor holding the same sum with
      ``op=self``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # BaseOp has already converted the inputs to tensors; evaluate once
        # and reuse the value for both result tensors.
        value = self.compute(self.inputs[0], self.inputs[1])
        self.output = graph.convert(value)
        self.output_tensor_with_op = graph.tensor(value, op=self)

    def compute(self, a=0, b=0):
        """Return ``a + b`` on the underlying values.

        Operands may be tensors or raw values; both are normalised through
        this op's own graph (``self.graph``) rather than a module-level
        ``graph`` variable.
        """
        a_ = self.graph.convert(a)
        b_ = self.graph.convert(b)
        return a_.initial_value + b_.initial_value

    def gradient(self, grad):
        """Sum rule: the incoming gradient flows unchanged to both inputs."""
        return [grad, grad]
    
    
    
class NegOp(BaseOp):
    """
    `NegOp` negates a tensor.

    After construction ``output`` holds the negated value (no op) and
    ``output_tensor_with_op`` holds the same value with ``op=self``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Evaluate once and reuse the value for both result tensors.
        value = self.compute(self.inputs[0])
        self.output = graph.convert(value)
        self.output_tensor_with_op = graph.tensor(value, op=self)

    def compute(self, x):
        """Return ``-x`` on the underlying value.

        The operand is the parameter ``x`` (the original converted an
        unrelated name ``a``) and is normalised through ``self.graph``.
        """
        x_ = self.graph.convert(x)
        return -x_.initial_value

    def gradient(self, grad):
        """d(-x)/dx = -1, so the incoming gradient is negated."""
        return [-grad]

    
class SubOp(BaseOp):
    """
    `SubOp` subtracts a tensor from another tensor. Also uses the
    [sum rule](https://en.wikipedia.org/wiki/Sum_rule_in_differentiation) to
    compute the partial derivatives.

    After construction ``output`` holds the difference (no op) and
    ``output_tensor_with_op`` holds the same value with ``op=self``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Evaluate once and reuse the value for both result tensors.
        value = self.compute(self.inputs[0], self.inputs[1])
        self.output = graph.convert(value)
        self.output_tensor_with_op = graph.tensor(value, op=self)

    def compute(self, a, b):
        """Return ``a - b`` on the underlying values.

        Operands may be tensors or raw values; both are normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        a_ = self.graph.convert(a)
        b_ = self.graph.convert(b)
        return a_.initial_value - b_.initial_value

    def gradient(self, grad):
        """Gradient is ``grad`` for the minuend and ``-grad`` for the subtrahend."""
        return [grad, -grad]
    
    
class MulOp(BaseOp):
    """
    `MulOp` multiplies a tensor by another tensor. Uses the
    [product rule](https://en.wikipedia.org/wiki/Product_rule) to compute the
    partial derivatives.

    After construction ``output`` holds the product (no op) and
    ``output_tensor_with_op`` holds the same value with ``op=self``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Evaluate once and reuse the value for both result tensors.
        value = self.compute(self.inputs[0], self.inputs[1])
        self.output = graph.convert(value)
        self.output_tensor_with_op = graph.tensor(value, op=self)

    def compute(self, a, b):
        """Return ``a * b`` on the underlying values.

        Operands may be tensors or raw values; both are normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        a_ = self.graph.convert(a)
        b_ = self.graph.convert(b)
        return a_.initial_value * b_.initial_value

    def gradient(self, grad):
        """Product rule: ``[grad * b, grad * a]``.

        The products are formed on the input tensors themselves, so the
        results are whatever ``grad * tensor`` evaluates to.
        """
        a, b = self.inputs
        return [grad * b, grad * a]
    
    
class SquareOp(BaseOp):
    """
    `SquareOp` squares a tensor.

    After construction ``output`` holds the squared value (no op) and
    ``output_tensor_with_op`` holds the same value with ``op=self``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Evaluate once and reuse the value for both result tensors.
        value = self.compute(self.inputs[0])
        self.output = graph.convert(value)
        self.output_tensor_with_op = graph.tensor(value, op=self)

    def compute(self, x):
        """Return ``np.square`` of the underlying value.

        The operand may be a tensor or a raw value; it is normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        x_ = self.graph.convert(x)
        return np.square(x_.initial_value)

    def gradient(self, grad):
        """d(x^2)/dx = 2x, using the raw value of the input."""
        x = self.inputs[0].initial_value
        return [grad * (2 * x)]
    
class DivOp(BaseOp):
    """
    `DivOp` divides a tensor by another tensor. Uses the
    [quotient rule](https://en.wikipedia.org/wiki/Quotient_rule) to compute the
    partial derivatives.

    Only ``output_tensor_with_op`` is populated with the quotient;
    ``output`` keeps the value assigned by ``BaseOp.__init__``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # BaseOp has already converted the inputs to tensors.
        quotient = self.compute(self.inputs[0], self.inputs[1])
        self.output_tensor_with_op = graph.tensor(quotient, op=self)

    def compute(self, a, b):
        """Return ``a / b`` on the underlying values.

        Operands may be tensors or raw values; both are normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        a_ = self.graph.convert(a)
        b_ = self.graph.convert(b)
        return a_.initial_value / b_.initial_value

    def gradient(self, grad):
        """Quotient rule on raw values: ``[grad / b, grad * (-a / b**2)]``.

        Both results are wrapped with ``graph.tensor`` without an op.
        ``grad`` may be a tensor or a raw value.
        """
        a_, b_ = self.inputs
        a = a_.initial_value
        b = b_.initial_value
        grad_ = self.graph.convert(grad).initial_value
        # Square the raw denominator directly instead of building a
        # throwaway SquareOp node just to read its value back.
        b_squared = np.square(b)
        return [
            self.graph.tensor(grad_ / b),
            self.graph.tensor(grad_ * (-a / b_squared)),
        ]

    
class TransposeOp(BaseOp):
    """
    `TransposeOp` transposes a tensor.

    Only ``output_tensor_with_op`` is populated with the transposed value;
    ``output`` keeps the value assigned by ``BaseOp.__init__``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Use the converted input so raw (non-tensor) arguments work too.
        transposed = self.compute(self.inputs[0])
        self.output_tensor_with_op = graph.tensor(transposed, op=self)

    def compute(self, x):
        """Return ``np.transpose`` of the underlying value.

        The operand may be a tensor or a raw value; it is normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        x_ = self.graph.convert(x)
        return np.transpose(x_.initial_value)

    def gradient(self, grad):
        """The gradient of a transpose is the transpose of the gradient."""
        return [self.graph.transpose(grad)]
    
class DotOp(BaseOp):
    """
    `DotOp` computes the dot product between two tensors. Uses the
    [product rule](https://en.wikipedia.org/wiki/Product_rule) to compute the
    partial derivatives. Note that here we need to transpose the terms and
    perform a dot product, assuming matrices rather than scalars.

    Only ``output_tensor_with_op`` is populated with the product;
    ``output`` keeps the value assigned by ``BaseOp.__init__``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Use the converted inputs so raw (non-tensor) arguments work too.
        product = self.compute(self.inputs[0], self.inputs[1])
        self.output_tensor_with_op = graph.tensor(product, op=self)

    def compute(self, a, b):
        """Return ``np.dot(a, b)`` on the underlying values.

        Operands may be tensors or raw values; both are normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        a_ = self.graph.convert(a)
        b_ = self.graph.convert(b)
        return np.dot(a_.initial_value, b_.initial_value)

    def gradient(self, grad):
        """Return ``[dot(grad, b^T), dot(a^T, grad)]`` built as graph ops."""
        a, b = self.inputs
        aT = self.graph.transpose(a)
        bT = self.graph.transpose(b)
        return [
            self.graph.dot(grad, bT),
            self.graph.dot(aT, grad),
        ]
    
class SigmoidOp(BaseOp):
    """
    `SigmoidOp` implements the
    [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) and its
    derivative. Notice that the derivative uses the output of the operation
    which saves recomputation.

    Only ``output_tensor_with_op`` is populated with the activation;
    ``output`` keeps the value assigned by ``BaseOp.__init__``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Use the converted input so raw (non-tensor) arguments work too.
        activation = self.compute(self.inputs[0])
        self.output_tensor_with_op = graph.tensor(activation, op=self)

    def compute(self, x):
        """Return ``1 / (1 + exp(-x))`` on the underlying value.

        The operand may be a tensor or a raw value; it is normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        x_ = self.graph.convert(x)
        return 1 / (1 + np.exp(-x_.initial_value))

    def gradient(self, grad):
        """Return ``[grad * y * (1 - y)]`` where ``y`` is the stored output value."""
        y = self.output_tensor_with_op
        return [grad * (y.initial_value * (1 - y.initial_value))]
    
    
class MeanOp(BaseOp):
    """
    `MeanOp` computes the mean of a tensor. **Note** the gradient here is
    intentionally incorrect because computing it requires knowing the shape of
    the input and output tensors. Fortunately, gradients are fairly malleable
    in optimization.

    Only ``output_tensor_with_op`` is populated with the mean;
    ``output`` keeps the value assigned by ``BaseOp.__init__``.
    """

    def __init__(self, inputs, graph):
        BaseOp.__init__(self, inputs, graph)
        # Use the converted input so raw (non-tensor) arguments work too.
        mean_value = self.compute(self.inputs[0])
        self.output_tensor_with_op = graph.tensor(mean_value, op=self)

    def compute(self, x):
        """Return ``np.mean`` of the underlying value.

        The operand may be a tensor or a raw value; it is normalised through
        ``self.graph`` rather than a module-level ``graph`` variable.
        """
        x_ = self.graph.convert(x)
        return np.mean(x_.initial_value)

    def gradient(self, grad):
        """Pass the gradient through divided by a fixed factor of 1.

        This is the deliberately simplified gradient described in the class
        docstring; it does not scale by the number of elements.
        """
        factor = 1
        return [grad / factor]
    
class AssignOp(BaseOp):
    """
    `AssignOp` writes a new value for its first input into the session's
    state. It is not differentiable in this implementation (``gradient`` is
    inherited from the base class and raises ``NotImplementedError``).
    """

    def compute(self, sess, a, b):
        """Store ``b`` in ``sess.state`` under this op's first input and return ``b``.

        ``a`` is accepted but not used.
        """
        target = self.inputs[0]
        sess.state[target] = b
        return b

In [15]:
class Graph(object):
    """Factory for tensors and operations, plus reverse-mode differentiation.

    The arithmetic helpers (``add``, ``mul``, ...) each build the matching
    op object and return the tensor that op produced.
    """

    def tensor(self, initial_value=None, op=None):
        """Wrap ``initial_value`` in a new ``Tensor`` owned by this graph.

        If ``initial_value`` is already a ``Tensor`` it is returned unchanged
        (and ``op`` is ignored).
        """
        if isinstance(initial_value, Tensor):
            return initial_value
        return Tensor(initial_value=initial_value, op=op, graph=self)

    def convert(self, value):
        """Return ``value`` itself if it is a ``Tensor``, else wrap it (no op)."""
        if isinstance(value, Tensor):
            return value
        return self.tensor(initial_value=value)

    def gradients(self, y, x_s):
        """Return d``y``/d``x`` for every ``x`` in ``x_s``.

        Walks the graph breadth-first starting from ``y`` with a seed
        gradient of 1. For each visited tensor, its op's ``gradient`` is
        asked for one gradient per op input; contributions reaching the same
        tensor are added together.

        Example (from this notebook)::

            c = a*a*a*a + b
            grad_a, grad_b = graph.gradients(c, [a, b])

        Args:
            y: the tensor to differentiate.
            x_s: iterable of tensors to differentiate with respect to.

        Returns:
            A list of gradient tensors, in the order of ``x_s``.

        Raises:
            KeyError: if some ``x`` is neither ``y`` nor reachable from ``y``
                through op inputs.
            AssertionError: if an op returns a different number of gradients
                than it has inputs.
        """
        from collections import deque  # O(1) pops from the left

        seed = self.convert(1)
        # dy/dy = 1; recording it lets callers ask for the gradient of y
        # itself and makes a leaf ``y`` (no op) a valid starting point.
        grads = {y: seed}

        pending = deque()
        if y.op is not None:
            pending.append((y, seed))

        while pending:
            node, grad_node = pending.popleft()
            grad_node = self.convert(grad_node)

            input_grads = node.op.gradient(grad_node)
            assert len(input_grads) == len(node.op.inputs)

            for tensor, gradient in zip(node.op.inputs, input_grads):
                if tensor in grads:
                    grads[tensor] = grads[tensor] + gradient
                else:
                    grads[tensor] = gradient

                # Only tensors produced by an op have anything upstream;
                # leaves (op is None) end the walk.
                if tensor.op is not None:
                    pending.append((tensor, gradient))

        return [grads[x] for x in x_s]

    def add(self, a, b):
        """Build an ``AddOp`` for ``a + b`` and return its result tensor."""
        op = AddOp([a, b], graph=self)
        return op.output_tensor_with_op

    def neg(self, x):
        """Build a ``NegOp`` for ``-x`` and return its result tensor."""
        op = NegOp([x], graph=self)
        return op.output_tensor_with_op

    def sub(self, a, b):
        """Build a ``SubOp`` for ``a - b`` and return its result tensor."""
        op = SubOp([a, b], graph=self)
        return op.output_tensor_with_op

    def mul(self, a, b):
        """Build a ``MulOp`` for ``a * b`` and return its result tensor."""
        op = MulOp([a, b], graph=self)
        return op.output_tensor_with_op

    def square(self, x):
        """Build a ``SquareOp`` for ``x`` and return its result tensor."""
        op = SquareOp([x], graph=self)
        return op.output_tensor_with_op

    def div(self, a, b):
        """Build a ``DivOp`` for ``a / b`` and return its result tensor."""
        op = DivOp([a, b], graph=self)
        return op.output_tensor_with_op

    def transpose(self, x):
        """Build a ``TransposeOp`` for ``x`` and return its result tensor."""
        op = TransposeOp([x], graph=self)
        return op.output_tensor_with_op

    def dot(self, a, b):
        """Build a ``DotOp`` for ``a . b`` and return its result tensor."""
        op = DotOp([a, b], graph=self)
        return op.output_tensor_with_op

    def sigmoid(self, x):
        """Build a ``SigmoidOp`` for ``x`` and return its result tensor."""
        op = SigmoidOp([x], graph=self)
        return op.output_tensor_with_op

    def mean(self, x):
        """Build a ``MeanOp`` for ``x`` and return its result tensor."""
        op = MeanOp([x], graph=self)
        return op.output_tensor_with_op

    def assign(self, a, b):
        """Build an ``AssignOp`` for ``(a, b)`` and return its ``output`` tensor."""
        op = AssignOp([a, b], graph=self)
        return op.output

In [17]:
# Demo: take the mean of a vector tensor, then inspect the op, the computed
# value and the gradient with respect to the input.
# NOTE(review): keep the module-level name `graph` for as long as any op
# class looks `graph` up as a global inside compute().
graph = Graph()
a = graph.tensor(np.array([1,1,1,1,6]))
b = graph.tensor(np.array([2,2,2,2,2]))  # created but not an input of c
c = graph.mean(a)
print("==================OP")
# a and b were created directly, so they carry no op; c carries the MeanOp.
print(a.op)
print(b.op)
print(c.op)
print("==================Initial value")
#print("Length is :"+str(len(c.op.inputs)))
#print(c.op.inputs[0].op)
#print(c.op.inputs[1].op)
print(c.op.inputs)
#print(c.op.inputs[1].initial_value)
#print(c.op.output.initial_value)
# Both of the next two prints show the mean of a (c is the op's result tensor).
print(c.op.output_tensor_with_op.initial_value)
print("==================")
print(c.initial_value)
print("==================G")
#grad_a, grad_b = graph.gradients(c, [a, b])
# Only a is requested: b is not reachable from c, so asking for it would fail.
grad_a = graph.gradients(c, [a])
# MeanOp.gradient passes the seed gradient through divided by 1.
print(grad_a[0].initial_value)
#print(grad_a.initial_value)
#print(grad_b.initial_value)
print("==================")
#print("c.op.inputs[0].op.inputs[0] = a + b + a")
#print(c.op.inputs[0].op.inputs[0].initial_value)
#print(c.op.inputs[0].op.inputs[1].initial_value)
#sess = Session(graph)
#grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1})
#print(grad_a_)
#print(grad_b_)

None
None
<__main__.MeanOp object at 0x7f9c60648110>
[<__main__.Tensor object at 0x7f9c605da350>]
2.0
2.0
1.0


In [6]:
# Demo: scalar expression c = a^4 + b built through operator overloading,
# followed by its gradients with respect to a and b.
# NOTE(review): keep the module-level name `graph` for as long as any op
# class looks `graph` up as a global inside compute().
graph = Graph()
a = graph.tensor(2)
b = graph.tensor(5)
# Parsed left-to-right as (((a*a)*a)*a) + b, so c.op is the final AddOp.
c = a*a*a*a + b
print(c.initial_value)
print("==================")
# Leaves a and b have no op; c carries the AddOp.
print(a.op)
print(b.op)
print(c.op)
print("==================")
print("Length is :"+str(len(c.op.inputs)))
# inputs[0] is the product ((a*a)*a)*a (a MulOp result); inputs[1] is b (a leaf).
print(c.op.inputs[0].op)
print(c.op.inputs[1].op)
print(c.op.inputs[0].initial_value)
print(c.op.inputs[1].initial_value)
# AddOp stores the sum on both `output` and `output_tensor_with_op`.
print(c.op.output.initial_value)
print(c.op.output_tensor_with_op.initial_value)
print("==================")
print(c.initial_value)
print("==================G")
# Expected: dc/da = 4*a^3 and dc/db = 1.
grad_a, grad_b = graph.gradients(c, [a, b])
print(grad_a.initial_value)
print(grad_b.initial_value)
print("==================")
# NOTE(review): the label printed below is stale -- it describes an earlier
# expression. For the current c, this chain reaches the tensor (a*a)*a.
print("c.op.inputs[0].op.inputs[0] = a + b + a")
print(c.op.inputs[0].op.inputs[0].initial_value)
#print(c.op.inputs[0].op.inputs[1].initial_value)
#sess = Session(graph)
#grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1})
#print(grad_a_)
#print(grad_b_)

21
None
None
<__main__.AddOp object at 0x7f9c605f1890>
Length is :2
<__main__.MulOp object at 0x7f9c605f17d0>
None
16
5
21
21
21
32
1
c.op.inputs[0].op.inputs[0] = a + b + a
8
