In [2]:
from __future__ import division
import numpy as np

In [3]:
class Tensor(object):
    """docstring for tensor"""
    def __init__(self, initial_value,op,graph):
        self.initial_value = initial_value
        self.graph = graph
        self.op = op

    def __add__(self, other): # change the + operateor's methods, other is another self 
        return self.graph.add(self, other)
    
    def __neg__(self):
        return self.graph.neg(self)
    
    def __sub__(self, other):
        return self.graph.sub(self, other)

    def __mul__(self, other):
        return self.graph.mul(self, other)
    
    def __truediv__(self, other):
        return self.graph.div(self, other)
    
    # ## [Reverse Operator Overloading](https://docs.python.org/2/reference/datamodel.html?highlight=__radd__#object.__radd__)
    def __radd__(self, other):
        return self.graph.add(other, self)

    def __rsub__(self, other):
        return self.graph.sub(other, self)

    def __rmul__(self, other):
        return self.graph.mul(other, self)

    def __rtruediv__(self, other):
        return self.graph.div(other, self)

In [15]:
class BaseOp(object):
    """docstring for BaseOp"""
    def __init__(self, inputs,graph):
        self.inputs = [graph.convert(input_) for input_ in inputs]
        self.output = graph.tensor(op=self)
        self.graph  = graph

    def compute(self,sess,*args):
        raise NotImplementedError()
    def gradient(self,grad):
        raise NotImplementedError()
'''
class myParent( object ):
    def __init__( self, customParam ):
        self.parentNumber = 5
        self.customParam = customParam

class Child( myParent ):
    def __init__( self, customParam ):
        myParent.__init__( self, customParam )
        self.childNumber = 4        
'''        
        
class AddOp(BaseOp):

    
    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        self.output = graph.convert(   self.compute(inputs[0],inputs[1])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0],inputs[1]),op =self) # build tensor with op=self
        '''
        print("11111111111 is :"+str(inputs[0].initial_value))
        if inputs[0] is not None and inputs[1] is not None:
            self.output = graph.convert(   self.compute(inputs[0],inputs[1])   )
        #self.output = graph.convert(1000000000000000000000000000000)
    '''
    def compute(self,a=0,b=0):
        #a = self.inputs[0].initial_value
        #b = self.inputs[1].initial_value
        return a.initial_value + b.initial_value
    def gradient(self,grad):
        return [grad,grad]
    
    
    
class NegOp(BaseOp):
    """
    `NegOp` negates a tensor.
    """
    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        self.output = graph.convert(   self.compute(inputs[0])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0]),op =self) # build tensor with op=self
    
    def compute(self,  x):
        return -x.initial_value

    def gradient(self, grad):
        return [-grad]

    
class SubOp(BaseOp):
    """
    `SubOp` subtracts a tensor from another tensor. Also uses the
    [sum rule](https://en.wikipedia.org/wiki/Sum_rule_in_differentiation) to
    compute the partial derivatives.
    """

    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        self.output = graph.convert(   self.compute(inputs[0],inputs[1])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0],inputs[1]),op =self) # build tensor with op=self
    
    
    def compute(self, a, b):
        return a.initial_value - b.initial_value

    def gradient(self, grad):
        return [grad, -grad]
    
    
class MulOp(BaseOp):
    """
    `MulOp` multiplies a tensor by another tensor. Uses the
    [product rule](https://en.wikipedia.org/wiki/Product_rule) to compute the
    partial derivatives.
    """
    
    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        self.output = graph.convert(   self.compute(inputs[0],inputs[1])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0],inputs[1]),op =self) # build tensor with op=self
    

    def compute(self, a, b):
        return a.initial_value * b.initial_value

    def gradient(self, grad):
        a, b = self.inputs
        return [grad * b, grad * a]
    
    
class SquareOp(BaseOp):
    """
    `SquareOp` squares a tensor.
    """
    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        self.output = graph.convert(   self.compute(inputs[0])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0]),op =self) # build tensor with op=self
    

    def compute(self,  x):
        return np.square(x)

    def gradient(self, grad):
        x = self.inputs[0].initial_value
        return [grad * (2 * x)]
    
class DivOp(BaseOp):
    """
    `DivOp` divides a tensor by another tensor. Uses the
    [quotient rule](https://en.wikipedia.org/wiki/Quotient_rule) to compute the
    partial derivatives.
    """
    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        #self.output = graph.convert(   self.compute(inputs[0],inputs[1])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0],inputs[1]),op =self) # build tensor with op=self
    

    def compute(self, a, b):

        if type(a) is int:
            print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
            print(type(a))
            temp = a/b
        else:
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print(type(a))
            print(type(b))
            print(a.initial_value)
            print(b.initial_value)
            temp = a.initial_value/b.initial_value
        return temp
    def gradient(self, grad):
        a_, b_ = self.inputs
        a=a_.initial_value
        b=b_.initial_value
        grad_=grad.initial_value
        print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
        print(type(a))
        print(type(b))
        print(type(grad))
        #grad_=self.graph.tensor(grad)
        return [self.graph.tensor(grad_ / b), self.graph.tensor( grad_ * (-a / self.graph.square(b).initial_value)) ] 

    
    

    
class TransposeOp(BaseOp):
    """
    `TransposeOp` tranposes a tensor.
    """
    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        #self.output = graph.convert(   self.compute(inputs[0],inputs[1])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0]),op =self) # build tensor with op=self
    

    def compute(self, x):
        return np.transpose(x)

    def gradient(self, grad):
        return [self.graph.transpose(grad)]
    
    
    
    
    
class DotOp(BaseOp):
    """
    `DotOp` computes the dot product between two tensors. Uses the
    [product rule](https://en.wikipedia.org/wiki/Product_rule) to compute the
    partial derivatives. Note that here we need to transpose the terms and
    perform a dot product, assuming matrices rather than scalars.
    """
    def __init__(self, inputs,graph):
        BaseOp.__init__(self, inputs,graph)
        #self.output = graph.convert(   self.compute(inputs[0],inputs[1])   ) #build a tensor object but no op
        self.output_tensor_with_op = graph.tensor(   self.compute(inputs[0],inputs[1]),op =self) # build tensor with op=self
    

    def compute(self, a, b):
        return np.dot(a, b)

    def gradient(self, grad):
        a, b = self.inputs
        aT = self.graph.transpose(a)
        bT = self.graph.transpose(b)
        return [
            self.graph.dot(grad, bT),
            self.graph.dot(aT, grad),
        ]

In [5]:
class Graph(object):
	"""docstring for Graph"""
	def tensor(self,initial_value=None,op=None):
		return Tensor(initial_value=initial_value,op=op,graph=self)

	def convert(self,value):
		if isinstance(value,Tensor):
			return value
		return self.tensor(initial_value=value)

	def gradients(self,y,x_s):

		'''
		>>>queue.append(([1,1],1))
		>>>queue.pop(0)
		([1, 1], 1)



    	loss_op = graph.mean(graph.square(graph.transpose(y) - activations1))
    	parameters = [weights0, biases0, weights1, biases1]

    	gradients = graph.gradients(loss_op, parameters)
		'''
		queue = []
		queue.append((y,1)) #queue = [(y,1)]

		grads = {}

		while len(queue)>0:   #len(queue) = 1
			y,grad_y = queue.pop(0)  # y=y,grad_y =1
			grad_y = self.convert(grad_y) #convert grad_y = 1 to a tensor = 1
			#print(y.op)

			gradients = y.op.gradient(grad_y) #gradients = [grad_y = 1,grad_y = 1] grad_y is a tensor
			assert len(gradients) == len(y.op.inputs)#len(gradients) = 2, len(y.op.inputs) = 2, 
            #c = a + b + a + a + b/ y = c /  y.op.inputs is [a + b + a + a ,b]

			for tensor, gradient in zip(y.op.inputs, gradients):
                #zip( [a + b + a + a ,b] , [grad_y = 1,grad_y = 1]  )
				if tensor in grads:  # 1: tensor = a + b + a + a , grads = {} 
                    #2:tensor = b, grads={a + b + a + a:grad_y}
					grads[tensor] += gradient
					print("+=")
					print(tensor.op)
					print(tensor.initial_value)
				else:
					grads[tensor] = gradient # 1 : grads[a + b + a + a] = gradient(=grad_y) 
                    #2:grads[b] = gradient(=grad_y)
					print("first 111111111111111111111")
					print(tensor.op)
					print(tensor.initial_value)

				if tensor.op:
					queue.append((tensor, gradient)) #queue=[(a + b + a + a,grad_y),(b,grad_y)]
                    #**********************************************************
                    # for b does not have a op, so tensor.op is None, so queue=[(a+b+a+a,grad_y)] 
                    # not have (b,grad_y)
                    #**********************************************************
                    
                    
            #while len(queue) = 2
            #queue.pop(0) = (a + b + a + a,grad_y) => y = a + b + a + a ,grad_y=grad_y=1(tensor)
            #......
            #while len(queue) = 3
            #queue.pop(0) = (b,grad_y) => y = b, grad_y = grad_y = 1
            

		return [grads[x] for x in x_s]


	def add(self,a,b):
		op = AddOp([a,b],graph=self)
		return op.output_tensor_with_op
    
	def neg(self, x):
		op = NegOp([x], graph=self)
		return op.output_tensor_with_op
    
    
	def sub(self, a, b):
		op = SubOp([a, b], graph=self)
		return op.output_tensor_with_op
    

    
	def mul(self, a, b):
		op = MulOp([a , b], graph=self)
		return op.output_tensor_with_op
    
	def square(self, x):
		op = SquareOp([x], graph=self)
		return op.output_tensor_with_op
    
	def div(self, a, b):
		op = DivOp([a, b], graph=self)
		return op.output_tensor_with_op
    
    
	def transpose(self, x):
		op = TransposeOp([x], graph=self)
		return op.output_tensor_with_op
    
	def dot(self, a, b):
		op = DotOp([a, b], graph=self)
		return op.output_tensor_with_op

In [6]:
class Session(object):
    def __init__(self, graph):
        self.graph = graph
        self.state = {}
        
    def run_op(self, op, context):
        args = [self.eval_tensor(tensor, context) for tensor in op.inputs]
        return op.compute(self, *args)
    
    def eval_tensor(self, tensor, context):
        
        if tensor not in context:
            if tensor.op is not None:
                context[tensor] = self.run_op(tensor.op, context) #let context add the value in op.input
            elif tensor in self.state and self.state[tensor] is not None:
                context[tensor] = self.state[tensor]              #let context add the value in self.state
            elif tensor not in self.state and tensor.initial_value is not None:
                context[tensor] = self.state[tensor] = tensor.initial_value
                                                        #let context add the value by the initial_value of tensor
        return context[tensor]
    def run(self, tensors, feed_dict=None):
        
        context = {}

        if feed_dict:
            context.update(feed_dict)

        return [self.eval_tensor(tensor, context) for tensor in tensors]

In [7]:
graph = Graph()
a = graph.tensor(2)
b = graph.tensor(5)
c = a*b+a
print(c.initial_value)
print("==================")
print(a.op)
print(b.op)
print(c.op)
print("==================")
print("Length is :"+str(len(c.op.inputs)))
print(c.op.inputs[0].op)
print(c.op.inputs[1].op)
print(c.op.inputs[0].initial_value)
print(c.op.inputs[1].initial_value)
print(c.op.output.initial_value)
print(c.op.output_tensor_with_op.initial_value)
print("==================")
print(c.initial_value)
print("==================G")
grad_a, grad_b = graph.gradients(c, [a, b])
print(grad_a.initial_value)
print(grad_b.initial_value)
print("==================")
print("c.op.inputs[0].op.inputs[0] = a + b + a")
print(c.op.inputs[0].op.inputs[0].initial_value)
#print(c.op.inputs[0].op.inputs[1].initial_value)
#sess = Session(graph)
#grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1})
#print(grad_a_)
#print(grad_b_)

12
None
None
<__main__.AddOp object at 0x7f0f40520610>
Length is :2
<__main__.MulOp object at 0x7f0f40520510>
None
10
2
12
12
12
first 111111111111111111111
<__main__.MulOp object at 0x7f0f40520510>
10
first 111111111111111111111
None
2
+=
None
2
first 111111111111111111111
None
5
6
2
c.op.inputs[0].op.inputs[0] = a + b + a
2


In [8]:
graph = Graph()
a = graph.tensor(2)
b = graph.tensor(5)
c = a*(a + a )
print(c.initial_value)
print("==================")
print(a.op)
print(b.op)
print(c.op)
print("==================")
print("Length is :"+str(len(c.op.inputs)))
print(c.op.inputs[0].op)
print(c.op.inputs[0].initial_value)
print(c.op.output.initial_value)
print(c.op.output_tensor_with_op.initial_value)
print("==================")
print(c.initial_value)
print("==================G")
grad_a= graph.gradients(c, [a])
print(grad_a[0].initial_value)
print("==================")
print("c.op.inputs[0].op.inputs[0] = a + b + a")
#print(c.op.inputs[0].op.inputs[0].initial_value)
#print(c.op.inputs[0].op.inputs[1].initial_value)
#sess = Session(graph)
#grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1})
#print(grad_a_)
#print(grad_b_)

8
None
None
<__main__.MulOp object at 0x7f0f5c4d3790>
Length is :2
None
2
8
8
8
first 111111111111111111111
None
2
first 111111111111111111111
<__main__.AddOp object at 0x7f0f40511e10>
4
+=
None
2
+=
None
2
8
c.op.inputs[0].op.inputs[0] = a + b + a


In [10]:
graph = Graph()
a = graph.tensor(2)
b = graph.tensor(5)
c = a*b + a -b
print(c.initial_value)
print("==================")
print(a.op)
print(b.op)
print(c.op)
print("==================")
#print("Length is :"+str(len(c.op.inputs)))
#print(c.op.inputs[0].op)
#print(c.op.inputs[1].op)
print(c.op.inputs[0].initial_value)
print(c.op.inputs[1].initial_value)
print(c.op.output.initial_value)
print(c.op.output_tensor_with_op.initial_value)
print("==================")
print(c.initial_value)
print("==================G")
grad_a, grad_b = graph.gradients(c, [a, b])
print(grad_a.initial_value)
print(grad_b.initial_value)
print("==================")
print("c.op.inputs[0].op.inputs[0] = a + b + a")
#print(c.op.inputs[0].op.inputs[0].initial_value)
#print(c.op.inputs[0].op.inputs[1].initial_value)
#sess = Session(graph)
#grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1})
#print(grad_a_)
#print(grad_b_)

7
None
None
<__main__.SubOp object at 0x7f0f4052d090>
12
5
7
7
7
first 111111111111111111111
<__main__.AddOp object at 0x7f0f405203d0>
12
first 111111111111111111111
None
5
first 111111111111111111111
<__main__.MulOp object at 0x7f0f405202d0>
10
first 111111111111111111111
None
2
+=
None
2
+=
None
5
6
1
c.op.inputs[0].op.inputs[0] = a + b + a


In [11]:
a= 3

if type(a) is int:
    print(1)
else:
    print(2)

1


In [12]:
x=4
np.square(x)

16

In [13]:
graph = Graph()
a = graph.tensor(2)
b = graph.tensor(5)
print(graph.square(a).initial_value.initial_value)

4


In [16]:
graph = Graph()
a = graph.tensor(np.array([0,1,2,3]).reshape(1,-1))
b = graph.tensor(np.array([0,1,2,3]).reshape(-1,1))
c = graph.dot(a,b)
print(c.initial_value)
print("==================")
print(a.op)
print(b.op)
print(c.op)
print("==================")
#print("Length is :"+str(len(c.op.inputs)))
#print(c.op.inputs[0].op)
#print(c.op.inputs[1].op)
print(c.op.inputs[0].initial_value)
print(c.op.inputs[1].initial_value)
print(c.op.output.initial_value)
print(c.op.output_tensor_with_op.initial_value)
print("==================")
print(c.initial_value)
print("==================G")
grad_a, grad_b = graph.gradients(c, [a, b])
print(grad_a.initial_value)
print(grad_b.initial_value)
print("==================")
print("c.op.inputs[0].op.inputs[0] = a + b + a")
#print(c.op.inputs[0].op.inputs[0].initial_value)
#print(c.op.inputs[0].op.inputs[1].initial_value)
#sess = Session(graph)
#grad_a_, grad_b_ = sess.run([grad_a, grad_b], feed_dict={a: 2, b: 1})
#print(grad_a_)
#print(grad_b_)

<__main__.Tensor object at 0x7f0f5c2f4b50>
None
None
<__main__.DotOp object at 0x7f0f5c2f4a10>
[[0 1 2 3]]
[[0]
 [1]
 [2]
 [3]]
None
<__main__.Tensor object at 0x7f0f5c2f4b50>
<__main__.Tensor object at 0x7f0f5c2f4b50>


AttributeError: 'int' object has no attribute 'initial_value'