In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Layer
import complexnn

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

Using TensorFlow backend.


Tensor: operate in CPU and GPU <br>
ndarray: only in CPU


In [5]:
a = tf.constant([[1,2],[3,4]])
print(a)

tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)


### With different shape

In [15]:
element_list = [
    [0, 1, 2, 3],
    [3, 4, 1],
    [0]
]

try:
    tensor = tf.constant(element_list)
except Exception as e:
    print(f"{type(e).__name__}:{e}")
    
ragged_tensor = tf.ragged.constant(element_list)
print(ragged_tensor)

ValueError:Can't convert non-rectangular Python sequence to Tensor.
<tf.RaggedTensor [[0, 1, 2, 3], [3, 4, 1], [0]]>


### Common operation

In [72]:
maxvalue = tf.reduce_max(a)
minvalue = tf.reduce_min(a)
print(maxvalue,minvalue)
maxvalueind = tf.argmax(a)
# before using softmax, input argument should be in float type, otherwise an error arises with node failed.
print(a)
softvalue = tf.nn.softmax(tf.cast(a,tf.float32))
print(softvalue)
# flatten
print(tf.reshape(a,[-1]))

tf.Tensor(6, shape=(), dtype=int32) tf.Tensor(3, shape=(), dtype=int32)
[[3 4]
 [5 6]]
tf.Tensor(
[[0.26894143 0.7310586 ]
 [0.26894143 0.7310586 ]], shape=(2, 2), dtype=float32)
tf.Tensor([3 4 5 6], shape=(4,), dtype=int32)


### Conversion

In [73]:
# array to tensor
arr = np.array([[1,2],[3,4]])
tensor = tf.constant(arr)
print(tensor)
# tensor to array
arr = np.array(tensor)
print(arr)
# list to tensor
lis = [1,2,3,4]
tensor = tf.constant(lis)
print(tensor)
int_tensor = tf.constant([1,2])
print(tf.cast(int_tensor,tf.float16).dtype)

tf.Tensor(
[[1 2]
 [3 4]], shape=(2, 2), dtype=int32)
[[1 2]
 [3 4]]
tf.Tensor([1 2 3 4], shape=(4,), dtype=int32)
<dtype: 'float16'>


In [104]:
g = tf.constant([[1,2,3,4],
                [4,5,6,7],
                [7,6,1,8]])
# repeats設定的參數中，要遵照axis設定的軸做重複，依照axis方向重複設定次數，而repeats的維度要符合輸入的維度
# 若輸入為3*4，則axis設定為0時 repeats只能是三維，反之axis設定為1時repeats只能四維
tf.repeat(g,repeats=[2,1,1,3],axis=1)

<tf.Tensor: shape=(3, 7), dtype=int32, numpy=
array([[1, 1, 2, 3, 4, 4, 4],
       [4, 4, 5, 6, 7, 7, 7],
       [7, 7, 6, 1, 8, 8, 8]])>

### Automatic differentiation

In [20]:
w = tf.Variable([1.,2.,4.])
b = tf.Variable([3.,3.,3.])
x = tf.Variable([1.,2.,3.])

with tf.GradientTape(persistent=True) as tape:
    y = w*x + b
    loss = tf.reduce_mean(y**2)
print('loss = ',loss,'y = ',y)
# loss = y1^2+y2^2+y3^2 / 3 , dl/dy1 = 2*y1/3 ... and so on
print(tape.gradient(loss,y))
# dl/dx = dl/dy*dy/dx = dl/dy*[1 2 4]
print(tape.gradient(loss,x))

loss =  tf.Tensor(96.666664, shape=(), dtype=float32) y =  tf.Tensor([ 4.  7. 15.], shape=(3,), dtype=float32)
tf.Tensor([ 2.6666667  4.666667  10.       ], shape=(3,), dtype=float32)
tf.Tensor([ 2.6666667  9.333334  40.       ], shape=(3,), dtype=float32)


使用layer時，第一個index為數量，其次為長寬以及Channel數量 <br>
因此若假設x為[[1.],[2.],[3.]]，shape of x is (3,1)，表有三組x，每組x只有一個值 <br>
此時model dense unit = 2 則 y = w*x + b ，w為2*1矩陣，輸入1個x輸出2個值 <br>
反之若x設定為[[1,2,3]]，shape of x is (1,3)，表示x只有一組，而每組x高度有三，因此實際上x是一個column vector，因此w會是3*2的矩陣


In [34]:
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])

with tf.GradientTape() as tape:
  # Forward pass
  y = layer(x)
  loss = tf.reduce_mean(y**2)

# Calculate gradients with respect to every trainable variable
grad = tape.gradient(loss, layer.trainable_variables)
# dl/dw = dl/dy*dy/dw = y*x =  X'@Y，dl/db = dl/dy
print(layer.trainable_variables)
print(grad)
print('dl/dw = ',tf.transpose(x)@y)

[<tf.Variable 'dense_7/kernel:0' shape=(3, 2) dtype=float32, numpy=
array([[-0.8785917 ,  0.88383484],
       [-0.5125121 , -0.02549946],
       [ 0.9924493 , -0.0401969 ]], dtype=float32)>, <tf.Variable 'dense_7/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]
[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[1.0737319, 0.7122453],
       [2.1474638, 1.4244906],
       [3.2211957, 2.136736 ]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.0737319, 0.7122453], dtype=float32)>]


### watch a tensor

In [44]:
# A trainable variable
x0 = tf.Variable(3.0, name='x0')
# Not trainable
x1 = tf.Variable(3.0, name='x1', trainable=False)
# Not a Variable: A variable + tensor returns a tensor.
x2 = tf.Variable(2.0, name='x2') + 1.0
# Not a variable
x3 = tf.constant(3.0, name='x3')

with tf.GradientTape() as tape:
    y = (x0**2) + (x1**2) + (x2**2)

grad = tape.gradient(y, [x0, x1, x2, x3])

for g in grad:
    print(g)
print([var.name for var in tape.watched_variables()])
# add watch
with tf.GradientTape() as tape:
    tape.watch(x2)
    y = (x0**2) + (x1**2) + (x2**2)
print(tape.gradient(y,x2))

tf.Tensor(6.0, shape=(), dtype=float32)
None
None
None
['x0:0']
tf.Tensor(6.0, shape=(), dtype=float32)
['x0:0']


In [49]:
x = tf.constant([2.,3.])
with tf.GradientTape() as tape:
    tape.watch(x)
    y = x * x
    z = y * y
    r = x * z
print(tape.gradient(r,z).numpy())
# Since GradientTape.gradient can only be called once, we need to set persistent tapes if we want to call it again.
print(tape.gradient(z,x).numpy())

[2. 3.]


RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.

In [50]:
x = tf.constant([2.,3.])
with tf.GradientTape(persistent = True) as tape:
    tape.watch(x)
    y = x * x
    z = y * y
    r = x * z
print(tape.gradient(r,z).numpy())
print(tape.gradient(y,x).numpy())

[2. 3.]
[4. 6.]


使用tape.gradient需注意變數是否為Variable，若已經變成Tensor，則無法使用，須加上watch

In [88]:
x = tf.Variable(2.0)

for epoch in range(2):
    with tf.GradientTape() as tape:
        y = x*2

    print(type(x).__name__, ":", tape.gradient(y, x))
    x.assign_add(1)
    print(x)

ResourceVariable : tf.Tensor(2.0, shape=(), dtype=float32)
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=3.0>
ResourceVariable : tf.Tensor(2.0, shape=(), dtype=float32)
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=4.0>


需特別注意使用decorator tf.function後， function裡不能使用Variable或創建Variable，但可以讀取function外的Variable

In [408]:
# 二次微分
@tf.function
def seconder(x):
    a = tf.constant(3.)
    b = tf.constant(4.)
    with tf.GradientTape() as tape2:        
        with tf.GradientTape() as tape1:
            y = x**3 + a*x**2 + b
        dy_dx = tape1.gradient(y,x)
    dy2_dx2 = tape2.gradient(dy_dx,x)
    return dy2_dx2

tf.print(seconder(tf.Variable(3.0,dtype=tf.float32)))

24


In [310]:
x = tf.Variable(3,dtype=tf.float32)

@tf.function
def funwithvar():
    a = tf.constant(3.)
    b = tf.constant(4.)
    with tf.GradientTape() as tape2:
        with tf.GradientTape() as tape1:
            y = x**3 + a*x**2 + b
        dy_dx = tape1.gradient(y,x)
    dy2_dx2 = tape2.gradient(dy_dx,x)
    return dy2_dx2

tf.print(funwithvar())

24


In [321]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
x = tf.Variable(3.,dtype=tf.float32)
f = lambda : x**2+2*x+1
for _ in range(1000):
    optimizer.minimize(f,x)
tf.print(x)

-0.999998569


In [317]:
@tf.function
def minimizef():
    for _ in tf.range(1000):
        with tf.GradientTape() as tape:
            y = x**2+2*x+1
        dy_dx = tape.gradient(y,x)
        optimizer.apply_gradients(grads_and_vars=[(dy_dx,x)])
    return x
tf.print(minimizef())

-0.99999851


對於矩陣backpropagation而言，為了維持dA其shape=A，假設有三個矩陣A B C，C=AB，各自為A=(m,n) B=(n,p) C=(m,p)，則計算dC/dA時因為dC已知為(m,p)，dC/dA其shape應=A=(m,n)，值為與B相關，因此dC/dA=Ones@B'，其中為了使寬為n，需將B取transpose，而one的shape則=C

In [117]:
x = tf.Variable([[1.0, 2.0, 3.0],
                 [3.0, 4.0, 5.0],
                 ], dtype=tf.float32)
y = tf.Variable([[3.0,1.0],
                 [1.0,2.0],
                 [3.0,5.0]])
z = tf.Variable([[3.0,1.0,4.0],
                 [1.0,2.0,5.0],
                 ])
with tf.GradientTape() as tape:
    x2 = x@y
    print(x2)
    x3 = x2@z
    print(x3)
    # This step is calculated with NumPy
    # y = np.mean(x2, axis=0)


    # Like most ops, reduce_mean will cast the NumPy array to a constant tensor
    # using `tf.convert_to_tensor`.
    #y = tf.reduce_mean(x2, axis=0)
print(tape.gradient(x3, z))

tf.Tensor(
[[14. 20.]
 [28. 36.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 62.  54. 156.]
 [120. 100. 292.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[42. 42. 42.]
 [56. 56. 56.]], shape=(2, 3), dtype=float32)


In [118]:
# Define a Python function.
def a_regular_function(x, y, b):
  x = tf.matmul(x, y)
  x = x + b
  return x

# `a_function_that_uses_a_graph` is a TensorFlow `Function`.
a_function_that_uses_a_graph = tf.function(a_regular_function)

# Make some tensors.
x1 = tf.constant([[1.0, 2.0]])
y1 = tf.constant([[2.0], [3.0]])
b1 = tf.constant(4.0)

orig_value = a_regular_function(x1, y1, b1).numpy()
# Call a `Function` like a Python function.
tf_function_value = a_function_that_uses_a_graph(x1, y1, b1).numpy()
assert(orig_value == tf_function_value)

In [121]:
@tf.function
def addfun(x):
    return x**2
g = tf.constant(3)
addfun(g)

<tf.Tensor: shape=(), dtype=int32, numpy=9>

In [124]:
class SimpleModel(tf.Module):
    def __init__(self,name=None):
        super().__init__(name=name)
        self.a_variable = tf.Variable(5.,name='train_me')
        self.non_train = tf.Variable(10.,trainable=False,name='donttrain')
    def __call__(self,x):
        return self.a_variable*x+self.non_train
Simple = SimpleModel(name='simple')
Simple(tf.constant(5.))

<tf.Tensor: shape=(), dtype=float32, numpy=35.0>

In [137]:
class Currency:
    rates = {
        'USD':1,
        'NTD':30
    }
    def __init__(self, symbol,amount):
        self.symbol = symbol
        self.amount = amount
    def __repr__(self):
        return f'{self.symbol} ${self.amount}'
    def convert(self, symbol):
        new_amount = self.amount*self.rates[symbol]/self.rates[self.symbol]
        return Currency(symbol, new_amount)
    def __add__(self, other):
        # 定義第一項為基準，將第一項的資訊代入self，第二項以後的代入other，因此other.convert使用的amount是other.amount，
        # 將other.amount丟入convert後其值取代self.amount做運算
        new_amount = self.amount + other.convert(self.symbol).amount
        return Currency(self.symbol, new_amount)
c1 = Currency('USD',10)
c2 = Currency('NTD',600)
c3 = Currency('NTD',900)
c2+c1+c3

NTD $1800.0

In [148]:
class Dense(tf.Module):
    def __init__(self, input_, output_, name=None):
        super().__init__(name=name)
        self.w = tf.Variable(tf.random.normal([input_, output_], name='w'))
        self.b = tf.Variable(tf.zeros([output_]),name='b')
    def __call__(self, x):
        y = tf.matmul(x, self.w) + self.b
        return tf.nn.relu(y)
class SequentialModule(tf.Module):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.dense_1 = Dense(input_=3, output_=3)
        self.dense_2 = Dense(input_=3, output_=2)
    @tf.function
    def __call__(self, x):
        x = self.dense_1(x)
        return self.dense_2(x)
mymodel = SequentialModule(name='the_model')
print('Model results',mymodel(tf.constant([[2.,2.,2.]])))

Model results tf.Tensor([[1.9976461 0.3946366]], shape=(1, 2), dtype=float32)


In [147]:
print(mymodel.variables)

(<tf.Variable 'b:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[ 1.4399813 ,  0.48512435, -0.663558  ],
       [ 2.2322783 , -1.419927  , -0.17691715],
       [-1.76947   , -0.53400975, -0.9035559 ]], dtype=float32)>, <tf.Variable 'b:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>, <tf.Variable 'Variable:0' shape=(3, 2) dtype=float32, numpy=
array([[-0.2712436 , -0.41378766],
       [-0.6923353 , -0.68764895],
       [ 1.2357608 ,  0.20392382]], dtype=float32)>)


In [166]:
img = np.array([[1.,2.,3.],
                [1.,3.,4.],
                [1.,1.,1.]],dtype=np.float32)
layer1 = tf.keras.layers.Dense(3, activation='relu')
y = layer1(img)
print(y)

tf.Tensor(
[[1.9117403  2.1225517  1.2693424 ]
 [2.793047   2.4954872  1.6266868 ]
 [0.9137399  1.2012966  0.12202454]], shape=(3, 3), dtype=float32)


In [167]:
print(layer1.weights)

[<tf.Variable 'dense_13/kernel:0' shape=(3, 3) dtype=float32, numpy=
array([[ 0.03243327,  0.82836103, -0.23531985],
       [ 0.7646129 , -0.17538404, -0.4326291 ],
       [ 0.11669374,  0.5483196 ,  0.7899735 ]], dtype=float32)>, <tf.Variable 'dense_13/bias:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]


In [192]:
class Dense(tf.keras.layers.Layer):
    def __init__(self, op, **kwargs):
        super().__init__(**kwargs)
        self.op = op
    def build(self, ip):
        self.w = tf.Variable([[ 0.03243327,  0.82836103, -0.23531985],
       [ 0.7646129 , -0.17538404, -0.4326291 ],
       [ 0.11669374,  0.5483196 ,  0.7899735 ]], name='w')
        self.b = tf.Variable(tf.zeros([self.op], name='b'))
    def call(self, ip):
        return self.relu(tf.matmul(ip, self.w) + self.b)
    def relu(self, x):
        return tf.Variable(np.maximum(x,0))

flxDense = Dense(op=3)
flxDense(img)

<tf.Variable 'dense_17/Variable:0' shape=(3, 3) dtype=float32, numpy=
array([[1.9117403 , 2.1225517 , 1.2693424 ],
       [2.793047  , 2.4954872 , 1.6266868 ],
       [0.9137399 , 1.2012966 , 0.12202454]], dtype=float32)>

### Load Data

In [218]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

### Sequential Model

In [222]:
from tensorflow.keras import layers
model = keras.Sequential(
    [
        layers.Dense(2,activation='relu', name='layer1'),
        layers.Dense(3,activation='relu', name='layer2'),
        layers.Dense(4,name='layer3')
    ]
)
x = tf.ones((3,3))
y = model(x)
print(y)

tf.Tensor(
[[-1.2871853  -0.7042976  -1.6350782  -0.50122947]
 [-1.2871853  -0.7042976  -1.6350782  -0.50122947]
 [-1.2871853  -0.7042976  -1.6350782  -0.50122947]], shape=(3, 4), dtype=float32)


In [None]:
class EquivalentModel(tf.Module):
    def __init__(self, outputsize=outputsize, name='name'):
        super().__init__
        self.outputsize = outputsize
    def build(self, x)

In [326]:
x = tf.zeros((2,3))

In [343]:
tf.random.set_seed(1)
a = tf.random.normal([5,3],mean=1,stddev=0.5)
tf.print(a)

[[0.449389875 1.77287579 1.19182205]
 [0.560171068 0.387663901 0.509439468]
 [1.04390395 0.898369789 0.720921874]
 [0.63972795 0.687003791 0.642487049]
 [0.825822771 0.831765056 1.09128785]]


In [351]:
x = tf.Variable(x)
x[1,:].assign(tf.Variable([1.,2.,4.]))

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[0., 0., 0.],
       [1., 2., 4.]], dtype=float32)>

In [357]:
b = tf.random.uniform([2,3,3,4],minval=-2,maxval=2)
b

<tf.Tensor: shape=(2, 3, 3, 4), dtype=float32, numpy=
array([[[[-1.0541916 , -1.0449066 ,  0.01452494,  1.0941601 ],
         [ 0.44309902,  1.4887133 , -0.9086108 ,  0.4732647 ],
         [-1.1899366 ,  1.4803028 , -0.03037643, -0.01228046]],

        [[-1.8795247 ,  0.7430587 ,  0.526289  , -1.1686454 ],
         [-0.24350691, -1.2441807 ,  1.0536628 , -0.10456944],
         [-1.080358  , -0.7569585 ,  1.3990765 , -0.34365797]],

        [[-1.173347  , -0.22561026,  0.2775097 ,  0.22138214],
         [-0.2731552 , -0.6880598 , -0.80336905, -1.7259188 ],
         [-0.02696133,  1.418993  ,  0.7238035 , -1.2891545 ]]],


       [[[-1.9736538 , -0.8977804 , -1.7181797 , -0.16798544],
         [ 0.32355738, -1.871625  ,  1.4709668 , -1.4523754 ],
         [-0.76088   ,  1.9358897 , -0.44353962, -1.7894301 ]],

        [[-1.1716471 ,  1.1550207 , -1.8895149 , -0.5326934 ],
         [-1.9750752 ,  1.472372  ,  1.5591717 , -1.6778622 ],
         [-1.158916  ,  0.2736063 , -0.20335054, -0.89

In [364]:
tf.print(tf.gather(b,[0],axis=2))
tf.print(' ')
tf.print(tf.gather_nd(b,indices=[(0,0,0,0),(0,0,1,1)]))
mask = tf.boolean_mask(b,b<0)
tf.print(mask)

[[[[-1.05419159 -1.04490662 0.0145249367 1.09416008]]

  [[-1.87952471 0.743058681 0.526289 -1.16864538]]

  [[-1.173347 -0.225610256 0.277509689 0.221382141]]]


 [[[-1.97365379 -0.897780418 -1.7181797 -0.167985439]]

  [[-1.17164707 1.15502071 -1.88951492 -0.532693386]]

  [[-0.701429844 -0.317531109 1.48364401 1.09318829]]]]
 
[-1.05419159 1.48871326]
[-1.05419159 -1.04490662 -0.908610821 ... -0.545397758 -0.813507557 -0.438153744]


In [386]:
t = tf.Variable([[2,3,0.01],
                 [-4,5,-5.4]])
tf.print(t*tf.cast((t>0),tf.float32))
tf.print(" ")
# tf.where(判斷式，若是則此，若否則此)
# e.g. t pass through relu
tf.print(tf.where(t>=0,t,0))
# tf.scatter_nd(欲取代的indice,取代值，建立的矩陣大小)
tf.print(tf.scatter_nd([[1,1],[2,2]],[3,5],(3,3)))
# 抓值
indices = tf.where(t>1)
tf.print(tf.scatter_nd(indices,tf.gather_nd(t,indices),t.shape))


[[2 3 0.01]
 [-0 5 -0]]
 
[[2 3 0.01]
 [0 5 0]]
[[0 0 0]
 [0 3 0]
 [0 0 5]]
[[2 3 0]
 [0 5 0]]


In [390]:
a = tf.random.uniform(shape=[2,3,1,2,1],
                      minval=0,maxval=255,dtype=tf.int32)
tf.print(a.shape)
tf.print(a)
s = tf.squeeze(a,[2,4])
tf.print(s.shape)
r = tf.expand_dims(s,axis=3)
tf.print(r.shape)

TensorShape([2, 3, 1, 2, 1])
[[[[[17]
    [172]]]


  [[[236]
    [89]]]


  [[[31]
    [189]]]]



 [[[[40]
    [123]]]


  [[[220]
    [52]]]


  [[[197]
    [72]]]]]
TensorShape([2, 3, 2])
TensorShape([2, 3, 2, 1])


In [398]:
a = tf.constant([[1.0,2.0],[3.0,4.0]])
b = tf.constant([[5.0,6.0],[7.0,8.0]])
c = tf.constant([[9.0,10.0],[11.0,12.0]])
# don't increase dimension
tf.print(tf.concat([a,b,c],axis = 1))
# increase dimension 
d = tf.stack([a,b,c],axis=0)
tf.print(d)
tf.print(d.shape)
tf.print(tf.split(d,3,axis=0))

[[1 2 5 6 9 10]
 [3 4 7 8 11 12]]
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]

 [[9 10]
  [11 12]]]
TensorShape([3, 2, 2])
[[[[1 2]
  [3 4]]], [[[5 6]
  [7 8]]], [[[9 10]
  [11 12]]]]


In [404]:
c = tf.constant([1,2,3])
d = tf.constant([[1],[2],[3]])
# 個別計算c(1,3)和d(3,1)，取最大shape (3,3)
tf.broadcast_dynamic_shape(tf.shape(c),tf.shape(d))

<tf.Tensor: shape=(), dtype=float32, numpy=4.0>

In [405]:
# Matrix norm (預設計算|A|2 即歐式距離)
a = tf.constant([[1.0,2],[3,4]])
tf.linalg.norm(a)

5.477225575051661

In [411]:
@tf.function
def np_random():
    tf.print(np.random.randn(3,3))
np_random()
np_random()

array([[-0.5228282 ,  0.95754106,  0.67781835],
       [ 0.53549513, -0.97257476, -1.67468178],
       [ 1.00576579,  1.69614907,  1.19309891]])
array([[-0.5228282 ,  0.95754106,  0.67781835],
       [ 0.53549513, -0.97257476, -1.67468178],
       [ 1.00576579,  1.69614907,  1.19309891]])


In [413]:
aconstant = tf.constant(3)
tf.print(id(aconstant))

2249216766696


In [414]:
aconstant = aconstant + tf.constant(3)
tf.print(id(aconstant))

2250453860928


In [415]:
avar = tf.Variable(3)
tf.print(id(avar))

2248587534232


In [429]:
@tf.function
def myadd(a, b):
    print(a,b)
    for i in range(3):
        c = a+b
        tf.print(c)
    print('end')
    return c
print(myadd(3.5,4.5))

3.5 4.5
end
8.0
8.0
8.0
tf.Tensor(8.0, shape=(), dtype=float32)


In [16]:
x = tf.Variable(0.,name='x',dtype=tf.float32)
opt = tf.keras.optimizers.SGD(learning_rate=.01)

@tf.function
def minimizef():
    a = tf.constant(1.)
    b = tf.constant(-2.)
    c = tf.constant(1.)
    
    while tf.constant(True):
        with tf.GradientTape() as tape:
            y = a*x**2 + b*x + c
        dy_dx = tape.gradient(y,x)
        opt.apply_gradients(grads_and_vars=[(dy_dx,x)])
        if tf.abs(dy_dx)<tf.constant(1e-4) or opt.iterations > 10000:
            break
        if opt.iterations%100 == 0:
            tf.print(opt.iterations)
    y = a*x**x+b*x+c
    return y
tf.print(minimizef())

100
200
300
400
4.82201576e-05


SyntaxError: invalid syntax (<ipython-input-42-87c731a7a0b7>, line 1)

In [18]:
class Mymodel(Layer):
    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.output_dim = output_dim
    def build(self, input_shape):
        self.w = self.add_weight(name='w',
                                 shape=(input_shape[-1],self.output_dim[1]))
        self.b = tf.zeros(self.output_dim)
    def call(self, input_data):
        return tf.nn.relu(input_data@self.w + self.b)
@tf.function
def Myloss(y_true,y_pred):
    dy = tf.abs(y_true-y_pred)**2
    return tf.reduce_mean(dy)

x_train = np.random.rand(1,4,4)
y_train = np.random.rand(1,4,4)

model = Sequential()
model.add(Mymodel((4,4),input_shape=(4,4)))
model.compile(loss=Myloss,optimizer='sgd')
model.fit(x_train,y_train)

Train on 1 samples


<tensorflow.python.keras.callbacks.History at 0x18770572a20>

In [58]:
class SGOptimizer(tf.keras.optimizers.Optimizer):
    def __init__(self, learning_rate=0.01, name='SGOptimizer',**kwargs):
        super().__init__(name, **kwargs)
        # kwargs.get('key',default_value)
        self._set_hyper('learning_rate',kwargs.get('lr',learning_rate))
        self._is_first = True
    def _create_slots(self, var_list):
        for var in var_list:
            # add_slot(self,var,slot_name,initializer='zeros')
            self.add_slot(var,'pv')
        for var in var_list:
            self.add_slot(var,'pg')
    @tf.function
    def _resource_apply_dense(self, grad, var):
        var_dytpe = var.dtype.base_dtype
        # _decayed_lr is a function where input is var_dtype and return a "Tensor" lr_t, where t denotes as tensor
        lr_t = self._decayed_lr(var_dtype)
        
        new_var_m = var - grad*lr_t
        
        pv_var = self.get_slot(var,'pv')
        pg_var = self.get_slot(var,'pg')
        
        if self._is_first:
            self._is_first = False
            new_var = new_var_m
        else:
            cond = grad*pg_var >= 0
            avg_weights = (pv_var+var)/2.
            new_var = tf.where(cond,new_var_m,avg_weights)
        pv_var.assign(var)
        pg_var.assign(grad)
        var.assign(new_var)
    @tf.function
    def get_config(self):
        # baseconfig includes 'name','lr','decay','rho','momentum','epsilon','centered'.
        # which are originally defined in the class optimizer
        baseconfig = super().get_config()
        return{
            **base_config,
            'learning_rate': self._serialize_hyperparameter('learning_rate')
        }

In [20]:
def printvar(var):
    for v in var:
        print(v)
    for v in var:
        print(v)
printvar(var=['v','g'])

v
g
v
g


In [62]:
opt = tf.keras.optimizers.RMSprop()
m = Sequential()
m.add(tf.keras.layers.Dense(10))
m.compile(opt,loss=Myloss)
data = np.arange(10).reshape(5,2)
labels = np.zeros(5)
m.fit(data,labels)


ValueError: ('Could not interpret optimizer identifier:', <class '__main__.SGOptimizer'>)

In [24]:
opt.get_weights()

[1, array([[4.3673746e-02, 1.1080883e-01, 6.9182432e-01, 1.5476604e+00,
         3.3498514e+00, 3.6615089e-01, 6.4642442e-04, 2.9787531e-01,
         3.8857648e-01, 1.8999034e+00],
        [6.1185982e-02, 1.5152156e-01, 9.4002748e-01, 2.1276853e+00,
         4.6072388e+00, 5.0921941e-01, 7.1580644e-04, 4.1476360e-01,
         5.2664685e-01, 2.6182969e+00]], dtype=float32), array([1.4726669e-03, 3.1784929e-03, 1.8986195e-02, 4.6056870e-02,
        9.9974766e-02, 1.1770521e-02, 1.7680625e-06, 9.6514719e-03,
        1.0474568e-02, 5.7477832e-02], dtype=float32)]

In [27]:
m.variables

[<tf.Variable 'sequential_2/dense/kernel:0' shape=(2, 10) dtype=float32, numpy=
 array([[ 0.0616572 , -0.16643564,  0.5910838 ,  0.41427416,  0.5858908 ,
         -0.0273868 , -0.17450476,  0.04639716,  0.4931684 ,  0.35057634],
        [-0.16498713, -0.03944265, -0.04282768,  0.34154063,  0.525469  ,
          0.35929924,  0.14109105, -0.34209377, -0.07658245,  0.47198814]],
       dtype=float32)>,
 <tf.Variable 'sequential_2/dense/bias:0' shape=(10,) dtype=float32, numpy=
 array([ 0.00316227,  0.00316227, -0.00316228, -0.00316228, -0.00316228,
        -0.00316227,  0.00316204,  0.00316227, -0.00316227, -0.00316228],
       dtype=float32)>]

In [32]:
opt.get_config()

{'name': 'RMSprop',
 'learning_rate': 0.001,
 'decay': 0.0,
 'rho': 0.9,
 'momentum': 0.0,
 'epsilon': 1e-07,
 'centered': False}

In [66]:
a = np.random.rand(3,3,1,4)
b = np.random.rand(3,3,1,4)
print(a)
print('>>')
print(b)
print('>>')
print(np.concatenate((a,b),axis=-2))

[[[[0.62896522 0.10233382 0.21579907 0.49109089]]

  [[0.56970028 0.39445567 0.98970164 0.7154699 ]]

  [[0.57889397 0.04475063 0.46639079 0.72181403]]]


 [[[0.01882047 0.04122241 0.39143425 0.94361904]]

  [[0.699424   0.33666165 0.17879195 0.87850437]]

  [[0.05198682 0.5983092  0.67545425 0.96231523]]]


 [[[0.25224298 0.55681762 0.2365431  0.07239631]]

  [[0.67630574 0.2398951  0.96587005 0.51587215]]

  [[0.51421852 0.3140129  0.7676805  0.80559188]]]]
>>
[[[[0.72505321 0.87619404 0.143162   0.36684588]]

  [[0.58186199 0.35405305 0.3448931  0.88005846]]

  [[0.35227904 0.8342426  0.53015419 0.08076486]]]


 [[[0.10828163 0.57195127 0.64915419 0.88776511]]

  [[0.88123044 0.97559761 0.33725025 0.5872483 ]]

  [[0.01035687 0.73842668 0.29159498 0.17183999]]]


 [[[0.84484815 0.69937252 0.01542592 0.15988129]]

  [[0.71086461 0.78800423 0.04143069 0.34913379]]

  [[0.17628222 0.26037583 0.11534248 0.71545696]]]]
>>
[[[[0.62896522 0.10233382 0.21579907 0.49109089]
   [0.72505321 0.

In [69]:
2//2

1

In [73]:
import complexnn

ModuleNotFoundError: No module named 'complexnn'