## Getting Started 

In [36]:
import torch
import tensorflow as tf


def pr(*sth):
    """Print one or more values.

    A single argument is printed as-is, so ``pr(x)`` shows ``x`` itself.
    Several arguments are printed together as one tuple, e.g. ``pr(a, b)``
    shows ``(a, b)``. Calling ``pr()`` with no arguments prints ``()``.
    """
    # `sth` is always a tuple of the positional arguments. Unwrap the
    # one-element case so a lone value is not displayed as the 1-tuple `(x,)`.
    print(sth[0] if len(sth) == 1 else sth)

In [3]:
# Build a 1-D tensor holding the integers 0..11.
x = torch.arange(12)
pr(x)
pr(x.numel())  # total number of elements in a tensor
pr(x.shape)
X = x.reshape(3, 4)
pr(X)
# -1 asks reshape to infer that dimension from the element count (12 / 3 = 4),
# so this yields the same 3x4 result as the explicit call above.
X = x.reshape(3, -1)
pr(X)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
12
torch.Size([12])
tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [4]:
# Build a 1-D TensorFlow tensor holding the integers 0..11.
x = tf.range(12)
pr(x)
pr(tf.size(x))  # total number of elements, returned as a scalar tensor
pr(x.shape)
X = tf.reshape(x, (3, 4))
pr(X)
# -1 lets tf.reshape infer the row count (12 / 4 = 3), giving the same 3x4 result.
X = tf.reshape(x, (-1, 4))
pr(X)

tf.Tensor([ 0  1  2  3  4  5  6  7  8  9 10 11], shape=(12,), dtype=int32)
tf.Tensor(12, shape=(), dtype=int32)
(12,)
tf.Tensor(
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]], shape=(3, 4), dtype=int32)
tf.Tensor(
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]], shape=(3, 4), dtype=int32)


### Initialize with some constant

In [5]:
# Tensor of shape (2, 3, 4) filled with zeros.
torch.zeros((2, 3, 4))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [6]:
# Tensor of shape (2, 3) filled with ones.
torch.ones((2, 3))

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [7]:
# Tensor of shape (3, 4) filled with random values; no seed is set, so the
# displayed numbers change on every run.
torch.randn((3, 4))

tensor([[ 0.5995,  0.6586, -1.4352,  0.4288],
        [ 2.0775,  0.3837, -1.0126,  1.2563],
        [-0.5817,  0.4726,  1.8769, -0.6230]])

In [8]:
# Build a tensor directly from a nested Python list (one inner list per row).
torch.tensor([[1, 2], [3, 4]])

tensor([[1, 2],
        [3, 4]])

In [9]:
# float32 tensor of shape (3, 4) filled with zeros.
tf.zeros((3, 4))

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]], dtype=float32)>

In [10]:
# float32 tensor of shape (3, 2) filled with ones.
tf.ones((3, 2))

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[1., 1.],
       [1., 1.],
       [1., 1.]], dtype=float32)>

In [11]:
# Pitfall: the shape must be passed as ONE tuple. Here only the first `2` is
# used as the shape (the result has shape [2]); the second `2` is a separate
# positional argument (presumably `dtype`, per the tf.zeros signature - verify).
tf.zeros(2, 2).shape  # warning! always wrap the shape in parentheses

TensorShape([2])

In [12]:
# Shape passed as a single tuple: the result has the intended shape (2, 2).
tf.zeros((2, 2)).shape

TensorShape([2, 2])

In [13]:
# float32 tensor of shape (2, 2) filled with random values; no seed is set,
# so the displayed numbers change on every run.
tf.random.normal((2, 2))

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 0.26843318, -1.2747962 ],
       [ 0.13900137,  0.66048676]], dtype=float32)>

In [14]:
# Build a constant tensor directly from a nested Python list (one inner list per row).
tf.constant([[1, 2], [2, 3]])

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[1, 2],
       [2, 3]])>

## Operations

- element-wise operations (`+`, `-`, `*`, `/`, `**`, `exp`)
- concatenation along rows and columns 
- logical statements ($==,!=$)
- sum

In [15]:
# Arithmetic operators act element by element on tensors of the same shape.
x = torch.tensor([1, 2, 3, 4])
y = torch.tensor([2, 3, 4, 5])
x + y, x - y, x * y, x**y  # sum, difference, product, power

(tensor([3, 5, 7, 9]),
 tensor([-1, -1, -1, -1]),
 tensor([ 2,  6, 12, 20]),
 tensor([   1,    8,   81, 1024]))

In [16]:
# Arithmetic operators act element by element on tensors of the same shape.
x = tf.constant([1, 2, 3])
y = tf.constant([2, 3, 4])
x + y, x - y, x * y, x**y  # sum, difference, product, power

(<tf.Tensor: shape=(3,), dtype=int32, numpy=array([3, 5, 7])>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([-1, -1, -1])>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 2,  6, 12])>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 1,  8, 81])>)

In [21]:
# Request a float dtype explicitly: a tensor built from integer literals would
# otherwise be an integer (long) tensor.
x = torch.tensor([1, 2, 3],
                 dtype=torch.float)  # torch.exp not implemented for long
torch.exp(x)  # element-wise e**x

tensor([ 2.7183,  7.3891, 20.0855])

In [23]:
# The float literal `1.` makes the whole constant float32 (see the dtype in the output).
x = tf.constant([1., 2, 3])
tf.exp(x)  # element-wise e**x

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([ 2.7182817,  7.389056 , 20.085537 ], dtype=float32)>

In [25]:
X = torch.arange(12, dtype=torch.float32).reshape((3, -1))
Y = torch.randn((3, 4))
# Concatenate the two 3x4 tensors three ways and display all results:
#   - no `dim` given: same result as dim=0
#   - dim=0: stack along the rows    -> shape (6, 4)
#   - dim=1: stack along the columns -> shape (3, 8)
(
    torch.cat((X, Y)),
    torch.cat((X, Y), dim=0),
    torch.cat((X, Y), dim=1),
)

(tensor([[ 0.0000,  1.0000,  2.0000,  3.0000],
         [ 4.0000,  5.0000,  6.0000,  7.0000],
         [ 8.0000,  9.0000, 10.0000, 11.0000],
         [-0.4289,  0.1921, -1.5583,  0.7700],
         [-0.3428,  0.0395, -0.1833,  0.1800],
         [-2.7038,  0.5620,  0.6307, -1.1414]]),
 tensor([[ 0.0000,  1.0000,  2.0000,  3.0000],
         [ 4.0000,  5.0000,  6.0000,  7.0000],
         [ 8.0000,  9.0000, 10.0000, 11.0000],
         [-0.4289,  0.1921, -1.5583,  0.7700],
         [-0.3428,  0.0395, -0.1833,  0.1800],
         [-2.7038,  0.5620,  0.6307, -1.1414]]),
 tensor([[ 0.0000,  1.0000,  2.0000,  3.0000, -0.4289,  0.1921, -1.5583,  0.7700],
         [ 4.0000,  5.0000,  6.0000,  7.0000, -0.3428,  0.0395, -0.1833,  0.1800],
         [ 8.0000,  9.0000, 10.0000, 11.0000, -2.7038,  0.5620,  0.6307, -1.1414]]))

In [29]:
X = tf.reshape(tf.range(12, dtype=tf.float32), (3, -1))
Y = tf.random.normal((3, 4))
# tf.concat([X, Y])  # not valid: `axis` has to be given explicitly
# axis=0 stacks along the rows -> shape (6, 4); axis=1 along the columns -> shape (3, 8).
tf.concat([X, Y], axis=0), tf.concat([X, Y], axis=1)

(<tf.Tensor: shape=(6, 4), dtype=float32, numpy=
 array([[ 0.        ,  1.        ,  2.        ,  3.        ],
        [ 4.        ,  5.        ,  6.        ,  7.        ],
        [ 8.        ,  9.        , 10.        , 11.        ],
        [ 0.32152602, -1.641733  , -1.1712486 , -1.2893225 ],
        [ 0.96593076,  1.6905729 , -0.2956201 ,  0.31225562],
        [ 1.1656165 , -0.531092  , -0.5489811 ,  0.9708494 ]],
       dtype=float32)>, <tf.Tensor: shape=(3, 8), dtype=float32, numpy=
 array([[ 0.        ,  1.        ,  2.        ,  3.        ,  0.32152602,
         -1.641733  , -1.1712486 , -1.2893225 ],
        [ 4.        ,  5.        ,  6.        ,  7.        ,  0.96593076,
          1.6905729 , -0.2956201 ,  0.31225562],
        [ 8.        ,  9.        , 10.        , 11.        ,  1.1656165 ,
         -0.531092  , -0.5489811 ,  0.9708494 ]], dtype=float32)>)

In [30]:
X = torch.randn((3, 4, 3))
X.sum()  # adds up every element and returns a 0-dim (scalar) tensor

tensor(7.7180)

In [31]:
X = tf.random.normal((3, 4))
tf.reduce_sum(X)  # adds up every element and returns a scalar tensor (shape=())

<tf.Tensor: shape=(), dtype=float32, numpy=1.0478933>

## Broadcasting Mechanism

Under certain conditions, even when shapes differ, we can still perform elementwise operations by invoking the **broadcasting mechanism**:

1. expand one or both arrays by copying elements appropriately
2. carry out the elementwise operations 

In most cases, we broadcast along an axis where an array initially only has length 1.

In [53]:
# a is a (3, 1) column and b is a (1, 2) row: each has a size-1 axis that can
# be stretched to match the other operand.
a = torch.arange(3).reshape((3, 1))
# b = torch.arange(2).reshape((2, 1))  # cannot broadcast: (3, 1) vs (2, 1) differ on axis 0 and neither size is 1
b = torch.arange(2).reshape((1, 2))
pr(a, b)

(tensor([[0],
        [1],
        [2]]), tensor([[0, 1]]))


In [54]:
a + b  # (3, 1) + (1, 2) broadcasts to a (3, 2) result

tensor([[0, 1],
        [1, 2],
        [2, 3]])

In [45]:
# The -1 dimensions are inferred, so a has shape (3, 1) and b has shape (1, 2).
a = tf.reshape(tf.range(3), (3, -1))
b = tf.reshape(tf.range(2), (-1, 2))
a + b  # broadcasts to a (3, 2) result

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[0, 1],
       [1, 2],
       [2, 3]])>

## Indexing and Slicing

In [57]:
# 3x4 integer tensor used by the indexing examples that follow.
X = torch.arange(12).reshape((3, 4))
X

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [58]:
X[-1]  # negative index counts from the end: the last row

tensor([ 8,  9, 10, 11])

In [60]:
X[0:2, 0:2]  # rows 0-1 and columns 0-1 (the end index is exclusive)

tensor([[0, 1],
        [4, 5]])

In [61]:
X[2, 2]  # single element at row 2, column 2, returned as a 0-dim tensor

tensor(10)

In [62]:
X[0:2, :] = 100  # PyTorch tensors support in-place assignment: rows 0 and 1 all become 100
X

tensor([[100, 100, 100, 100],
        [100, 100, 100, 100],
        [  8,   9,  10,  11]])

In [67]:
# 3x4 float32 TensorFlow tensor used by the indexing examples that follow.
X = tf.reshape(tf.range(12, dtype=tf.float32), (3, 4))
X

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]], dtype=float32)>

In [68]:
X[-1], X[1:3]  # the last row; rows 1 and 2 (the end index is exclusive)

(<tf.Tensor: shape=(4,), dtype=float32, numpy=array([ 8.,  9., 10., 11.], dtype=float32)>,
 <tf.Tensor: shape=(2, 4), dtype=float32, numpy=
 array([[ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32)>)

`Tensor`s in TensorFlow are immutable and cannot be assigned to. To modify elements, wrap the data in a `tf.Variable` and use `assign`.

In [69]:
# Wrap the values in a tf.Variable, which (unlike a plain tensor) can be modified.
X_var = tf.Variable(X)
X_var[1, 2].assign(9.)  # sets the element at row 1, column 2 to 9

<tf.Variable 'UnreadVariable' shape=(3, 4) dtype=float32, numpy=
array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  9.,  7.],
       [ 8.,  9., 10., 11.]], dtype=float32)>

## Saving Memory

If we write `Y = X + Y`, we dereference the tensor that `Y` used to point to and instead point `Y` at newly allocated memory. Typically, however, we want to perform such updates in place.

In [70]:
X = torch.tensor([1, 2, 3])
Y = torch.tensor([4, 5, 6])
before = id(Y)  # identity of the object Y currently refers to
Y = X + Y  # builds a new tensor and rebinds the name Y to it
before == id(Y)  # False: Y now refers to a different object

False

In [71]:
X = tf.constant([1, 2, 3])
Y = tf.constant([3, 3, 4])
before = id(Y)  # identity of the object Y currently refers to
Y = X + Y  # builds a new tensor and rebinds the name Y to it
before == id(Y)  # False: Y now refers to a different object

False

In [72]:
X = torch.tensor([1, 2, 3])
Y = torch.tensor([4, 5, 6])
Z = torch.zeros_like(Y)  # pre-allocated result tensor shaped like Y
before = id(Z)
Z[:] = X + Y  # slice assignment writes the result into the existing Z
before == id(Z)  # True: Z is still the same object

True

In [73]:
# Uses X and Y from the previous cell. Re-running this cell adds Y to X again,
# so it is not idempotent.
before = id(X)
X += Y  # augmented assignment updates X without rebinding the name
before == id(X)  # True: X is still the same object

True

As for TensorFlow, `Variable`s are mutable containers of state that provide a way to store model parameters. We can assign the result of an operation to a `Variable` with `assign`.

In [74]:
X = tf.reshape(tf.range(12, dtype=tf.float32), (3, 4))
Y = tf.random.normal((3, 4))
Z = tf.Variable(tf.zeros_like(X))  # mutable container initialised with zeros shaped like X
before = id(Z)
Z.assign(X + Y)  # stores the result in the existing Variable instead of rebinding Z
before == id(Z)  # True: Z is still the same Python object

True

TensorFlow `Tensor`s are immutable, and gradients do not flow through `Variable` assignments. TensorFlow does not provide an explicit way to run an individual operation in place.

Instead, it provides the `tf.function` decorator, which wraps a computation inside a TensorFlow graph that gets compiled and optimized before running. This allows TensorFlow to prune unused values and to re-use prior allocations that are no longer needed, which minimizes the memory overhead of TensorFlow computations.

In [75]:
@tf.function
def computation(X, Y):
    """Return ``X + Y + Y + Y + Y`` computed as a chain of four additions.

    The Python ``id`` of each of the first three intermediate results is
    printed so the reader can inspect the objects created along the way.

    NOTE(review): under ``tf.function`` these ``print`` calls are plain Python
    side effects and presumably run while the function is traced rather than
    on every call - confirm against the tf.function guide.
    """
    # Never read again: kept as an example of an unused value, which the
    # accompanying text says tf.function is able to prune.
    unused_zeros = tf.zeros_like(Y)
    first_sum = X + Y
    print(id(first_sum))
    second_sum = first_sum + Y
    print(id(second_sum))
    third_sum = second_sum + Y
    print(id(third_sum))
    return third_sum + Y

computation(X,Y)  # the three ids shown come from the print calls inside the function

2589049508752
2589048831112
2589049492928


<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[-3.7738135,  1.1630945,  6.5113316, -0.8264731],
       [ 7.986046 ,  7.714159 ,  3.7849388,  9.962524 ],
       [ 5.2667503,  2.769516 ,  5.513662 ,  2.156002 ]], dtype=float32)>

## Conversion to Other Python Objects

In [77]:
A = torch.arange(12).reshape((3,4))
B = A.numpy()  # PyTorch tensor -> NumPy array
type(A),type(B)

(torch.Tensor, numpy.ndarray)

In [78]:
# A one-element tensor converts to a Python scalar via .item(), float() or int().
# The float32 value is shown as 3.2000000476..., and int() truncates it to 3.
a = torch.tensor([3.2])
a, a.item(), float(a), int(a)

(tensor([3.2000]), 3.200000047683716, 3.200000047683716, 3)

In [79]:
A = tf.random.normal((3,4))
B = A.numpy()  # TensorFlow tensor -> NumPy array
C = tf.constant(B)  # NumPy array -> TensorFlow tensor
type(A), type(B), type(C)

(tensorflow.python.framework.ops.EagerTensor,
 numpy.ndarray,
 tensorflow.python.framework.ops.EagerTensor)