In [1]:
import tempfile

import numpy as np
import tensorflow as tf

In [2]:
# eager execution allows for more interactive frontend
# I don't know what this means
tf.enable_eager_execution()

## Tensors

Tensors are some kind of special array-like data type. Tensorflow has special operations for them. Tensors are immutable and can be backed by GPU.

In [3]:
tf.add(1, 2)

<tf.Tensor: id=3, shape=(), dtype=int32, numpy=3>

In [4]:
tf.add([1, 2], [3, 4])

<tf.Tensor: id=7, shape=(2,), dtype=int32, numpy=array([4, 6], dtype=int32)>

In [5]:
tf.square(5)

<tf.Tensor: id=10, shape=(), dtype=int32, numpy=25>

In [6]:
tf.reduce_sum([1, 2, 3])

<tf.Tensor: id=17, shape=(), dtype=int32, numpy=6>

In [7]:
tf.encode_base64("message")

<tf.Tensor: id=20, shape=(), dtype=string, numpy=b'bWVzc2FnZQ'>

In [8]:
# operator overloading
tf.square(2) + tf.square(3)

<tf.Tensor: id=26, shape=(), dtype=int32, numpy=13>

In [9]:
results = tf.matmul([[1]], [[2, 3]])
results

<tf.Tensor: id=30, shape=(1, 2), dtype=int32, numpy=array([[2, 3]], dtype=int32)>

Conversion between NumPy and Tensorflow

In [10]:
ndarray = np.ones([3, 3])
ndarray

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [11]:
# tf operation on numpy data
tensor_array = tf.multiply(ndarray, 40)
tensor_array

<tf.Tensor: id=34, shape=(3, 3), dtype=float64, numpy=
array([[40., 40., 40.],
       [40., 40., 40.],
       [40., 40., 40.]])>

In [12]:
# np operation on tensorflow data
np.add(tensor_array, 1)

array([[41., 41., 41.],
       [41., 41., 41.],
       [41., 41., 41.]])

In [13]:
tensor_array.numpy()

array([[40., 40., 40.],
       [40., 40., 40.],
       [40., 40., 40.]])

## GPU Acceleration

Tensorflow automatically decides about using GPU if available.

In [14]:
t = tf.random_uniform([3, 3])
t

<tf.Tensor: id=44, shape=(3, 3), dtype=float32, numpy=
array([[0.9092399 , 0.21407151, 0.8533448 ],
       [0.8915709 , 0.57705283, 0.57683396],
       [0.41011918, 0.2412095 , 0.45627856]], dtype=float32)>

In [15]:
tf.test.is_gpu_available()

False

In [16]:
t.device

'/job:localhost/replica:0/task:0/device:CPU:0'

Force operation to take place on device.

In [17]:
def time_matmul(x, y):
    %timeit tf.matmul(x, y)

In [18]:
# on CPU
with tf.device("CPU:0"):
    x = tf.random_uniform([1000, 1000])
    assert x.device.endswith("CPU:0")
    time_matmul(x, x)

14.3 ms ± 26.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
# on GPU (only works if available)
with tf.device("GPU:0"):
    x = tf.random_uniform([1000, 1000])
    assert x.device.endswith("GPU:0")
    time_matmul(x, x)

RuntimeError: Error copying tensor to device: /job:localhost/replica:0/task:0/device:GPU:0. /job:localhost/replica:0/task:0/device:GPU:0 unknown device.

## Datasets

Datasets are the way to feed data to models.

### Creating Datasets

In [20]:
tensor_dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6])
tensor_dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [21]:
_, filename = tempfile.mkstemp()
filename

'/tmp/tmpagwdvus8'

In [22]:
with open(filename, 'w') as o:
    o.write('''Red
Green
Blue
''')

In [23]:
file_dataset = tf.data.TextLineDataset(filename)
file_dataset

<TextLineDataset shapes: (), types: tf.string>

### Dataset Transformations

In [24]:
tensor_dataset2 = tensor_dataset.map(tf.square).shuffle(2).batch(2)
tensor_dataset2

<BatchDataset shapes: (?,), types: tf.int32>

In [25]:
file_dataset2 = file_dataset.batch(2)
file_dataset2

<BatchDataset shapes: (?,), types: tf.string>

### Dataset Iteration

In [26]:
for e in tensor_dataset2:
    print(e)

tf.Tensor([1 4], shape=(2,), dtype=int32)
tf.Tensor([ 9 25], shape=(2,), dtype=int32)
tf.Tensor([36 16], shape=(2,), dtype=int32)


In [27]:
for e in file_dataset2:
    print(e)

tf.Tensor([b'Red' b'Green'], shape=(2,), dtype=string)
tf.Tensor([b'Blue'], shape=(1,), dtype=string)
