In [23]:
import os
import time
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras, feature_column
from sklearn import model_selection, preprocessing
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow_hub as hub
import PIL.Image as Image
import tensorflow_datasets as tfds

from IPython.core.interactiveshell import InteractiveShell


In [5]:
# 配置项
# 这个要放到设置中文之前否则还是小方框
plt.style.use("seaborn")

# 指定默认字体 用来正常显示中文标签
plt.rcParams['font.sans-serif'] = ['SimHei']
# 解决保存图像是负号'-'显示为方块的问题
plt.rcParams['axes.unicode_minus'] = False

# #全部行都能输出
InteractiveShell.ast_node_interactivity = "all"

In [9]:
# Tensor 其实类似于numpy scipy库 提供各种数据操作 eg:加,减,乘,除,以及各种线性代数的计算
tf.add(2, 4)
tf.add([1, 2], [2, 4])
tf.multiply(5, 6)
tf.reduce_sum([1, 2, 3, 4])
tf.square(2) + tf.square(3)

<tf.Tensor: id=26, shape=(), dtype=int32, numpy=6>

<tf.Tensor: id=30, shape=(2,), dtype=int32, numpy=array([3, 6], dtype=int32)>

<tf.Tensor: id=34, shape=(), dtype=int32, numpy=30>

<tf.Tensor: id=42, shape=(), dtype=int32, numpy=10>

<tf.Tensor: id=48, shape=(), dtype=int32, numpy=13>

In [13]:
# 1 x 1的矩阵乘以 1 x 2的矩阵
tf.matmul([[3]], [[4, 5]])

<tf.Tensor: id=58, shape=(1, 2), dtype=int32, numpy=array([[12, 15]], dtype=int32)>

In [18]:
# Tensor和NumPy中的ndarray之间的相互转换
ndarray = np.ones((3, 3))
ndarray

# tf.Tensor会自动转换
tensor = tf.multiply(ndarray, 44)
tensor

# numpy内部也会自动转换
np.add(tensor, 10)

# 显示转换
tensor.numpy()

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

<tf.Tensor: id=73, shape=(3, 3), dtype=float64, numpy=
array([[44., 44., 44.],
       [44., 44., 44.],
       [44., 44., 44.]])>

array([[54., 54., 54.],
       [54., 54., 54.],
       [54., 54., 54.]])

array([[44., 44., 44.],
       [44., 44., 44.],
       [44., 44., 44.]])

In [21]:
x = tf.random.uniform([3, 3])
x
# 因为我安转的是CPU版的 所以不支持
tf.test.is_gpu_available()
x.device.endswith('GPU:0')

<tf.Tensor: id=98, shape=(3, 3), dtype=float32, numpy=
array([[0.4164703 , 0.3663733 , 0.08413434],
       [0.36332703, 0.73588   , 0.44184303],
       [0.6715127 , 0.09204984, 0.526211  ]], dtype=float32)>

False

False

In [22]:
x.device

'/job:localhost/replica:0/task:0/device:CPU:0'

In [26]:
# 如我特殊指定, tf会自动决定由哪个设备执行哪个操作, 如想指定,就需要tf.device的上下文管理器
def time_matmul(x):
  start = time.time()
  for loop in range(10):
    tf.matmul(x, x)

  result = time.time()-start

  print("10 loops: {:0.2f}ms".format(1000*result))

# 强制在cpu上执行
print("On CPU:")
with tf.device('CPU:0'):
    x = tf.random.uniform([1000, 1000])
    assert x.device.endswith("CPU:0")
    time_matmul(x)


# 因为电脑不支持 就没办法演示了
if tf.test.is_gpu_available():
  print("On GPU:")
  with tf.device("GPU:0"): # Or GPU:1 for the 2nd GPU, GPU:2 for the 3rd etc.
    x = tf.random.uniform([1000, 1000])
    assert x.device.endswith("GPU:0")
    time_matmul(x)

On CPU:
10 loops: 205.46ms


In [39]:
# 使用 tf.data.Dataset操作
ds_tensors = tf.data.Dataset.from_tensors([1, 2, 3, 4, 5, 6])
ds_tensors

# TextLineDataset 创建Dataset
import tempfile

_, filename = tempfile.mkstemp()
with open(filename, 'w') as f:
  f.write("""
  Line 1
  Line 2
  Line 3
  """)

ds_file = tf.data.TextLineDataset(filename)
ds_file
[*ds_file]
filename

<TensorDataset shapes: (6,), types: tf.int32>

30

<TextLineDatasetV2 shapes: (), types: tf.string>

[<tf.Tensor: id=483, shape=(), dtype=string, numpy=b''>,
 <tf.Tensor: id=484, shape=(), dtype=string, numpy=b'  Line 1'>,
 <tf.Tensor: id=485, shape=(), dtype=string, numpy=b'  Line 2'>,
 <tf.Tensor: id=486, shape=(), dtype=string, numpy=b'  Line 3'>,
 <tf.Tensor: id=487, shape=(), dtype=string, numpy=b'  '>]

'/var/folders/mk/dvh8rm551972h0f6l5dl5y680000gn/T/tmpbf8aub4a'

In [38]:
#  map, batch, shuffle 操作
[*ds_tensors]
tran_tensors = ds_tensors.map(tf.square)
[*tran_tensors]

tran_tensors = ds_tensors.map(tf.square).shuffle(2)
[*tran_tensors]

tran_tensors = ds_tensors.map(tf.square).shuffle(2).batch(2)
[*tran_tensors]

ds_file = ds_file.batch(2)
[*ds_file]

[<tf.Tensor: id=398, shape=(6,), dtype=int32, numpy=array([1, 2, 3, 4, 5, 6], dtype=int32)>]

[<tf.Tensor: id=411, shape=(6,), dtype=int32, numpy=array([ 1,  4,  9, 16, 25, 36], dtype=int32)>]

[<tf.Tensor: id=428, shape=(6,), dtype=int32, numpy=array([ 1,  4,  9, 16, 25, 36], dtype=int32)>]

[<tf.Tensor: id=449, shape=(1, 6), dtype=int32, numpy=array([[ 1,  4,  9, 16, 25, 36]], dtype=int32)>]

[<tf.Tensor: id=460, shape=(2, 2), dtype=string, numpy=
 array([[b'', b'  Line 1'],
        [b'  Line 2', b'  Line 3']], dtype=object)>,
 <tf.Tensor: id=461, shape=(1, 1), dtype=string, numpy=array([[b'  ']], dtype=object)>]

In [40]:
# tf.data.Dataset迭代操作
for x in ds_tensors:
    print(x)

tf.Tensor([1 2 3 4 5 6], shape=(6,), dtype=int32)
