In [23]:
import pandas as pd
import tensorflow as tf
import numpy as np
# Download the heart-disease CSV (get_file returns the local path of the fetched
# file) and load it into a DataFrame.
HEART_CSV_URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
csv_file = tf.keras.utils.get_file('heart.csv', HEART_CSV_URL)
df = pd.read_csv(csv_file)

In [24]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [25]:
# 'thal' is the only string-valued column; replace each label with the integer
# code pandas assigns to its category so the whole frame becomes numeric.
thal_categories = pd.Categorical(df['thal'])
df['thal'] = thal_categories.codes
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,2,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,3,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,4,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,3,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0


In [26]:
# Split the label column off the feature frame. `pop` removes the column from
# `df` in place, so a second run of this cell would raise KeyError; the guard
# makes the cell safe to re-run (the previously extracted `target` is kept).
if 'target' in df.columns:
    target = df.pop('target')

In [27]:
# df.values is a plain ndarray; slicing a (features, labels) tuple makes every
# dataset element a tuple as well.
features = df.values.astype(np.float32)
labels = target.values.astype(np.int32)
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

In [35]:
print(dataset)
# Count the elements by iterating the dataset once; only the number of
# elements matters here, so the (features, label) values are discarded.
num_examples = sum(1 for _ in dataset)
print(num_examples)
# Although only a single tuple was passed in, the dataset unpacks that tuple:
# each row of df.values is zipped with the matching entry of target.values,
# so this dataset yields 303 (features, label) elements.

<TensorSliceDataset shapes: ((13,), ()), types: (tf.float32, tf.int32)>
303


In [30]:
# Shuffle with a buffer as large as the frame (one slot per row), then emit
# batches of a single example.
train_dataset = dataset.shuffle(len(df)).batch(1)

In [31]:
# Display the batched dataset to inspect its element shapes and dtypes.
train_dataset

<BatchDataset shapes: ((None, 13), (None,)), types: (tf.float32, tf.int32)>

In [36]:
def get_compiled_model():
  """Build and compile a small binary classifier.

  The network is a Sequential stack of two 10-unit ReLU Dense layers followed
  by a single-unit Dense layer with no activation, so it emits raw logits.
  It is compiled with the Adam optimizer, binary cross-entropy configured with
  from_logits=True (matching the activation-free output), and the 'accuracy'
  metric.

  Returns:
    The compiled tf.keras.Sequential model.
  """
  layer_stack = [
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1),
  ]
  model = tf.keras.Sequential(layer_stack)

  logits_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  model.compile(optimizer='adam', loss=logits_loss, metrics=['accuracy'])
  return model

# Build a freshly compiled model and train it on the shuffled, batched dataset
# for 15 epochs. fit() is the cell's last expression, so its History object is
# shown as the cell output.
model = get_compiled_model()
model.fit(train_dataset, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7fd5b3bd6518>

In [37]:
# One scalar-per-sample Input for every feature column, keyed by column name.
inputs = {
    column: tf.keras.layers.Input(shape=(), name=column)
    for column in df.keys()
}

# inputs.values() is the collection of all Input tensors.
# Each Input holds the same single feature for a batch of samples.
# Stacking them along the last axis yields one row per sample.
stacked_features = tf.stack(list(inputs.values()), axis=-1)

hidden = tf.keras.layers.Dense(10, activation='relu')(stacked_features)
output = tf.keras.layers.Dense(1)(hidden)

model_func = tf.keras.Model(inputs=inputs, outputs=output)

# The output layer has no activation, so the loss is told to expect logits.
logits_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
model_func.compile(optimizer='adam', loss=logits_loss, metrics=['accuracy'])

In [41]:
# Slice a (column-name -> values dict, labels) pair so each element carries its
# features as a dict keyed by column name, then group elements into batches of 16.
columns_as_lists = df.to_dict('list')
unbatched_slices = tf.data.Dataset.from_tensor_slices((columns_as_lists, target.values))
dict_slices = unbatched_slices.batch(16)

In [42]:
# Display the dict-structured dataset to inspect per-feature shapes and dtypes.
dict_slices

<BatchDataset shapes: ({age: (None,), sex: (None,), cp: (None,), trestbps: (None,), chol: (None,), fbs: (None,), restecg: (None,), thalach: (None,), exang: (None,), oldpeak: (None,), slope: (None,), ca: (None,), thal: (None,)}, (None,)), types: ({age: tf.int32, sex: tf.int32, cp: tf.int32, trestbps: tf.int32, chol: tf.int32, fbs: tf.int32, restecg: tf.int32, thalach: tf.int32, exang: tf.int32, oldpeak: tf.float32, slope: tf.int32, ca: tf.int32, thal: tf.int32}, tf.int64)>

In [44]:
# NOTE(review): this displays every value of every column; consider previewing
# only a slice to keep the notebook output short.
df.to_dict('list')
# The output is a mapping of column name -> list of that column's values.

{'age': [63,
  67,
  67,
  37,
  41,
  56,
  62,
  57,
  63,
  53,
  57,
  56,
  56,
  44,
  52,
  57,
  48,
  54,
  48,
  49,
  64,
  58,
  58,
  58,
  60,
  50,
  58,
  66,
  43,
  40,
  69,
  60,
  64,
  59,
  44,
  42,
  43,
  57,
  55,
  61,
  65,
  65,
  67,
  62,
  65,
  44,
  65,
  60,
  51,
  48,
  58,
  45,
  53,
  39,
  68,
  52,
  44,
  47,
  53,
  51,
  66,
  62,
  62,
  44,
  63,
  52,
  62,
  41,
  58,
  35,
  63,
  51,
  55,
  65,
  45,
  56,
  54,
  44,
  62,
  54,
  51,
  29,
  51,
  43,
  55,
  70,
  62,
  35,
  51,
  59,
  59,
  52,
  64,
  58,
  47,
  57,
  41,
  45,
  60,
  52,
  42,
  67,
  55,
  64,
  70,
  51,
  58,
  60,
  68,
  46,
  77,
  54,
  58,
  48,
  57,
  54,
  35,
  45,
  70,
  53,
  59,
  62,
  64,
  57,
  52,
  56,
  43,
  53,
  48,
  56,
  42,
  59,
  60,
  63,
  42,
  66,
  54,
  69,
  50,
  51,
  62,
  68,
  67,
  69,
  45,
  50,
  59,
  50,
  64,
  57,
  64,
  43,
  45,
  58,
  50,
  55,
  62,
  37,
  38,
  41,
  66,
  52,
  56,
  46,
  46,
  6

In [45]:
# Train the functional model on the dict-keyed batches for 15 epochs.
model_func.fit(dict_slices, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7fd5b3a3e860>