In [1]:
import pandas as pd
import tensorflow as tf

In [2]:
# Fetch the heart-disease CSV (cached relative to the current directory) and
# keep the value returned by get_file for the load step below.
csv_file = tf.keras.utils.get_file(
    fname='heart.csv',
    origin='https://storage.googleapis.com/applied-dl/heart.csv',
    cache_dir='./',
)

In [3]:
# Load the downloaded CSV into the working DataFrame used by every later cell.
df = pd.read_csv(csv_file)

In [4]:
# Summary statistics for the numeric columns, one row per column.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
age,303.0,54.594059,9.01637,29.0,48.0,56.0,61.0,77.0
sex,303.0,0.676568,0.46856,0.0,0.0,1.0,1.0,1.0
cp,303.0,3.108911,1.028414,0.0,2.0,3.0,4.0,4.0
trestbps,303.0,131.785479,17.748338,94.0,120.0,130.0,140.0,200.0
chol,303.0,246.547855,52.175933,126.0,211.0,241.0,275.0,564.0
fbs,303.0,0.148515,0.356198,0.0,0.0,0.0,0.0,1.0
restecg,303.0,0.990099,0.988293,0.0,0.0,1.0,2.0,2.0
thalach,303.0,149.194719,23.173368,71.0,132.0,152.0,165.5,202.0
exang,303.0,0.326733,0.469794,0.0,0.0,0.0,1.0,1.0
oldpeak,303.0,1.057756,1.165025,0.0,0.0,0.8,1.6,6.2


In [5]:
# Preview the first rows of the raw frame; `thal` is still a string column here.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [6]:
# Inspect column dtypes to spot non-numeric columns that need encoding.
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal         object
target        int64
dtype: object

In [7]:
# Replace the string-valued `thal` column with its integer category codes
# (overwrites the column in `df`).
df['thal'] = df['thal'].astype('category').cat.codes

In [8]:
# Check the frame again after encoding: `thal` now holds integer codes.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,2,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,3,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,4,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,3,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0


In [9]:
# Split the label column off the feature frame. `pop` removes the column from
# `df` in place, so guard it: re-running this cell after the column is already
# gone keeps the previously extracted `target` instead of raising KeyError.
if 'target' in df.columns:
    target = df.pop('target')

In [10]:
# Shape of the feature array now that the label column has been removed.
df.values.shape

(303, 13)

In [11]:
# Pair each row of the feature array with its label, one dataset element per row.
feature_matrix = df.values
label_vector = target.values
dataset = tf.data.Dataset.from_tensor_slices((feature_matrix, label_vector))

In [12]:
# Display the dataset's repr to check element shapes and dtypes.
dataset

<TensorSliceDataset shapes: ((13,), ()), types: (tf.float64, tf.int64)>

In [13]:
# Print the first five (features, target) pairs as a sanity check.
preview = dataset.take(5)
for feat, targ in preview:
    print('Features: {}, Target: {}'.format(feat, targ))

Features: [ 63.    1.    1.  145.  233.    1.    2.  150.    0.    2.3   3.    0.
   2. ], Target: 0
Features: [ 67.    1.    4.  160.  286.    0.    2.  108.    1.    1.5   2.    3.
   3. ], Target: 1
Features: [ 67.    1.    4.  120.  229.    0.    2.  129.    1.    2.6   2.    2.
   4. ], Target: 0
Features: [ 37.    1.    3.  130.  250.    0.    0.  187.    0.    3.5   3.    0.
   3. ], Target: 0
Features: [ 41.    0.    2.  130.  204.    0.    2.  172.    0.    1.4   1.    0.
   3. ], Target: 0


In [14]:
# Convert the encoded `thal` column to a tensor to inspect its values and dtype.
tf.constant(df['thal'])

<tf.Tensor: shape=(303,), dtype=int8, numpy=
array([2, 3, 4, 3, 3, 3, 3, 3, 4, 4, 2, 3, 2, 4, 4, 3, 4, 3, 3, 3, 3, 3,
       3, 4, 4, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 3, 4, 2, 4, 3, 4, 3, 4, 4,
       2, 3, 3, 4, 3, 3, 4, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 4,
       4, 2, 3, 3, 4, 3, 4, 3, 3, 4, 4, 3, 3, 4, 4, 3, 3, 3, 3, 4, 4, 4,
       3, 3, 4, 3, 4, 4, 3, 4, 3, 3, 3, 4, 3, 4, 4, 3, 3, 4, 4, 4, 4, 4,
       3, 3, 3, 3, 4, 3, 4, 3, 4, 4, 3, 3, 2, 4, 4, 2, 3, 3, 4, 4, 3, 4,
       3, 3, 4, 2, 4, 4, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
       4, 3, 3, 3, 4, 3, 4, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 2,
       4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 2, 2, 4, 3, 4, 2, 4, 3,
       3, 4, 3, 3, 3, 3, 4, 3, 4, 3, 4, 2, 2, 4, 3, 4, 3, 2, 4, 3, 3, 2,
       4, 4, 4, 4, 3, 0, 3, 3, 3, 3, 1, 4, 3, 3, 3, 4, 3, 4, 3, 3, 3, 4,
       3, 3, 4, 4, 4, 4, 3, 3, 4, 3, 4, 3, 4, 4, 3, 4, 4, 3, 4, 4, 3, 3,
      

In [15]:
# Shuffle with a buffer as large as the frame, then emit batches of one example.
shuffled = dataset.shuffle(buffer_size=len(df))
train_dataset = shuffled.batch(batch_size=1)

In [16]:
def get_compiled_model():
    """Build and compile a small binary classifier.

    The network is a Sequential stack of two 10-unit ReLU Dense layers
    followed by a single sigmoid unit. It is compiled with the Adam
    optimizer, binary cross-entropy loss, and an accuracy metric.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    layer_specs = ((10, 'relu'), (10, 'relu'), (1, 'sigmoid'))

    model = tf.keras.Sequential()
    for units, activation in layer_specs:
        model.add(tf.keras.layers.Dense(units, activation=activation))

    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

In [17]:
# Build a fresh compiled model and train it for 15 epochs on the shuffled,
# batch-size-1 dataset; the value returned by `fit` becomes the cell output.
model = get_compiled_model()
model.fit(train_dataset, epochs=15)

Epoch 1/15


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x7fea740aa1d0>

In [18]:
# Print the layer-by-layer summary of the trained Sequential model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                140       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 261
Trainable params: 261
Non-trainable params: 0
_________________________________________________________________


In [19]:
# One symbolic scalar-per-example input for every feature column, keyed by
# the column name so the model can be fed a dict of columns.
inputs = {}
for key in df.keys():
    inputs[key] = tf.keras.layers.Input(shape=(), name=key)
print("inputs.keys() =======>", list(inputs.keys()))
print("inputs.values() ---->", list(inputs.values()))

# Join the per-column inputs along a new trailing axis into one feature tensor.
x = tf.stack(list(inputs.values()), axis=-1)
print("x:", x)
print("x shape:", x.shape)

# Hidden ReLU layer followed by a single sigmoid output unit.
hidden_layer = tf.keras.layers.Dense(10, activation='relu', name="one")
x = hidden_layer(x)
output_layer = tf.keras.layers.Dense(1, activation='sigmoid', name='two')
output = output_layer(x)

# Functional model that takes the dict of named inputs.
model_func = tf.keras.Model(inputs=inputs, outputs=output)

model_func.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

inputs.values() ----> [<tf.Tensor 'age:0' shape=(None,) dtype=float32>, <tf.Tensor 'sex:0' shape=(None,) dtype=float32>, <tf.Tensor 'cp:0' shape=(None,) dtype=float32>, <tf.Tensor 'trestbps:0' shape=(None,) dtype=float32>, <tf.Tensor 'chol:0' shape=(None,) dtype=float32>, <tf.Tensor 'fbs:0' shape=(None,) dtype=float32>, <tf.Tensor 'restecg:0' shape=(None,) dtype=float32>, <tf.Tensor 'thalach:0' shape=(None,) dtype=float32>, <tf.Tensor 'exang:0' shape=(None,) dtype=float32>, <tf.Tensor 'oldpeak:0' shape=(None,) dtype=float32>, <tf.Tensor 'slope:0' shape=(None,) dtype=float32>, <tf.Tensor 'ca:0' shape=(None,) dtype=float32>, <tf.Tensor 'thal:0' shape=(None,) dtype=float32>]
x: Tensor("stack:0", shape=(None, 13), dtype=float32)
x shape: (None, 13)


In [20]:
# NOTE(review): this prints the summary of the Sequential `model` again; the
# functional model built in the preceding cell is bound to `model_func`.
# Confirm whether `model_func.summary()` was intended here.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 10)                140       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 261
Trainable params: 261
Non-trainable params: 0
_________________________________________________________________


* **与 tf.data 一起使用时，保存 pd.DataFrame 列结构的最简单方法是将 pd.DataFrame 转换为 dict ，并对该字典进行切片。**

In [21]:
# Keep the column structure: slice a dict of column lists together with the
# labels, then group the elements into batches of 16.
feature_columns = df.to_dict('list')
dict_slices = tf.data.Dataset.from_tensor_slices((feature_columns, target.values)).batch(16)

In [None]:
# Show the first batch of the dict-structured dataset.
first_batch = dict_slices.take(1)
for dict_slice in first_batch:
    print(dict_slice)