In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Normalization
from tensorflow.keras.models import Sequential, Model

In [2]:
print(tf.__version__)

2.8.0


In [3]:
# Presumably the buffer size intended for tf.data shuffling — TODO confirm usage.
SHUFFLE_BUFFER = 500
# Rows per training batch (passed to model.fit and Dataset.batch).
BATCH_SIZE = 2

In [4]:
# Fetch heart.csv through Keras' get_file helper; the returned value is what
# pd.read_csv is given when the data is loaded.
csv_file = tf.keras.utils.get_file(
    fname='heart.csv',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/heart.csv',
)

#### Read the CSV file using pandas

In [5]:
# Load the downloaded CSV into a DataFrame (features plus the `target` label column).
df = pd.read_csv(csv_file)

In [6]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


If the label column has not already been separated from the features, split it off from the training data.

In [7]:
# Inspect column dtypes to see which columns are numeric and which are not.
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal         object
target        int64
dtype: object

In [8]:
# Split the label column off the feature frame.
# DataFrame.pop mutates `df` in place, so the guard keeps this cell safe to
# re-run: a second pop would raise KeyError once the column is gone, whereas
# here the previously extracted `target` is simply kept.
if 'target' in df.columns:
    target = df.pop('target')

### A DataFrame as an array
###### Identify the numeric features that are not categorical
age, trestbps, chol, thalach, oldpeak

In [9]:
# For every column, show its name, its distinct values, and how many there are,
# followed by a separator line. Columns with few distinct values are likely
# categorical; the rest are continuous.
for name, values in df.items():
    print(
        name,
        values.unique(),
        values.nunique(),
        '==================================================================================',
        sep='\n',
    )

age
[63 67 37 41 56 62 57 53 44 52 48 54 49 64 58 60 50 66 43 40 69 59 42 55
 61 65 51 45 39 68 47 35 29 70 46 77 38 34 74 76 71]
41
sex
[1 0]
2
cp
[1 4 3 2 0]
5
trestbps
[145 160 120 130 140 172 150 110 132 117 135 155 125 104 180 138 108 128
 100 200 124  94 122 170 105 165 112 102 152 115 118 101 126 142 174 134
 148 178 158 192 129 144 123 136 146 106 156 154 114 164]
50
chol
[233 286 229 250 204 236 268 354 254 203 192 294 256 263 199 168 239 275
 266 211 283 284 224 206 219 340 226 247 167 230 335 234 177 276 353 243
 225 269 267 248 197 360 258 308 245 270 208 264 321 274 325 235 257 302
 164 231 141 252 255 183 330 222 217 282 288 220 209 227 261 213 174 281
 198 221 205 309 240 289 318 298 265 564 246 322 299 300 293 277 304 214
 207 160 249 394 212 184 315 409 244 305 195 196 273 126 313 259 200 262
 215 228 193 303 271 210 327 149 201 295 306 178 237 218 223 242 319 166
 180 311 278 232 253 342 169 187 157 176 241 131 175 417 290 172 216 188
 185 326 260 182 307 186 341 407]

In [10]:
# Columns treated as continuous numeric inputs (not categorical codes).
numeric_features_names = ['age', 'thalach', 'trestbps', 'chol', 'oldpeak']
# Select just those columns, in the order listed above.
numeric_features = df[numeric_features_names]
numeric_features.head()

Unnamed: 0,age,thalach,trestbps,chol,oldpeak
0,63,150,145,233,2.3
1,67,108,160,286,1.5
2,67,129,120,229,2.6
3,37,187,130,250,3.5
4,41,172,130,204,1.4


In [11]:
 # Convert the numeric feature frame into a single tensor (displayed as the cell result).
 tf.convert_to_tensor(numeric_features)

<tf.Tensor: shape=(303, 5), dtype=float64, numpy=
array([[ 63. , 150. , 145. , 233. ,   2.3],
       [ 67. , 108. , 160. , 286. ,   1.5],
       [ 67. , 129. , 120. , 229. ,   2.6],
       ...,
       [ 65. , 127. , 135. , 254. ,   2.8],
       [ 48. , 150. , 130. , 256. ,   0. ],
       [ 63. , 154. , 150. , 407. ,   4. ]])>

#### With model.fit

In [12]:
# Normalization layer that keeps the last axis, i.e. one set of statistics per feature column.
normalizer = Normalization(axis=-1)
# Fit the layer's statistics to the numeric features.
normalizer.adapt(numeric_features)
# Sanity check: apply the adapted layer to the first three rows.
normalizer(numeric_features.iloc[:3])

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[ 0.93383914,  0.03480717,  0.7457807 , -0.26008663,  1.0680453 ],
       [ 1.3782105 , -1.7806163 ,  1.5923283 ,  0.75738776,  0.38022864],
       [ 1.3782105 , -0.8729046 , -0.66513205, -0.33687717,  1.3259765 ]],
      dtype=float32)>

In [13]:
# Demonstration: Normalization cannot keep per-index statistics along axis 0.
#
# Why the reported input shape is [None, 5] although numeric_features is
# (303, 5): adapt() does not build the layer from the whole array at once. It
# feeds the data through a batched adapt step, and inside that step the batch
# axis has no static size, so build() sees axis 0 as None. Every axis listed in
# `axis` must have a known size, hence the ValueError.
# Use axis=-1 (one mean/variance per feature column) instead.
normalizer_axis0 = Normalization(axis=0)
try:
    normalizer_axis0.adapt(numeric_features)
except ValueError as err:
    # Expected failure: show the message without aborting a "Run All".
    print(f'{type(err).__name__}: {err}')

ValueError: in user code:

    File "C:\Users\winston\anaconda3_64\envs\tf25\lib\site-packages\keras\engine\base_preprocessing_layer.py", line 117, in adapt_step  *
        self._adapt_maybe_build(data)
    File "C:\Users\winston\anaconda3_64\envs\tf25\lib\site-packages\keras\engine\base_preprocessing_layer.py", line 285, in _adapt_maybe_build  **
        self.build(data_shape)
    File "C:\Users\winston\anaconda3_64\envs\tf25\lib\site-packages\keras\layers\preprocessing\normalization.py", line 150, in build
        raise ValueError(

    ValueError: All `axis` values to be kept must have known shape. Got axis: (0,), input shape: [None, 5], with unknown axis at index: 0


In [15]:
# Single-row sample used to adapt a second Normalization layer.
# With only one row, the per-feature variance of this data is 0.
a = [[1, 2, 3, 4, 5]]

In [16]:
# Adapt a per-feature Normalization layer on the single row `a`, then apply it
# to the first three rows of the real data.
normalizer_dogs = Normalization(axis=-1)
normalizer_dogs.adapt(a)
normalizer_dogs(numeric_features.iloc[:3])

# NOTE(review): no ValueError is raised here (axis=-1 has a known size of 5).
# Because `a` has a single row, each feature's variance is 0, so the layer
# presumably divides (x - mean) by a tiny epsilon rather than a real standard
# deviation. That matches the displayed magnitudes of 1e7-1e9,
# e.g. (63 - 1) / 1e-7 = 6.2e8 for the first value.

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[ 6.20e+08,  1.48e+09,  1.42e+09,  2.29e+09, -2.70e+07],
       [ 6.60e+08,  1.06e+09,  1.57e+09,  2.82e+09, -3.50e+07],
       [ 6.60e+08,  1.27e+09,  1.17e+09,  2.25e+09, -2.40e+07]],
      dtype=float32)>

In [17]:
def _build_binary_classifier(preprocessing_layer):
    """Build and compile a small binary classifier on top of a preprocessing layer.

    The network is: preprocessing layer -> Dense(10, relu) -> Dense(10, relu)
    -> Dense(1). The final layer has no activation, so the model outputs a raw
    logit; the loss is configured with ``from_logits=True`` accordingly.

    Args:
        preprocessing_layer: Layer placed first in the Sequential stack
            (here an adapted ``Normalization`` layer).

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        preprocessing_layer,
        Dense(10, activation='relu'),
        Dense(10, activation='relu'),
        Dense(1),
    ])

    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        # The model emits logits, so the decision boundary is 0, not the
        # default 0.5 that the 'accuracy' shortcut would apply to the raw
        # outputs. The metric keeps the name 'accuracy' so log/history keys
        # are unchanged.
        metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
    )

    return model


def get_basic_model():
    """Return a compiled classifier that normalizes inputs with `normalizer`."""
    return _build_binary_classifier(normalizer)


def get_dogs_model():
    """Return a compiled classifier that normalizes inputs with `normalizer_dogs`."""
    return _build_binary_classifier(normalizer_dogs)

In [18]:
# Train directly on the DataFrame of numeric features; model.fit batches it using batch_size.
model = get_basic_model()
model.fit(numeric_features, target, epochs=15, batch_size=BATCH_SIZE)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x2676a51d4c0>

In [19]:
# Same training run, but with the model whose normalizer was adapted on the single row `a`.
model_dogs = get_dogs_model()
model_dogs.fit(numeric_features, target, epochs=15, batch_size=BATCH_SIZE)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x2676b6d4280>

#### With tf.data

In [21]:
# Build a tf.data pipeline of (features, label) pairs: slicing along the first
# axis yields one element per DataFrame row.
numeric_dataset = tf.data.Dataset.from_tensor_slices((numeric_features, target))

# Peek at the first three elements to check their structure.
for row in numeric_dataset.take(3):
    print(row)

(<tf.Tensor: shape=(5,), dtype=float64, numpy=array([ 63. , 150. , 145. , 233. ,   2.3])>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: shape=(5,), dtype=float64, numpy=array([ 67. , 108. , 160. , 286. ,   1.5])>, <tf.Tensor: shape=(), dtype=int64, numpy=1>)
(<tf.Tensor: shape=(5,), dtype=float64, numpy=array([ 67. , 129. , 120. , 229. ,   2.6])>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)


In [22]:
# Shuffle, then batch, the (features, label) dataset.
# Uses the configured SHUFFLE_BUFFER instead of a hard-coded literal; the
# buffer (500) is still larger than the 303-row dataset, so all rows are
# shuffled together.
numeric_batchs = numeric_dataset.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE)

# The dataset already yields batches, so no batch_size is passed to fit().
model = get_basic_model()
model.fit(numeric_batchs, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x2676b7d9e50>

In [23]:
# Train the single-row-normalizer model on the same batched dataset.
model_dogs = get_dogs_model()
model_dogs.fit(numeric_batchs, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x2676c8a10d0>

### A DataFrame as a Dictionary

In [24]:
# Passing a dict of columns makes each element's features a dict keyed by
# column name (one scalar per column), paired with the label.
numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(numeric_features), target))

In [29]:
# Print up to five (features-dict, label) elements, each followed by blank lines for readability.
for row in numeric_dict_ds.take(5):
    print(row, end='\n\n\n')

({'age': <tf.Tensor: shape=(), dtype=int64, numpy=63>, 'thalach': <tf.Tensor: shape=(), dtype=int64, numpy=150>, 'trestbps': <tf.Tensor: shape=(), dtype=int64, numpy=145>, 'chol': <tf.Tensor: shape=(), dtype=int64, numpy=233>, 'oldpeak': <tf.Tensor: shape=(), dtype=float64, numpy=2.3>}, <tf.Tensor: shape=(), dtype=int64, numpy=0>)


({'age': <tf.Tensor: shape=(), dtype=int64, numpy=67>, 'thalach': <tf.Tensor: shape=(), dtype=int64, numpy=108>, 'trestbps': <tf.Tensor: shape=(), dtype=int64, numpy=160>, 'chol': <tf.Tensor: shape=(), dtype=int64, numpy=286>, 'oldpeak': <tf.Tensor: shape=(), dtype=float64, numpy=1.5>}, <tf.Tensor: shape=(), dtype=int64, numpy=1>)


({'age': <tf.Tensor: shape=(), dtype=int64, numpy=67>, 'thalach': <tf.Tensor: shape=(), dtype=int64, numpy=129>, 'trestbps': <tf.Tensor: shape=(), dtype=int64, numpy=120>, 'chol': <tf.Tensor: shape=(), dtype=int64, numpy=229>, 'oldpeak': <tf.Tensor: shape=(), dtype=float64, numpy=2.6>}, <tf.Tensor: shape=(), dtype=int64, numpy=0

#### Dictionary with Keras
There are two equivalent ways you can write a Keras model that accepts a dictionary as input.

##### 1. The Model-subclass style
You write a subclass of tf.keras.Model (or tf.keras.layers.Layer). You directly handle the inputs, and create the outputs:

In [30]:
def stack_dict(inputs, fun=tf.stack):
    """Combine the values of a dict into one float32 tensor.

    Values are visited in sorted-key order so the result does not depend on
    the dict's insertion order. Each value is cast to float32 before combining.

    Args:
        inputs: Mapping from sortable keys to tensor-like values.
        fun: Callable that takes the list of cast values and an ``axis``
            keyword; defaults to ``tf.stack``.

    Returns:
        The result of ``fun(values, axis=-1)``.
    """
    ordered_keys = sorted(inputs.keys())
    as_float = [tf.cast(inputs[key], dtype=tf.float32) for key in ordered_keys]
    return fun(as_float, axis=-1)