In [2]:
import pandas as pd
import tensorflow as tf

SHUFFLE_BUFFER = 500
BATCH_SIZE = 2

In [3]:
csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/download.tensorflow.org/data/heart.csv')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/heart.csv
[1m13273/13273[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
df = pd.read_csv(csv_file)

In [5]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


In [6]:
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal         object
target        int64
dtype: object

In [7]:
target = df.pop('target')

https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#a_dataframe_as_an_array

In [8]:
numeric_feature_names = ['age', 'thalach', 'trestbps',  'chol', 'oldpeak']
numeric_features = df[numeric_feature_names]
numeric_features.head()

Unnamed: 0,age,thalach,trestbps,chol,oldpeak
0,63,150,145,233,2.3
1,67,108,160,286,1.5
2,67,129,120,229,2.6
3,37,187,130,250,3.5
4,41,172,130,204,1.4


In [9]:
tf.convert_to_tensor(numeric_features)

<tf.Tensor: shape=(303, 5), dtype=float64, numpy=
array([[ 63. , 150. , 145. , 233. ,   2.3],
       [ 67. , 108. , 160. , 286. ,   1.5],
       [ 67. , 129. , 120. , 229. ,   2.6],
       ...,
       [ 65. , 127. , 135. , 254. ,   2.8],
       [ 48. , 150. , 130. , 256. ,   0. ],
       [ 63. , 154. , 150. , 407. ,   4. ]])>

This error is odd.  It appears to be independent of the parameter passed, as adapt() takes only an array. And it claims that the value, which is assigned on the previous line, has not been assigned.

In [10]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(numeric_features)

UnboundLocalError: local variable 'input_shape' referenced before assignment

In [11]:
help(normalizer.adapt)

Help on method adapt in module keras.src.layers.preprocessing.normalization:

adapt(data) method of keras.src.layers.preprocessing.normalization.Normalization instance
    Computes the mean and variance of values in a dataset.
    
    Calling `adapt()` on a `Normalization` layer is an alternative to
    passing in `mean` and `variance` arguments during layer construction. A
    `Normalization` layer should always either be adapted over a dataset or
    passed `mean` and `variance`.
    
    During `adapt()`, the layer will compute a `mean` and `variance`
    separately for each position in each axis specified by the `axis`
    argument. To calculate a single `mean` and `variance` over the input
    data, simply pass `axis=None` to the layer.
    
    Arg:
        data: The data to train on. It can be passed either as a
            `tf.data.Dataset`, as a NumPy array, or as a backend-native
            eager tensor.
            If a dataset, *it must be batched*. Keras will assume that

In [12]:
normalizer(numeric_features.iloc[:3])

<tf.Tensor: shape=(3, 5), dtype=float32, numpy=
array([[ 63. , 150. , 145. , 233. ,   2.3],
       [ 67. , 108. , 160. , 286. ,   1.5],
       [ 67. , 129. , 120. , 229. ,   2.6]], dtype=float32)>

In [15]:
def get_basic_model():
  model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1)
  ])

  model.compile(optimizer='adam',
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                metrics=['accuracy'])
  return model

In [None]:
model = get_basic_model()
model.fit(numeric_features, target, epochs=15, batch_size=BATCH_SIZE)

In [13]:
numeric_dataset = tf.data.Dataset.from_tensor_slices((numeric_features, target))

for row in numeric_dataset.take(3):
  print(row)

(<tf.Tensor: shape=(5,), dtype=float64, numpy=array([ 63. , 150. , 145. , 233. ,   2.3])>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
(<tf.Tensor: shape=(5,), dtype=float64, numpy=array([ 67. , 108. , 160. , 286. ,   1.5])>, <tf.Tensor: shape=(), dtype=int64, numpy=1>)
(<tf.Tensor: shape=(5,), dtype=float64, numpy=array([ 67. , 129. , 120. , 229. ,   2.6])>, <tf.Tensor: shape=(), dtype=int64, numpy=0>)


In [16]:
numeric_batches = numeric_dataset.shuffle(1000).batch(BATCH_SIZE)

model = get_basic_model()
model.fit(numeric_batches, epochs=15)

Epoch 1/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6598 - loss: 18.8895
Epoch 2/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5429 - loss: 2.2054
Epoch 3/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6665 - loss: 1.3376
Epoch 4/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6864 - loss: 1.3637
Epoch 5/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6890 - loss: 1.0461
Epoch 6/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6996 - loss: 0.8253
Epoch 7/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7373 - loss: 0.8291
Epoch 8/15
[1m152/152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6937 - loss: 0.8908
Epoch 9/15
[1m152/152[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x7d20206348e0>

https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#a_dataframe_as_a_dictionary

In [17]:
numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(numeric_features), target))

In [18]:
for row in numeric_dict_ds.take(3):
  print(row)

({'age': <tf.Tensor: shape=(), dtype=int64, numpy=63>, 'thalach': <tf.Tensor: shape=(), dtype=int64, numpy=150>, 'trestbps': <tf.Tensor: shape=(), dtype=int64, numpy=145>, 'chol': <tf.Tensor: shape=(), dtype=int64, numpy=233>, 'oldpeak': <tf.Tensor: shape=(), dtype=float64, numpy=2.3>}, <tf.Tensor: shape=(), dtype=int64, numpy=0>)
({'age': <tf.Tensor: shape=(), dtype=int64, numpy=67>, 'thalach': <tf.Tensor: shape=(), dtype=int64, numpy=108>, 'trestbps': <tf.Tensor: shape=(), dtype=int64, numpy=160>, 'chol': <tf.Tensor: shape=(), dtype=int64, numpy=286>, 'oldpeak': <tf.Tensor: shape=(), dtype=float64, numpy=1.5>}, <tf.Tensor: shape=(), dtype=int64, numpy=1>)
({'age': <tf.Tensor: shape=(), dtype=int64, numpy=67>, 'thalach': <tf.Tensor: shape=(), dtype=int64, numpy=129>, 'trestbps': <tf.Tensor: shape=(), dtype=int64, numpy=120>, 'chol': <tf.Tensor: shape=(), dtype=int64, numpy=229>, 'oldpeak': <tf.Tensor: shape=(), dtype=float64, numpy=2.6>}, <tf.Tensor: shape=(), dtype=int64, numpy=0>)


In [19]:
def stack_dict(inputs, fun=tf.stack):
    values = []
    for key in sorted(inputs.keys()):
      values.append(tf.cast(inputs[key], tf.float32))

    return fun(values, axis=-1)

In [20]:
model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)

Epoch 1/5


KeyError: "Exception encountered when calling Sequential.call().\n\n\x1b[1minput_layer\x1b[0m\n\nArguments received by Sequential.call():\n  • inputs={'age': 'tf.Tensor(shape=(None, 1), dtype=int64)', 'thalach': 'tf.Tensor(shape=(None, 1), dtype=int64)', 'trestbps': 'tf.Tensor(shape=(None, 1), dtype=int64)', 'chol': 'tf.Tensor(shape=(None, 1), dtype=int64)', 'oldpeak': 'tf.Tensor(shape=(None, 1), dtype=float32)'}\n  • training=True\n  • mask={'age': 'None', 'thalach': 'None', 'trestbps': 'None', 'chol': 'None', 'oldpeak': 'None'}"

In [21]:
numeric_dict_batches = numeric_dict_ds.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE)
model.fit(numeric_dict_batches, epochs=5)

Epoch 1/5


KeyError: "Exception encountered when calling Sequential.call().\n\n\x1b[1minput_layer\x1b[0m\n\nArguments received by Sequential.call():\n  • inputs={'age': 'tf.Tensor(shape=(None,), dtype=int64)', 'thalach': 'tf.Tensor(shape=(None,), dtype=int64)', 'trestbps': 'tf.Tensor(shape=(None,), dtype=int64)', 'chol': 'tf.Tensor(shape=(None,), dtype=int64)', 'oldpeak': 'tf.Tensor(shape=(None,), dtype=float32)'}\n  • training=True\n  • mask={'age': 'None', 'thalach': 'None', 'trestbps': 'None', 'chol': 'None', 'oldpeak': 'None'}"

In [None]:
model.predict(dict(numeric_features.iloc[:3]))

In [22]:
inputs = {}
for name, column in numeric_features.items():
  inputs[name] = tf.keras.Input(
      shape=(1,), name=name, dtype=tf.float32)

inputs

{'age': <KerasTensor shape=(None, 1), dtype=float32, sparse=None, name=age>,
 'thalach': <KerasTensor shape=(None, 1), dtype=float32, sparse=None, name=thalach>,
 'trestbps': <KerasTensor shape=(None, 1), dtype=float32, sparse=None, name=trestbps>,
 'chol': <KerasTensor shape=(None, 1), dtype=float32, sparse=None, name=chol>,
 'oldpeak': <KerasTensor shape=(None, 1), dtype=float32, sparse=None, name=oldpeak>}

In [23]:
x = stack_dict(inputs, fun=tf.concat)

normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(stack_dict(dict(numeric_features)))

x = normalizer(x)
x = tf.keras.layers.Dense(10, activation='relu')(x)
x = tf.keras.layers.Dense(10, activation='relu')(x)
x = tf.keras.layers.Dense(1)(x)

model = tf.keras.Model(inputs, x)

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'],
              run_eagerly=True)

ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [None]:
tf.keras.utils.plot_model(model, rankdir="LR", show_shapes=True)

In [None]:
model.fit(dict(numeric_features), target, epochs=5, batch_size=BATCH_SIZE)

In [None]:
numeric_dict_batches = numeric_dict_ds.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE)
model.fit(numeric_dict_batches, epochs=5)

https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example

In [24]:
binary_feature_names = ['sex', 'fbs', 'exang']

In [25]:
categorical_feature_names = ['cp', 'restecg', 'slope', 'thal', 'ca']

In [26]:
inputs = {}
for name, column in df.items():
  if type(column[0]) == str:
    dtype = tf.string
  elif (name in categorical_feature_names or
        name in binary_feature_names):
    dtype = tf.int64
  else:
    dtype = tf.float32

  inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)

In [27]:
inputs

{'age': <KerasTensor shape=(None,), dtype=float32, sparse=None, name=age>,
 'sex': <KerasTensor shape=(None,), dtype=int64, sparse=None, name=sex>,
 'cp': <KerasTensor shape=(None,), dtype=int64, sparse=None, name=cp>,
 'trestbps': <KerasTensor shape=(None,), dtype=float32, sparse=None, name=trestbps>,
 'chol': <KerasTensor shape=(None,), dtype=float32, sparse=None, name=chol>,
 'fbs': <KerasTensor shape=(None,), dtype=int64, sparse=None, name=fbs>,
 'restecg': <KerasTensor shape=(None,), dtype=int64, sparse=None, name=restecg>,
 'thalach': <KerasTensor shape=(None,), dtype=float32, sparse=None, name=thalach>,
 'exang': <KerasTensor shape=(None,), dtype=int64, sparse=None, name=exang>,
 'oldpeak': <KerasTensor shape=(None,), dtype=float32, sparse=None, name=oldpeak>,
 'slope': <KerasTensor shape=(None,), dtype=int64, sparse=None, name=slope>,
 'ca': <KerasTensor shape=(None,), dtype=int64, sparse=None, name=ca>,
 'thal': <KerasTensor shape=(None,), dtype=string, sparse=None, name=thal>

The preprocessing code now throws a ValueError when passing Keras tensors to Tensorflow. Ironic since this is pretty much the point of this exercise.

I attempted the following fix (cell) as shown here: https://stackoverflow.com/questions/71808327/how-to-fix-error-where-a-kerastensor-is-passed-to-a-tf-api

It did not fix the issue, but caused the error to display a more useful message which I might be able to work with.

In [29]:
from tensorflow.python.framework.ops import disable_eager_execution

disable_eager_execution()

In [33]:
from tensorflow.keras import Layer

class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

In [34]:
preprocessed = []

for name in binary_feature_names:
  inp = inputs[name]
  inp = inp[:, tf.newaxis]
  inp = MyLayer()(inp)
  float_value = tf.cast(inp, tf.float32)
  preprocessed.append(float_value)

preprocessed

RuntimeError: Exception encountered when calling MyLayer.call().

[1mCould not automatically infer the output shape / dtype of 'my_layer' (of type MyLayer). Either the `MyLayer.call()` method is incorrect, or you need to implement the `MyLayer.compute_output_spec() / compute_output_shape()` method. Error encountered:

name 'tf_fn' is not defined[0m

Arguments received by MyLayer.call():
  • args=('<KerasTensor shape=(None, 1), dtype=int64, sparse=False, name=keras_tensor_7>',)
  • kwargs=<class 'inspect._empty'>