In [75]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras


train = pd.read_csv("data/segmentation_target.test")
test = pd.read_csv("data/segmentation_target.data")

print(f"{train.shape=}")
print(f"{test.shape=}")

val_dataframe = train.sample(frac=1)
train_dataframe = test.sample(frac=1)

print(
    "Using %d samples for training and %d for validation"
    % (len(train_dataframe), len(val_dataframe))
)

train.shape=(2100, 20)
test.shape=(210, 20)
Using 210 samples for training and 2100 for validation


AttributeError: module 'sklearn' has no attribute 'preprocessing'

In [81]:
def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    dataframe["TARGET"] = dataframe["TARGET"].apply(lambda a: str(a))
    print(dataframe["TARGET"].values[:5])
    labels = dataframe.pop("TARGET")

    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    ds = ds.shuffle(buffer_size=len(dataframe))
    return ds


train_ds = dataframe_to_dataset(train_dataframe)
val_ds = dataframe_to_dataset(val_dataframe)

['4' '1' '0' '3' '6']
FOLIAGE      4
PATH         1
GRASS        0
CEMENT       3
BRICKFACE    6
            ..
FOLIAGE      4
WINDOW       2
BRICKFACE    6
FOLIAGE      4
BRICKFACE    6
Name: TARGET, Length: 210, dtype: object
labels.shape=(210,)
['2' '6' '2' '0' '4']
WINDOW       2
BRICKFACE    6
WINDOW       2
GRASS        0
FOLIAGE      4
            ..
SKY          5
BRICKFACE    6
FOLIAGE      4
PATH         1
GRASS        0
Name: TARGET, Length: 2100, dtype: object
labels.shape=(2100,)


In [67]:
for x, y in train_ds.take(1):
    print("Input:", x)
    print("\n\nTarget:", y)

Input: {'REGION-CENTROID-COL': <tf.Tensor: shape=(), dtype=float64, numpy=121.0>, 'REGION-CENTROID-ROW': <tf.Tensor: shape=(), dtype=float64, numpy=113.0>, 'REGION-PIXEL-COUNT': <tf.Tensor: shape=(), dtype=int64, numpy=9>, 'SHORT-LINE-DENSITY-5': <tf.Tensor: shape=(), dtype=float64, numpy=0.0>, 'SHORT-LINE-DENSITY-2': <tf.Tensor: shape=(), dtype=float64, numpy=0.0>, 'VEDGE-MEAN': <tf.Tensor: shape=(), dtype=float64, numpy=1.722222>, 'VEDGE-SD': <tf.Tensor: shape=(), dtype=float64, numpy=1.5296303>, 'HEDGE-MEAN': <tf.Tensor: shape=(), dtype=float64, numpy=2.944444>, 'HEDGE-SD': <tf.Tensor: shape=(), dtype=float64, numpy=1.5296295>, 'INTENSITY-MEAN': <tf.Tensor: shape=(), dtype=float64, numpy=20.25926>, 'RAWRED-MEAN': <tf.Tensor: shape=(), dtype=float64, numpy=20.0>, 'RAWBLUE-MEAN': <tf.Tensor: shape=(), dtype=float64, numpy=25.444445>, 'RAWGREEN-MEAN': <tf.Tensor: shape=(), dtype=float64, numpy=15.333333>, 'EXRED-MEAN': <tf.Tensor: shape=(), dtype=float64, numpy=-0.7777778>, 'EXBLUE-MEA

In [68]:
train_ds = train_ds.batch(21)
val_ds = val_ds.batch(21)

In [69]:
from keras.layers import Normalization

def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature

In [70]:
from keras import layers
# Numerical features
f1 = keras.Input(shape=(1,), name="REGION-CENTROID-COL")
f2 = keras.Input(shape=(1,), name="REGION-CENTROID-ROW")
f3 = keras.Input(shape=(1,), name="REGION-PIXEL-COUNT")
f4 = keras.Input(shape=(1,), name="SHORT-LINE-DENSITY-5")
f5 = keras.Input(shape=(1,), name="SHORT-LINE-DENSITY-2")
f6 = keras.Input(shape=(1,), name="VEDGE-MEAN")
f7 = keras.Input(shape=(1,), name="VEDGE-SD")
f8 = keras.Input(shape=(1,), name="HEDGE-MEAN")
f9 = keras.Input(shape=(1,), name="HEDGE-SD")
f10 = keras.Input(shape=(1,), name="INTENSITY-MEAN")
f11 = keras.Input(shape=(1,), name="RAWRED-MEAN")
f12 = keras.Input(shape=(1,), name="RAWBLUE-MEAN")
f13 = keras.Input(shape=(1,), name="RAWGREEN-MEAN")
f14 = keras.Input(shape=(1,), name="EXRED-MEAN")
f15 = keras.Input(shape=(1,), name="EXBLUE-MEAN")
f16 = keras.Input(shape=(1,), name="EXGREEN-MEAN")
f17 = keras.Input(shape=(1,), name="VALUE-MEAN")
f18 = keras.Input(shape=(1,), name="SATURATION-MEAN")
f19 = keras.Input(shape=(1,), name="HUE-MEAN")

all_inputs = [
    f1,
    f2,
    f3,
    f4,
    f5,
    f6,
    f7,
    f8,
    f9,
    f10,
    f11,
    f12,
    f13,
    f14,
    f15,
    f16,
    f17,
    f18,
    f19,
]

f1_encoded = encode_numerical_feature(f1, "REGION-CENTROID-COL", train_ds)
f2_encoded = encode_numerical_feature(f2, "REGION-CENTROID-ROW", train_ds)
f3_encoded = encode_numerical_feature(f3, "REGION-PIXEL-COUNT", train_ds)
f4_encoded = encode_numerical_feature(f4, "SHORT-LINE-DENSITY-5", train_ds)
f5_encoded = encode_numerical_feature(f5, "SHORT-LINE-DENSITY-2", train_ds)
f6_encoded = encode_numerical_feature(f6, "VEDGE-MEAN", train_ds)
f7_encoded = encode_numerical_feature(f7, "VEDGE-SD", train_ds)
f8_encoded = encode_numerical_feature(f8, "HEDGE-MEAN", train_ds)
f9_encoded = encode_numerical_feature(f9, "HEDGE-SD", train_ds)
f10_encoded = encode_numerical_feature(f10, "INTENSITY-MEAN", train_ds)
f11_encoded = encode_numerical_feature(f11, "RAWRED-MEAN", train_ds)
f12_encoded = encode_numerical_feature(f12, "RAWBLUE-MEAN", train_ds)
f13_encoded = encode_numerical_feature(f13, "RAWGREEN-MEAN", train_ds)
f14_encoded = encode_numerical_feature(f14, "EXRED-MEAN", train_ds)
f15_encoded = encode_numerical_feature(f15, "EXBLUE-MEAN", train_ds)
f16_encoded = encode_numerical_feature(f16, "EXGREEN-MEAN", train_ds)
f17_encoded = encode_numerical_feature(f17, "VALUE-MEAN", train_ds)
f18_encoded = encode_numerical_feature(f18, "SATURATION-MEAN", train_ds)
f19_encoded = encode_numerical_feature(f19, "HUE-MEAN", train_ds)

all_features = layers.concatenate(
    [
        f1_encoded,
        f2_encoded,
        f3_encoded,
        f4_encoded,
        f5_encoded,
        f6_encoded,
        f7_encoded,
        f8_encoded,
        f9_encoded,
        f10_encoded,
        f11_encoded,
        f12_encoded,
        f13_encoded,
        f14_encoded,
        f15_encoded,
        f16_encoded,
        f17_encoded,
        f18_encoded,
        f19_encoded,
    ]
)

x = layers.Dense(32, activation="relu")(all_features)
x = layers.Dropout(0.5)(x)
output = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(all_inputs, output)
model.compile("adam", loss= tf.keras.losses.MeanSquaredError(), metrics=["accuracy"])

In [71]:
from keras.utils.vis_utils import plot_model

plot_model(model, show_shapes=True, rankdir="LR")

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [74]:
model.fit(train_ds, epochs=50, validation_data=val_ds)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x20b76f01990>

In [73]:
predictions = model.predict(val_ds.take(1))

print(predictions)

print(
    "This particular patient had a %.1f percent probability "
    "of having a heart disease, as evaluated by our model." % (100 * predictions[0][0],)
)

[[0.9965597 ]
 [0.9992046 ]
 [0.12387908]
 [0.9644068 ]
 [0.96538806]
 [0.99520004]
 [0.99712485]
 [0.99834263]
 [0.9866264 ]
 [0.97679627]
 [0.94994515]
 [0.99722564]
 [0.13252275]
 [0.9972764 ]
 [0.99631304]
 [0.9400719 ]
 [0.999343  ]
 [0.98563695]
 [0.97429013]
 [0.99939275]
 [0.9998081 ]]
This particular patient had a 99.7 percent probability of having a heart disease, as evaluated by our model.
