In [2]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [3]:
tf.enable_eager_execution()

### Load data

In [4]:
URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
dataframe = pd.read_csv(URL)
dataframe.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0,fixed,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,normal,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,reversible,0
3,37,1,3,130,250,0,0,187,0,3.5,3,0,normal,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,normal,0


###  Split train, valid, test

In [5]:
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

193 train examples
49 validation examples
61 test examples


### Create Dataset

In [6]:
def df_to_dataset(df, label, shuffle=True, batch_size=32):
    df = df.copy()
    target = df.pop(label)
    ds = tf.data.Dataset.from_tensor_slices((dict(df), target))
    ds = ds.cache()
    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))
    ds = ds.batch(batch_size)
    return ds

In [7]:
tr_ds = df_to_dataset(train, 'target')
vl_ds = df_to_dataset(train, 'target', shuffle=False)
ts_ds = df_to_dataset(train, 'target', shuffle=False)

### Create Feature Columns

In [8]:
feature_columns = []

# 数值列
for header in ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'slope', 'ca']:
    feature_columns.append(feature_column.numeric_column(header))

# 分桶列
age = feature_column.numeric_column("age")
age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
feature_columns.append(age_buckets)

# 分类列
thal = feature_column.categorical_column_with_vocabulary_list(
      'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)

# 嵌入列
thal_embedding = feature_column.embedding_column(thal, dimension=8)
feature_columns.append(thal_embedding)

# 组合列
crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000)
crossed_feature = feature_column.indicator_column(crossed_feature)
feature_columns.append(crossed_feature)

In [17]:
example_batch = next(iter(tr_ds))[0]

In [31]:
# 用于创建一个特征列
# 并转换一批次数据的一个实用程序方法
def demo(feature_column):
    feature_layer = layers.DenseFeatures(feature_column)
    print(feature_layer(example_batch).numpy().shape)

In [32]:
demo(feature_columns)

(32, 1029)


### Model

#### cross layer

In [39]:
import tensorflow.keras.backend as K

In [40]:
class CrossLayer(tf.keras.layers.Layer):
    def __init__(self, input_dim, output_dim=30, **kwargs):
        self.input_dim = input_dim
        self.output_dim = output_dim
        super(CrossLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(name='kernel', 
                                      shape=(self.input_dim, self.output_dim),
                                      initializer='glorot_uniform',
                                      trainable=True)
        super(CrossLayer, self).build(input_shape)

    def call(self, x):
        a = K.pow(K.dot(x,self.kernel), 2)
        b = K.dot(K.pow(x, 2), K.pow(self.kernel, 2))
        return K.mean(a-b, 1, keepdims=True)*0.5

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

#### feature column 进 keras

In [41]:
class Model(tf.keras.Model):
    def __init__(self):
        super(Model, self).__init__()
        self.feat_layer = tf.keras.layers.DenseFeatures(feature_columns)
        self.linear = tf.keras.layers.Dense(1)
        self.cs = CrossLayer(1029)   ### 1029 is dimension of feature
        self.add = tf.keras.layers.Add()
        self.act = tf.keras.layers.Activation('sigmoid')       

    def call(self, x):
        x = self.feat_layer(x)
        linear = self.linear(x)
        cross = self.cs(x)
        x = self.add([linear, cross])
        x = self.act(x)
        return x

model = Model()

In [42]:
loss_object = tf.keras.losses.BinaryCrossentropy()

optimizer = tf.keras.optimizers.Adam()

In [43]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')

In [44]:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images)
        labels = labels[:, tf.newaxis]
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)
    
@tf.function
def test_step(images, labels):
    predictions = model(images)
    labels = labels[:, tf.newaxis]
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)    

In [45]:
EPOCHS = 5

for epoch in range(EPOCHS):
    # 在下一个epoch开始时，重置评估指标
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in tr_ds:
        train_step(images, labels)
    
    for images, labels in vl_ds:
        test_step(images, labels) 

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print (template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))

Epoch 1, Loss: 2.5254440307617188, Accuracy: 59.06735610961914, Test Loss: 0.680488646030426, Test Accuracy: 69.94818115234375
Epoch 2, Loss: 1.1131755113601685, Accuracy: 73.05699920654297, Test Loss: 1.390129804611206, Test Accuracy: 71.50259399414062
Epoch 3, Loss: 1.0128390789031982, Accuracy: 70.98445892333984, Test Loss: 0.8251709342002869, Test Accuracy: 71.50259399414062
Epoch 4, Loss: 0.7121309638023376, Accuracy: 73.05699920654297, Test Loss: 0.5672151446342468, Test Accuracy: 77.20207214355469
Epoch 5, Loss: 0.6961004137992859, Accuracy: 74.61140441894531, Test Loss: 0.529730498790741, Test Accuracy: 77.72020721435547
