In [None]:
!pip install sklearn

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [None]:
df = pd.read_csv('Heart.csv') # 데이터 불러오기
df.head()
print(df.shape)

(303, 15)


In [None]:
train, test = train_test_split(df, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train set')
print(len(test), 'test set')
print(len(val), 'val set')
    # 데이터프레임 분류(train, test, val)

193 train set
61 test set
49 val set


In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):  # 데이터프래임을 데이터셋으로 변환
  dataframe = df.copy()
  labels = dataframe.pop('AHD')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

In [None]:
batch_size = 5
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [None]:
for feature_batch, label_batch in train_ds.take(1):
  print('전체 특성: ', list(feature_batch.keys()))
  print('나이 특성의 배치: ', feature_batch['Age'])
  print('타깃의 배치: ', label_batch)

전체 특성:  ['No', 'Age', 'Sex', 'ChestPain', 'RestBP', 'Chol', 'Fbs', 'RestECG', 'MaxHR', 'ExAng', 'Oldpeak', 'Slope', 'Ca', 'Thal']
나이 특성의 배치:  tf.Tensor([56 54 53 47 74], shape=(5,), dtype=int64)
타깃의 배치:  tf.Tensor([1 1 0 0 0], shape=(5,), dtype=int64)


In [None]:
example_batch = next(iter(train_ds))[0]

def demo(feature_column):
  feature_layer = layers.DenseFeatures(feature_column)
  print(feature_layer(example_batch).numpy())

In [None]:
age = feature_column.numeric_column("Age")
demo(age)

[[42.]
 [52.]
 [56.]
 [41.]
 [54.]
 [67.]
 [58.]
 [42.]
 [47.]
 [57.]
 [69.]
 [50.]
 [46.]
 [63.]
 [51.]
 [42.]
 [65.]
 [40.]
 [56.]
 [60.]
 [53.]
 [57.]
 [64.]
 [46.]
 [55.]
 [52.]
 [58.]
 [54.]
 [60.]
 [45.]
 [70.]
 [41.]]


In [None]:
age_buckets = feature_column.bucketized_column(age, boundaries=[18,
25, 30, 35, 40, 45, 50, 55, 60, 65])  # represent in one-hot encoding type
demo(age_buckets)

[[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]


In [None]:
thal = feature_column.categorical_column_with_vocabulary_list(
'Thal'
, ['fixed', 'normal', 'reversable'])
thal_one_hot = feature_column.indicator_column(thal)
demo(thal_one_hot)

[[0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]]


In [None]:
thal_embedding = feature_column.embedding_column(thal, dimension=8) # embedding column
demo(thal_embedding)  # represent by dense vector

[[-0.19265212  0.18598749  0.43184954  0.50161886 -0.24444287 -0.10962177
  -0.16754857 -0.2681168 ]
 [-0.19265212  0.18598749  0.43184954  0.50161886 -0.24444287 -0.10962177
  -0.16754857 -0.2681168 ]
 [-0.296158    0.0826901  -0.05799363  0.24914083  0.5414035  -0.10875078
   0.18227662 -0.00492234]
 [-0.296158    0.0826901  -0.05799363  0.24914083  0.5414035  -0.10875078
   0.18227662 -0.00492234]
 [-0.19265212  0.18598749  0.43184954  0.50161886 -0.24444287 -0.10962177
  -0.16754857 -0.2681168 ]]


In [None]:
crossed_feature = feature_column.crossed_column([age_buckets, thal],
hash_bucket_size=10) # hash_bucket_size = 1000
demo(feature_column.indicator_column(crossed_feature))

[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]


In [None]:
feature_columns = []

# 수치형 열
for header in ['Age', 'RestBP', 'Chol', 'MaxHR', 'Oldpeak', 'Slope', 'Ca']:
  feature_columns.append(feature_column.numeric_column(header))

# 버킷형 열
age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50,
55, 60, 65])
feature_columns.append(age_buckets)

# 범주형 열
thal = feature_column.categorical_column_with_vocabulary_list(
'Thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)
feature_columns.append(thal_one_hot)

# 임베딩 열
thal_embedding = feature_column.embedding_column(thal, dimension=8)
feature_columns.append(thal_embedding)

# 교차 특성 열
crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000)
crossed_feature = feature_column.indicator_column(crossed_feature)
feature_columns.append(crossed_feature)

print(feature_columns)

[NumericColumn(key='Age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='RestBP', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='Chol', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='MaxHR', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='Oldpeak', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='Slope', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='Ca', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), BucketizedColumn(source_column=NumericColumn(key='Age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(18, 25, 30, 35, 40, 45, 50, 55, 60, 65)), IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='Thal', vocabulary_list=('fixed', 'normal', 'reversible'), dtype

In [None]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [None]:
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [None]:
model = tf.keras.Sequential([
  feature_layer,  # input layer
  layers.Dense(128, activation='relu'), # hidden layer(nodes, activation_func)
  layers.Dense(128, activation='relu'),
  layers.Dense(1, activation='sigmoid')
])

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # set learning options

In [None]:
model.fit(train_ds, validation_data=val_ds, epochs=500)

In [None]:
loss, accuracy = model.evaluate(test_ds)
print("정확도: ", accuracy)

정확도:  0.9867987036705017


--------------------------------------------


In [None]:
dataframe_iris = pd.read_csv('iris.csv')
dataframe_iris.head()
print(dataframe_iris.shape)

(100, 5)


In [None]:
iris_train, iris_test = train_test_split(dataframe_iris, test_size=0.2)
iris_train, iris_val = train_test_split(iris_train, test_size=0.2)
print(iris_train)
print(iris_test)
print(iris_val)

    SepalLength  SepalWidth  PetalLength  PetalWidth  Name
2           4.7         3.2          1.3         0.2     0
84          5.4         3.0          4.5         1.5     1
52          6.9         3.1          4.9         1.5     1
46          5.1         3.8          1.6         0.2     0
38          4.4         3.0          1.3         0.2     0
..          ...         ...          ...         ...   ...
69          5.6         2.5          3.9         1.1     1
89          5.5         2.5          4.0         1.3     1
43          5.0         3.5          1.6         0.6     0
31          5.4         3.4          1.5         0.4     0
15          5.7         4.4          1.5         0.4     0

[64 rows x 5 columns]
    SepalLength  SepalWidth  PetalLength  PetalWidth  Name
97          6.2         2.9          4.3         1.3     1
42          4.4         3.2          1.3         0.2     0
19          5.1         3.8          1.5         0.3     0
62          6.0         2.2      

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):  # 데이터프래임을 데이터셋으로 변환
  dataframe = dataframe_iris.copy()
  labels = dataframe.pop('Name')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

In [None]:
iris_train_ds = df_to_dataset(iris_train)
iris_test_ds = df_to_dataset(iris_test, shuffle=False)
iris_val_ds = df_to_dataset(iris_val, shuffle=False)

In [None]:
example_batch = next(iter(iris_train_ds))[0]

def demo(feature_column):
  feature_layer = layers.DenseFeatures(feature_column)
  print(feature_layer(example_batch).numpy())

In [None]:
feature_columns = []

for header in ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']:
  feature_columns.append(feature_column.numeric_column(header))

print(feature_columns)

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


In [None]:
feature_layer = layers.DenseFeatures(feature_columns)

In [None]:
model_iris = tf.keras.Sequential([
  feature_layer,
  layers.Dense(128, activation='relu'),
  layers.Dense(128, activation='relu'),
  layers.Dense(1, activation='sigmoid')
])

In [None]:
model_iris.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
model_iris.fit(iris_train_ds,
validation_data=iris_val_ds,
epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f3010c30f50>