In [1]:
import keras
import numpy as np
import tensorflow as tf
keras.__version__

Using TensorFlow backend.


'2.3.1'

In [2]:
# 실행할 때마다 같은 결과를 출력하기 위해 설정하는 부분
np.random.seed(3)
tf.random.set_seed(3)

In [3]:
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)

In [4]:
xy = np.loadtxt('ThoraricSurgery.csv', delimiter=',', dtype=np.float32)

In [5]:
xy = MinMaxScaler(xy)
print(xy)

[[0.6226013  0.         0.4855967  ... 0.         0.6212121  0.        ]
 [0.         0.14285715 0.2962963  ... 0.         0.59090906 0.        ]
 [0.01492537 0.14285715 0.3600823  ... 0.         0.6818182  0.9999999 ]
 ...
 [0.86353946 0.71428573 0.80658436 ... 0.         0.6212121  0.        ]
 [0.05117271 1.         0.5925926  ... 0.         0.56060606 0.9999999 ]
 [0.9509595  1.         0.7736625  ... 0.         0.42424244 0.        ]]


In [6]:
x_train = xy[:400, 0:-1]
y_train = xy[:400, [-1]]

x_test = xy[400:, 0:-1]
y_test = xy[400:, [-1]]

print(x_train.shape, x_test.shape) 

(400, 17) (70, 17)


In [7]:
from keras import models
from keras import layers
from keras import regularizers

def build_model():
    model = models.Sequential()
    model.add(layers.Dense(30,kernel_regularizer=regularizers.l2(0.001),
                          activation='relu',input_shape=(17,)))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    return model

In [8]:
# k-겹 교차 검증
# x_train, y_train을 다시 train data(partial) 300개, validation data 100개로 나눈다.
k = 4
num_val_samples = len(x_train) // k
print(num_val_samples)         # validation data: 100개
num_epochs = 100
all_scores = []
for i in range(k):
    print('처리중인 폴드 #', i)
    # 검증 데이터 준비: k번째 분할
    val_data = x_train[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = y_train[i * num_val_samples: (i + 1) * num_val_samples]

    # 훈련 데이터 준비
    partial_train_data = np.concatenate(
        [x_train[:i * num_val_samples],
         x_train[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [y_train[:i * num_val_samples],
         y_train[(i + 1) * num_val_samples:]],
        axis=0)

    # 케라스 모델 구성
    model = build_model()
    # 모델 훈련
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    # 검증 세트로 모델 평가
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

100
처리중인 폴드 # 0
처리중인 폴드 # 1
처리중인 폴드 # 2
처리중인 폴드 # 3


In [9]:
all_scores

[0.800000011920929, 0.8500000238418579, 0.8500000238418579, 0.9200000166893005]

In [10]:
np.mean(all_scores)

0.8550000190734863

In [11]:
# 최종 모델 훈련
model = build_model()
model.fit(x_train, y_train, epochs=100, batch_size=10)
results=model.evaluate(x_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [12]:
results

[0.17457109221390316, 0.7857142686843872]