In [6]:
# 기본
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 경고가 뜨지 않도록..
import warnings
warnings.filterwarnings('ignore')

# 그래프 설정
plt.rcParams['font.family'] = 'Malgun Gothic'
# plt.rcParams['font.family'] = 'AppleGothic'
plt.rcParams['font.size'] = 16
plt.rcParams['figure.figsize'] = 20, 10
plt.rcParams['axes.unicode_minus'] = False

# 저장
import pickle

# 딥러닝
import tensorflow as tf

# 딥러닝 신경망 구조를 정의하는 것.
from tensorflow.keras.models import Sequential
# 층 구조를 정의하는 것
from tensorflow.keras.layers import Dense
# 활성화 함수를 정의하는 것
from tensorflow.keras.layers import Activation

# 평가함수
# 분류용
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# 회귀용
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# 현재 프로젝트를 GPU에 할당한다.
# 사용한 가능한 GPU 목록을 가져온다.
gpus = tf.config.experimental.list_physical_devices('GPU')
# gpu가 있다면...
if len(gpus) > 0 :
    try :
        for gpu in gpus :
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e :
        print(e)


In [7]:
# 수술 환자 데이터를 읽어온다
# 첫 번째 줄부터 데이터
# 제일 마지막 컬럼이 결과, 1은 생존, 0은 사망

df1 = pd.read_csv('data/ThoraricSurgery.csv', header=None)
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,293,1,3.80,2.80,0,0,0,0,0,0,12,0,0,0,1,0,62,0
1,1,2,2.88,2.16,1,0,0,0,1,1,14,0,0,0,1,0,60,0
2,8,2,3.19,2.50,1,0,0,0,1,0,11,0,0,1,1,0,66,1
3,14,2,3.98,3.06,2,0,0,0,1,1,14,0,0,0,1,0,80,1
4,17,2,2.21,1.88,0,0,1,0,0,0,12,0,0,0,1,0,56,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,98,6,3.04,2.40,2,0,0,0,1,0,11,0,0,0,1,0,76,0
466,369,6,3.88,2.72,1,0,0,0,1,0,12,0,0,0,1,0,77,0
467,406,6,5.36,3.96,1,0,0,0,1,0,12,0,0,0,0,0,62,0
468,25,8,4.32,3.20,0,0,0,0,0,0,11,0,0,0,0,0,58,1


In [8]:
# 첫 번째 컬럼은 환자 일련 번호이므로 제거한다.
df1.drop(0, axis=1, inplace=True)
df1


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,1,3.80,2.80,0,0,0,0,0,0,12,0,0,0,1,0,62,0
1,2,2.88,2.16,1,0,0,0,1,1,14,0,0,0,1,0,60,0
2,2,3.19,2.50,1,0,0,0,1,0,11,0,0,1,1,0,66,1
3,2,3.98,3.06,2,0,0,0,1,1,14,0,0,0,1,0,80,1
4,2,2.21,1.88,0,0,1,0,0,0,12,0,0,0,1,0,56,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,6,3.04,2.40,2,0,0,0,1,0,11,0,0,0,1,0,76,0
466,6,3.88,2.72,1,0,0,0,1,0,12,0,0,0,1,0,77,0
467,6,5.36,3.96,1,0,0,0,1,0,12,0,0,0,0,0,62,0
468,8,4.32,3.20,0,0,0,0,0,0,11,0,0,0,0,0,58,1


In [9]:
# 입력과 결과로 나눈다.
X = df1.drop(17, axis=1)
y = df1[17]

display(X)
display(y)


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,1,3.80,2.80,0,0,0,0,0,0,12,0,0,0,1,0,62
1,2,2.88,2.16,1,0,0,0,1,1,14,0,0,0,1,0,60
2,2,3.19,2.50,1,0,0,0,1,0,11,0,0,1,1,0,66
3,2,3.98,3.06,2,0,0,0,1,1,14,0,0,0,1,0,80
4,2,2.21,1.88,0,0,1,0,0,0,12,0,0,0,1,0,56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465,6,3.04,2.40,2,0,0,0,1,0,11,0,0,0,1,0,76
466,6,3.88,2.72,1,0,0,0,1,0,12,0,0,0,1,0,77
467,6,5.36,3.96,1,0,0,0,1,0,12,0,0,0,0,0,62
468,8,4.32,3.20,0,0,0,0,0,0,11,0,0,0,0,0,58


0      0
1      0
2      1
3      1
4      0
      ..
465    0
466    0
467    0
468    1
469    0
Name: 17, Length: 470, dtype: int64

In [10]:
# 텐서플로의 랜덤시드를 고정한다
# 학습을 할 때 행을 랜덤하게 섞는다
# 이때, 랜덤 패턴을 항상 일정하게 유지시켜줌
# 작업을 할때, 신경망 구조를 다시 설계해야 하는 경우가 발생. 
tf.random.set_seed(1)

In [12]:
# 신경망 구조를 설계한다.
model = Sequential()
# 컬럼의 개수
a1 = X.shape[1] 

# model.add : 신경창의 층을 쌓는 함수
# Dense : 신경망의 층을 정의힌다. 첫 번째 은닉층은 입력 노드의 개수를 반드시 설정해야 하고, 마지막 것이 출력된다.

# 첫 번째 은닉층
# 첫 번째 은닉층은 입력 층의 노드의 개수를 설정해줘야 한다. 
# input_dim : 입력층의 노드의 개수, 컬럼의 수
model.add(Dense(30, input_dim=a1)) 
model.add(Activation('relu'))

# 출력층
model.add(Dense(1)) # 층 1개
model.add(Activation('sigmoid'))


In [13]:
# 개발자가 설계한 것을 토대로 신경망 모델을 생성한다.
model.compile(loss='mean_squared_error', optimizer='adam', 
              metrics=['accuracy'])
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 30)                510       
                                                                 
 activation (Activation)     (None, 30)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 31        
                                                                 
 activation_1 (Activation)   (None, 1)                 0         
                                                                 
Total params: 541
Trainable params: 541
Non-trainable params: 0
_________________________________________________________________


In [14]:
# 학습한다
model.fit(X,y, epochs=30, batch_size=10)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1ff816b7d60>

In [15]:
# 예측한다 (0 ~ 1 값으로 나옴. 0.5보다 작으면 0, 0.5 보다 크면 1)
pred1 = model.predict(X)
pred1

array([[4.09516320e-02],
       [1.41028538e-01],
       [4.53943238e-02],
       [4.63128723e-02],
       [1.15569621e-01],
       [6.31197244e-02],
       [3.03277876e-02],
       [6.28886148e-02],
       [1.00683123e-01],
       [4.14631441e-02],
       [5.50865196e-02],
       [6.22694977e-02],
       [5.98248653e-02],
       [4.01025340e-02],
       [3.99067517e-08],
       [8.13721269e-02],
       [7.82130212e-02],
       [7.15645775e-02],
       [3.98076065e-02],
       [9.07327160e-02],
       [1.06360234e-01],
       [1.04443692e-01],
       [1.39712706e-01],
       [9.70158502e-02],
       [1.37074918e-01],
       [4.96457107e-02],
       [2.40984753e-01],
       [4.35505509e-02],
       [7.44341984e-02],
       [3.72139886e-02],
       [7.73982257e-02],
       [1.02481276e-01],
       [1.26715571e-01],
       [1.11024857e-01],
       [1.72299042e-01],
       [5.13360732e-08],
       [4.36217003e-02],
       [8.58911127e-02],
       [9.56807509e-02],
       [1.51690409e-01],


In [16]:
# 결과가 0.5보다 크면 1로 변환한다.
result1 = (pred1 > 0.5).astype('int')
print(result1)


[[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]


In [17]:
# 진자 결과와 얼마나 같은지 확인.
score1 = accuracy_score(y, result1)
score1


0.851063829787234