In [2]:
from IPython.display import display, HTML
display(HTML("""
<style>
div.container{width:90% !important;}
div.cell.code_cell.rendered{width:100%;}
div.input_prompt{padding:0px;}
div.CodeMirror {font-family:Consolas; font-size:12pt;}
div.text_cell_render.rendered_html{font-size:12pt;}
div.output {font-size:12pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:12pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:12pt;padding:5px;}
table.dataframe{font-size:12px;}
</style>
"""))

In [5]:
from tensorflow.keras.utils import to_categorical # 분류분석시 원핫인코딩(추천)
import pandas as pd # 원핫인코딩
from tensorflow.keras.models import Sequential, load_model # 모델 생성, 모델 load
from tensorflow.keras.layers import Dense, Input
import numpy as np

# 딥러닝 : 데이터의 규칙성을 추출
- 응용분야 : 글씨인식, 문장분류, 예측, 질병진단, 얼굴인식
- ML/DL의 종류
   * 지도학습(독립=입력변수, 타겟=종속변수)
       * 분류분석(타겟변수가 category 변수인 분석. 다중분류 vs 이진분류)
   * 비지도학습
       * 군집화
   * 강화학습
- 지도학습에서의 딥러닝 프로그래밍 방식
    - 1. 데이터 확보 및 생성 
    - 2. 데이터 전처리 : 스케일 조정, 훈련데이터셋(학습데이터셋), 검증데이터셋, 시험데이터셋, 원핫인코딩
    - 3. 모델구성
    - 4. 모델 학습 과정 설정
    - 5. 모델 학습시키기(학습,검증데이터셋)
    - 6. 모델 평가(시험데이터셋)
    - 7. 모델 저장/사용(입력값 주면 예측값 받기)

# 1. 데이터 셋 생성
# 2. 데이터 전처리

In [9]:
# 학습데이터 = 훈련데이터셋
x_train = np.array([1,2,3,4,5,6,7,8,9]*10)
y_train = np.array([2,4,6,8,10,12,14,16,18]*10)
# 검증데이터, 테스트셋
x_val = np.array([1,2,3,4,5,6,7,8,9])
y_val = np.array([2,4,6,8,10,12,14,16,18])

In [19]:
# 인코딩 종류 
    # 라벨인코딩=문자를숫자로
    # 원핫인코딩= 값의 개수만큼 열을 만들고, 해당 범주에 해당하는 열에만 1을, 나머지를 0을.
data = np.array(['a','b','c','b','b'])
print('원 데이터 :', data)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
labeling_data = le.fit_transform(data)
print('라벨인코딩된 데이터 :', labeling_data)
one_hot_encoding_data = to_categorical(labeling_data)
print('원핫인코딩된 데이터 :\n ', one_hot_encoding_data)

원 데이터 : ['a' 'b' 'c' 'b' 'b']
라벨인코딩된 데이터 : [0 1 2 1 1]
원핫인코딩된 데이터 :
  [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]]


In [23]:
# 원핫인코딩(분류분석을 위한 target변수)
data = np.array([1, 4, 5, 4, 5])
categorical_onehot = to_categorical(data) # 원핫인코딩 결과가 numpy 배열
print('to_categorical 이용 \n', categorical_onehot)
getdummies_onehot = pd.get_dummies(data) # 원핫인코딩 결과가 DataFrame
print('get_dummies 이용\n', getdummies_onehot)
print(type(categorical_onehot), type(getdummies_onehot))

to_categorical 이용 
 [[0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]
get_dummies 이용
    1  4  5
0  1  0  0
1  0  1  0
2  0  0  1
3  0  1  0
4  0  0  1
<class 'numpy.ndarray'> <class 'pandas.core.frame.DataFrame'>


In [30]:
# 분류분석을 위한 target변수 원핫인코딩
Y_train = to_categorical(y_train, 19)
Y_val   = to_categorical(y_val)

In [31]:
x_train.shape, Y_train.shape, x_train.shape, Y_train.shape

((90,), (90, 19), (90,), (90, 19))

# 3. 모델구성

In [32]:
model = Sequential()
model.add(Input(shape=(1,)))
model.add(Dense(38, activation='sigmoid')) #활성화 함수 : relu, elu, tanh, sigmoid
model.add(Dense(64, activation='elu'))
model.add(Dense(32, activation='elu'))
model.add(Dense(19, activation='softmax'))
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 38)                76        
                                                                 
 dense_1 (Dense)             (None, 64)                2496      
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 19)                627       
                                                                 
Total params: 5,279
Trainable params: 5,279
Non-trainable params: 0
_________________________________________________________________


# 4. 모델 학습과정 설정
- 회기분석에서의 loss : mse, rmse, mae
- 다중 분류분석에서의 loss: categorical_crossentropy


In [35]:
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])

# 5. 모델 학습시키기

In [37]:
hist = model.fit(x_train, Y_train, epochs=300, batch_size=10,
                validation_data=(x_val, Y_val), verbose=2)
# verbose=0:출력없음 / 1:기본값출력 / 2:에포크별 자세한 로그 출력

Epoch 1/300
9/9 - 1s - loss: 2.9418 - accuracy: 0.1222 - val_loss: 2.7584 - val_accuracy: 0.1111 - 882ms/epoch - 98ms/step
Epoch 2/300
9/9 - 0s - loss: 2.6923 - accuracy: 0.1778 - val_loss: 2.5716 - val_accuracy: 0.2222 - 46ms/epoch - 5ms/step
Epoch 3/300
9/9 - 0s - loss: 2.5121 - accuracy: 0.2222 - val_loss: 2.4218 - val_accuracy: 0.2222 - 48ms/epoch - 5ms/step
Epoch 4/300
9/9 - 0s - loss: 2.3754 - accuracy: 0.2000 - val_loss: 2.2963 - val_accuracy: 0.2222 - 58ms/epoch - 6ms/step
Epoch 5/300
9/9 - 0s - loss: 2.2565 - accuracy: 0.2000 - val_loss: 2.1859 - val_accuracy: 0.3333 - 40ms/epoch - 4ms/step
Epoch 6/300
9/9 - 0s - loss: 2.1541 - accuracy: 0.2889 - val_loss: 2.0973 - val_accuracy: 0.2222 - 59ms/epoch - 7ms/step
Epoch 7/300
9/9 - 0s - loss: 2.0795 - accuracy: 0.2222 - val_loss: 2.0216 - val_accuracy: 0.1111 - 42ms/epoch - 5ms/step
Epoch 8/300
9/9 - 0s - loss: 2.0071 - accuracy: 0.1667 - val_loss: 1.9587 - val_accuracy: 0.2222 - 51ms/epoch - 6ms/step
Epoch 9/300
9/9 - 0s - loss: 1

Epoch 69/300
9/9 - 0s - loss: 0.4480 - accuracy: 0.9556 - val_loss: 0.4278 - val_accuracy: 1.0000 - 48ms/epoch - 5ms/step
Epoch 70/300
9/9 - 0s - loss: 0.4515 - accuracy: 0.9222 - val_loss: 0.4134 - val_accuracy: 0.8889 - 45ms/epoch - 5ms/step
Epoch 71/300
9/9 - 0s - loss: 0.4111 - accuracy: 0.9778 - val_loss: 0.3977 - val_accuracy: 1.0000 - 44ms/epoch - 5ms/step
Epoch 72/300
9/9 - 0s - loss: 0.4116 - accuracy: 0.9556 - val_loss: 0.3893 - val_accuracy: 1.0000 - 51ms/epoch - 6ms/step
Epoch 73/300
9/9 - 0s - loss: 0.3947 - accuracy: 1.0000 - val_loss: 0.3802 - val_accuracy: 1.0000 - 52ms/epoch - 6ms/step
Epoch 74/300
9/9 - 0s - loss: 0.3862 - accuracy: 1.0000 - val_loss: 0.3671 - val_accuracy: 1.0000 - 51ms/epoch - 6ms/step
Epoch 75/300
9/9 - 0s - loss: 0.3730 - accuracy: 1.0000 - val_loss: 0.3586 - val_accuracy: 1.0000 - 48ms/epoch - 5ms/step
Epoch 76/300
9/9 - 0s - loss: 0.3640 - accuracy: 1.0000 - val_loss: 0.3517 - val_accuracy: 1.0000 - 52ms/epoch - 6ms/step
Epoch 77/300
9/9 - 0s - 

Epoch 136/300
9/9 - 0s - loss: 0.0622 - accuracy: 1.0000 - val_loss: 0.0589 - val_accuracy: 1.0000 - 47ms/epoch - 5ms/step
Epoch 137/300
9/9 - 0s - loss: 0.0605 - accuracy: 1.0000 - val_loss: 0.0591 - val_accuracy: 1.0000 - 46ms/epoch - 5ms/step
Epoch 138/300
9/9 - 0s - loss: 0.0600 - accuracy: 1.0000 - val_loss: 0.0561 - val_accuracy: 1.0000 - 38ms/epoch - 4ms/step
Epoch 139/300
9/9 - 0s - loss: 0.0562 - accuracy: 1.0000 - val_loss: 0.0548 - val_accuracy: 1.0000 - 41ms/epoch - 5ms/step
Epoch 140/300
9/9 - 0s - loss: 0.0548 - accuracy: 1.0000 - val_loss: 0.0533 - val_accuracy: 1.0000 - 41ms/epoch - 5ms/step
Epoch 141/300
9/9 - 0s - loss: 0.0526 - accuracy: 1.0000 - val_loss: 0.0525 - val_accuracy: 1.0000 - 55ms/epoch - 6ms/step
Epoch 142/300
9/9 - 0s - loss: 0.0522 - accuracy: 1.0000 - val_loss: 0.0508 - val_accuracy: 1.0000 - 42ms/epoch - 5ms/step
Epoch 143/300
9/9 - 0s - loss: 0.0501 - accuracy: 1.0000 - val_loss: 0.0483 - val_accuracy: 1.0000 - 39ms/epoch - 4ms/step
Epoch 144/300
9/

Epoch 203/300
9/9 - 0s - loss: 0.0127 - accuracy: 1.0000 - val_loss: 0.0125 - val_accuracy: 1.0000 - 47ms/epoch - 5ms/step
Epoch 204/300
9/9 - 0s - loss: 0.0124 - accuracy: 1.0000 - val_loss: 0.0122 - val_accuracy: 1.0000 - 34ms/epoch - 4ms/step
Epoch 205/300
9/9 - 0s - loss: 0.0121 - accuracy: 1.0000 - val_loss: 0.0120 - val_accuracy: 1.0000 - 53ms/epoch - 6ms/step
Epoch 206/300
9/9 - 0s - loss: 0.0120 - accuracy: 1.0000 - val_loss: 0.0118 - val_accuracy: 1.0000 - 42ms/epoch - 5ms/step
Epoch 207/300
9/9 - 0s - loss: 0.0117 - accuracy: 1.0000 - val_loss: 0.0115 - val_accuracy: 1.0000 - 39ms/epoch - 4ms/step
Epoch 208/300
9/9 - 0s - loss: 0.0116 - accuracy: 1.0000 - val_loss: 0.0114 - val_accuracy: 1.0000 - 55ms/epoch - 6ms/step
Epoch 209/300
9/9 - 0s - loss: 0.0114 - accuracy: 1.0000 - val_loss: 0.0112 - val_accuracy: 1.0000 - 39ms/epoch - 4ms/step
Epoch 210/300
9/9 - 0s - loss: 0.0113 - accuracy: 1.0000 - val_loss: 0.0110 - val_accuracy: 1.0000 - 56ms/epoch - 6ms/step
Epoch 211/300
9/

Epoch 270/300
9/9 - 0s - loss: 0.0047 - accuracy: 1.0000 - val_loss: 0.0046 - val_accuracy: 1.0000 - 35ms/epoch - 4ms/step
Epoch 271/300
9/9 - 0s - loss: 0.0046 - accuracy: 1.0000 - val_loss: 0.0046 - val_accuracy: 1.0000 - 33ms/epoch - 4ms/step
Epoch 272/300
9/9 - 0s - loss: 0.0046 - accuracy: 1.0000 - val_loss: 0.0045 - val_accuracy: 1.0000 - 37ms/epoch - 4ms/step
Epoch 273/300
9/9 - 0s - loss: 0.0045 - accuracy: 1.0000 - val_loss: 0.0044 - val_accuracy: 1.0000 - 52ms/epoch - 6ms/step
Epoch 274/300
9/9 - 0s - loss: 0.0045 - accuracy: 1.0000 - val_loss: 0.0044 - val_accuracy: 1.0000 - 48ms/epoch - 5ms/step
Epoch 275/300
9/9 - 0s - loss: 0.0044 - accuracy: 1.0000 - val_loss: 0.0043 - val_accuracy: 1.0000 - 40ms/epoch - 4ms/step
Epoch 276/300
9/9 - 0s - loss: 0.0043 - accuracy: 1.0000 - val_loss: 0.0043 - val_accuracy: 1.0000 - 57ms/epoch - 6ms/step
Epoch 277/300
9/9 - 0s - loss: 0.0043 - accuracy: 1.0000 - val_loss: 0.0042 - val_accuracy: 1.0000 - 59ms/epoch - 7ms/step
Epoch 278/300
9/