## 다중 분류 - wine 데이터

In [4]:
import numpy as np
import matplotlib.pyplot as pit
import tensorflow as tf
seed = 2023
np.random.seed(seed)
tf.random.set_seed(seed)

In [6]:
tf.__version__

'2.14.0'

- 데이터 전처리

In [None]:
from sklearn.datasets import load_wine
wine = load_wine()

In [10]:
# X 데이터는 표준화
from sklearn.preprocessing import StandardScaler
wine_std = StandardScaler().fit_transform(wine.data)

In [12]:
# y데이터
np.unique(wine.target, return_counts=True)


(array([0, 1, 2]), array([59, 71, 48], dtype=int64))

In [15]:
# y값이 3가지 이상일 경우에는 반드시 One-hot encoding을 해주어야 함
from tensorflow.keras.utils import to_categorical
Y = to_categorical(wine.target)
Y.shape, Y[:5]

((178, 3),
 array([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]], dtype=float32))

In [16]:
# 데이터셋 분리
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    wine_std, Y , stratify=Y , test_size=0.2,random_state=seed
)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((142, 13), (36, 13), (142, 3), (36, 3))

- 모델 정의 / 설정/ 학습/ 평가

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [23]:
model = Sequential([
    Dense(30, input_dim=13, activation='relu'),
    Dense(12, activation='relu'),
    Dense(3,  activation='softmax')         # 노드수는 y의 종류 갯수, 활성화 함수는 softmax
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 30)                420       
                                                                 
 dense_2 (Dense)             (None, 12)                372       
                                                                 
 dense_3 (Dense)             (None, 3)                 39        
                                                                 
Total params: 831 (3.25 KB)
Trainable params: 831 (3.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [24]:
# 다중 분류시 손실 함수는 categorical_crossentropy
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [25]:
# 학습
hist = model.fit(X_train, Y_train, validation_split=0.2,
                 epochs=100, batch_size=100,verbose=0)


In [27]:
# 평가
model.evaluate(X_test,Y_test)



[0.04562778025865555, 1.0]

In [28]:
# 예측
pred = model.predict(X_test)
pred[:5]



array([[1.1146919e-02, 5.8404589e-03, 9.8301262e-01],
       [9.9993658e-01, 1.4910786e-05, 4.8456157e-05],
       [5.0535095e-03, 9.8996532e-01, 4.9810633e-03],
       [9.9787509e-01, 1.6375961e-03, 4.8730944e-04],
       [2.1062842e-02, 9.7835177e-01, 5.8535516e-04]], dtype=float32)

- 학습과정 시각화

In [30]:
y_acc = hist.history['accuracy']
y_vloss = hist.history['val_loss']
xs = np.arange(1, len(y_acc)+1)

In [31]:
plt.figure(figsize=(12,6))
plt.plot(xs, y_loss, label='train loss')
plt.plot(xs, y_vloss, label='validation loss')
plt.legend(), plt.grid(), plt.xlabel('epoch')
plt.show()

NameError: name 'plt' is not defined