In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input

from tensorflow.keras.utils import to_categorical
import keras_tuner as kt

In [36]:
# 데이터 로드
file_path = './wine.csv'
df = pd.read_csv(file_path)

# 결측치 확인
print("=== 결측치 현황 ===")
print(df.isnull().sum())

# 레이블 분포 확인
print("\n=== 레이블 분포 ===")
print(df['Wine'].value_counts())


=== 결측치 현황 ===
Wine                    0
Alcohol                 0
Malic.acid              0
Ash                     0
Acl                     0
Mg                      0
Phenols                 0
Flavanoids              0
Nonflavanoid.phenols    0
Proanth                 0
Color.int               0
Hue                     0
OD                      0
Proline                 0
dtype: int64

=== 레이블 분포 ===
Wine
2    71
1    59
3    48
Name: count, dtype: int64


In [37]:
# 특성과 레이블 분리
X = df.drop('Wine', axis=1).values # Pandas DataFrame을 NumPy 배열로 변환
y = df['Wine'].values

# 원핫 인코딩 수행 (y 데이터)
onehot_encoder = OneHotEncoder(sparse_output=False)
y_onehot = onehot_encoder.fit_transform(y.reshape(-1, 1))

# 훈련 및 테스트 세트 분리
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)


In [38]:
# 데이터셋 Shape 확인
print("\n=== 데이터셋 Shape ===")
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


=== 데이터셋 Shape ===
X_train shape: (142, 13)
X_test shape: (36, 13)
y_train shape: (142, 3)
y_test shape: (36, 3)


In [39]:
def build_model(hp):
    model = Sequential()
    # 첫 번째 레이어에 Input 객체 사용
    model.add(Input(shape=(X_train.shape[1],)))
    model.add(Dense(hp.Int('units_1', min_value=32, max_value=128, step=32), activation='relu'))
    model.add(Dense(hp.Int('units_2', min_value=16, max_value=64, step=16), activation='relu'))
    model.add(Dense(y_train.shape[1], activation='softmax'))

    model.compile(
        optimizer=hp.Choice('optimizer', values=['adam', 'sgd']),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    return model

In [40]:
# Keras Tuner 설정 (하이퍼파라미터 탐색)
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    directory='my_tuning_dir',
    project_name='wine_classification'
)

# 하이퍼파라미터 탐색 수행
tuner.search(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)


Reloading Tuner from my_tuning_dir/wine_classification/tuner0.json


In [41]:
# 최적의 하이퍼파라미터 출력 및 모델 생성
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print("\nBest Hyperparameters:")
print(f"Units in first hidden layer: {best_hps.get('units_1')}")
print(f"Units in second hidden layer: {best_hps.get('units_2')}")
print(f"Optimizer: {best_hps.get('optimizer')}")



Best Hyperparameters:
Units in first hidden layer: 96
Units in second hidden layer: 16
Optimizer: adam


In [42]:
# 최적의 하이퍼파라미터로 모델 학습
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# 테스트 데이터 평가
loss, accuracy = model.evaluate(X_test, y_test)
print("\nTest Loss:", loss)
print("Test Accuracy:", accuracy)

Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.4083 - loss: 412.5561 - val_accuracy: 0.4138 - val_loss: 342.4888
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4020 - loss: 316.2854 - val_accuracy: 0.4138 - val_loss: 219.5485
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3662 - loss: 194.7588 - val_accuracy: 0.2759 - val_loss: 198.7416
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.3101 - loss: 187.3394 - val_accuracy: 0.2759 - val_loss: 168.9977
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.3185 - loss: 141.1823 - val_accuracy: 0.2759 - val_loss: 113.8346
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.2893 - loss: 101.0121 - val_accuracy: 0.2759 - val_loss: 45.6521
Epoch 7/50
[1m4/4[0m 

In [43]:
# 예측 수행 및 결과 확인
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1) # 예측된 클래스 인덱스
y_true_classes = np.argmax(y_test, axis=1) # 실제 클래스 인덱스

# 혼동 행렬 출력
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true_classes, y_pred_classes)
print("\nConfusion Matrix:")
print(cm)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step

Confusion Matrix:
[[13  1  0]
 [ 1 13  0]
 [ 3  5  0]]
