In [9]:
# 딥러닝 기반 와인 데이터 분류 (Wine.csv from GitHub)

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [11]:
# 1. 데이터 로딩
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/wine.csv"
df = pd.read_csv(url)
print("✅ 데이터 로딩 완료")
print(df.head())

✅ 데이터 로딩 완료
   Wine  Alcohol  Malic.acid   Ash   Acl   Mg  Phenols  Flavanoids  \
0     1    14.23        1.71  2.43  15.6  127     2.80        3.06   
1     1    13.20        1.78  2.14  11.2  100     2.65        2.76   
2     1    13.16        2.36  2.67  18.6  101     2.80        3.24   
3     1    14.37        1.95  2.50  16.8  113     3.85        3.49   
4     1    13.24        2.59  2.87  21.0  118     2.80        2.69   

   Nonflavanoid.phenols  Proanth  Color.int   Hue    OD  Proline  
0                  0.28     2.29       5.64  1.04  3.92     1065  
1                  0.26     1.28       4.38  1.05  3.40     1050  
2                  0.30     2.81       5.68  1.03  3.17     1185  
3                  0.24     2.18       7.80  0.86  3.45     1480  
4                  0.39     1.82       4.32  1.04  2.93      735  


In [13]:
# 2. X, y 분리 (Wine 컬럼이 target이고, 1~3 → 0~2로 변환 필요)
X = df.drop(columns=["Wine"])
y = tf.keras.utils.to_categorical(df["Wine"] - 1, num_classes=3)


In [15]:
# 3. 데이터 분할 및 스케일링
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
# 4. 모델 구성(하이퍼파라미터 튜닝)
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.1))  # 과적합 방지용
model.add(Dense(16, activation='relu'))
model.add(Dense(3, activation='softmax'))  # 다중 클래스 분류


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [19]:
# 5. 컴파일
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [21]:
# 6. 조기 종료 콜백 설정
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [23]:
# 7. 학습
history = model.fit(
    X_train, y_train,
    validation_split=0.1,   # 학습데이터 중 10%를 검증용으로 사용
    epochs=30,              # 안정성 위해 줄임
    batch_size=8,           # 가벼운 배치
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/30


2025-03-27 21:16:37.452779: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:961] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 147ms/step - accuracy: 0.3041 - loss: 1.1910 - val_accuracy: 0.4667 - val_loss: 1.1492
Epoch 2/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.5455 - loss: 0.9841 - val_accuracy: 0.6000 - val_loss: 0.9246
Epoch 3/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.7907 - loss: 0.8063 - val_accuracy: 0.7333 - val_loss: 0.7676
Epoch 4/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.8697 - loss: 0.6509 - val_accuracy: 0.8667 - val_loss: 0.6048
Epoch 5/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.8890 - loss: 0.5221 - val_accuracy: 0.9333 - val_loss: 0.4694
Epoch 6/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.9050 - loss: 0.4405 - val_accuracy: 0.9333 - val_loss: 0.3754
Epoch 7/30
[1m16/16[0m [32m━━━━━━━━━━━━━━

In [25]:
# 8. 평가
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\n🧪 Test Accuracy: {accuracy:.4f}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 614ms/step - accuracy: 1.0000 - loss: 0.0183

🧪 Test Accuracy: 1.0000


In [27]:
# 9. 예측 결과
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print("\n📊 Confusion Matrix:")
print(confusion_matrix(y_true, y_pred_classes))

print("\n📋 Classification Report:")
print(classification_report(y_true, y_pred_classes))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step

📊 Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]

📋 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

