In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from tensorflow.keras.utils import to_categorical

In [22]:
# Load the whitespace-delimited seeds dataset; the file has no header row,
# so columns are numbered 0..N-1.
# NOTE(review): this is an absolute, machine-specific path — the notebook
# only runs where the file exists at exactly this location.
seeds_path = "C:\\Users\\LG\\Downloads\\seeds\\seeds_dataset.txt"
data = pd.read_csv(seeds_path, sep=r'\s+', header=None)

In [24]:
# Split the frame into features and labels: every column except the last is
# a feature, the last column is the class label.
feature_frame = data.iloc[:, :-1]
label_column = data.iloc[:, -1]

X = feature_frame.to_numpy()
# Subtract 1 so that the integer labels fed to the one-hot encoder start at 0
# (the encoder below is configured for 3 classes, i.e. ids 0..2).
y = label_column.to_numpy().astype(int) - 1
y_encoded = to_categorical(y, num_classes=3)

In [26]:
# Shuffle, then build stratified train/test and train/validation splits.
#
# The shuffled arrays are bound to new names instead of overwriting
# X / y_encoded. Overwriting them left `y` (defined with X above) in the old
# order and therefore misaligned with X, and made this cell non-idempotent:
# re-running it reshuffled already-shuffled data and produced a different
# split each time.
X_shuffled, y_shuffled = shuffle(X, y_encoded, random_state=42)

# Integer class ids recovered from the one-hot rows; used only as the
# stratification key.
y_raw = np.argmax(y_shuffled, axis=1)

# Hold out 20% of all samples as the test set, preserving class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X_shuffled, y_shuffled, test_size=0.2, stratify=y_raw, random_state=42
)

# Carve 10% of the remaining training data off as the validation set,
# again stratified by class.
y_train_raw = np.argmax(y_train, axis=1)
X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train, y_train, test_size=0.1, stratify=y_train_raw, random_state=42
)

In [27]:
# Standardize features. The scaler's statistics are learned from the
# training split only and then applied unchanged to all three splits, so no
# information from validation/test data leaks into the scaling.
scaler = StandardScaler()
scaler.fit(X_train_final)
X_train_final, X_val, X_test = (
    scaler.transform(split) for split in (X_train_final, X_val, X_test)
)

In [30]:
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model

# Input layer: one vector of 7 features per sample
inputs = Input(shape=(7,))

# Hidden stack: Dense(32, relu) -> Dropout(0.3) -> Dense(16, relu),
# applied in order on top of the input tensor
x = inputs
for layer in (
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
):
    x = layer(x)

# Output layer: softmax over the 3 classes
prediction = Dense(3, activation='softmax')(x)

# Assemble the functional model from its input and output tensors
model = Model(inputs=inputs, outputs=prediction)

In [31]:
from tensorflow.keras.callbacks import EarlyStopping

# Compile for multi-class classification. The targets passed to fit() are
# one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# `early_stop` was passed to fit() without ever being defined in the
# notebook, so this cell raised NameError on a fresh kernel. Define it here
# so the cell is self-contained.
# NOTE(review): monitor/patience are assumed values — adjust if the
# originally intended callback used different settings.
early_stop = EarlyStopping(
    monitor='val_loss',          # stop when validation loss stops improving
    patience=10,                 # tolerate 10 epochs without improvement
    restore_best_weights=True,   # roll back to the best epoch's weights
)

# Train with the validation split monitored each epoch; epochs=100 is an
# upper bound, since early stopping may end training sooner.
history = model.fit(
    X_train_final, y_train_final,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=8,
    callbacks=[early_stop]
)

Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 40ms/step - accuracy: 0.4429 - loss: 1.0896 - val_accuracy: 0.5882 - val_loss: 0.8764
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6877 - loss: 0.8360 - val_accuracy: 0.8824 - val_loss: 0.6650
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7766 - loss: 0.7560 - val_accuracy: 0.8824 - val_loss: 0.5399
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8215 - loss: 0.6072 - val_accuracy: 0.8824 - val_loss: 0.4426
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8616 - loss: 0.4908 - val_accuracy: 0.8824 - val_loss: 0.3731
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.8784 - loss: 0.4814 - val_accuracy: 0.8824 - val_loss: 0.3276
Epoch 7/100
[1m19/19[0m [

In [34]:
from sklearn.metrics import confusion_matrix, classification_report

# Evaluate loss and accuracy on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"테스트 정확도: {accuracy:.4f}")

# Inspect the prediction distribution: collapse the per-class scores and the
# one-hot targets to integer class ids by taking the arg-max along axis 1
test_probabilities = model.predict(X_test)
pred_classes = test_probabilities.argmax(axis=1)
true_classes = y_test.argmax(axis=1)

# Per-class breakdown: confusion matrix (rows = true class, columns =
# predicted class) followed by precision / recall / F1 to 4 decimals
print(confusion_matrix(true_classes, pred_classes))
print(classification_report(true_classes, pred_classes, digits=4))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.9211 - loss: 0.3377
테스트 정확도: 0.9286
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
[[13  0  1]
 [ 1 13  0]
 [ 1  0 13]]
              precision    recall  f1-score   support

           0     0.8667    0.9286    0.8966        14
           1     1.0000    0.9286    0.9630        14
           2     0.9286    0.9286    0.9286        14

    accuracy                         0.9286        42
   macro avg     0.9317    0.9286    0.9294        42
weighted avg     0.9317    0.9286    0.9294        42

