In [149]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from tensorflow.keras.utils import to_categorical

In [123]:
# Load the data.
# The file location defaults to the original local path but can be overridden with
# the SEEDS_DATA_PATH environment variable, so the notebook also runs on machines
# where the dataset is stored somewhere else.
SEEDS_DATA_PATH = os.environ.get(
    "SEEDS_DATA_PATH",
    "C:\\Users\\LG\\Downloads\\seeds\\seeds_dataset.txt",
)
# The file is whitespace-delimited and has no header row.
data = pd.read_csv(SEEDS_DATA_PATH, sep=r'\s+', header=None)

In [125]:
# Separate the feature matrix (X) from the label vector (y):
# every column except the last is a feature (7 of them), the last one is the class id (1~3).
n_feature_columns = data.shape[1] - 1
X = data.iloc[:, :n_feature_columns].to_numpy()
y = data.iloc[:, n_feature_columns].to_numpy()

In [151]:
#레이블 인코딩 (0~2로 조정)
y = y.astype(int) - 1
y_encoded = to_categorical(y, num_classes=3)

In [153]:
# Shuffle features and labels with one shared permutation.
# Both arrays are rebuilt from `data` (file order) before shuffling rather than
# shuffling the current X / y_encoded in place. The in-place form is not idempotent:
# if the label-encoding cell is re-run after X has already been shuffled, freshly
# created (unshuffled) labels get paired with already-permuted rows, and the model is
# then trained on mismatched feature/label pairs. On a fresh kernel the result is
# identical to shuffling the arrays produced by the previous cells.
features_in_file_order = data.iloc[:, :-1].to_numpy()
class_ids_in_file_order = data.iloc[:, -1].to_numpy().astype(int) - 1  # 1..3 -> 0..2
onehot_in_file_order = to_categorical(class_ids_in_file_order, num_classes=3)
X, y_encoded = shuffle(features_in_file_order, onehot_in_file_order, random_state=42)
assert len(X) == len(y_encoded), "features and labels must stay row-aligned"

In [155]:
# Split sizes and the seed shared by both splits.
TEST_FRACTION = 0.2
VAL_FRACTION = 0.1   # taken from the training portion, not from the full data
SPLIT_SEED = 42

# First cut: train vs. test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, random_state=SPLIT_SEED, test_size=TEST_FRACTION
)

# Second cut: final train vs. validation (requirement #1).
X_train_final, X_val, y_train_final, y_val = train_test_split(
    X_train, y_train, random_state=SPLIT_SEED, test_size=VAL_FRACTION
)

In [157]:
# 7. Feature standardization.
# The scaler's statistics are fitted on the training split only; the same fitted
# scaler is then applied to the training, validation and test splits.
scaler = StandardScaler().fit(X_train_final)
X_train_final, X_val, X_test = (
    scaler.transform(split) for split in (X_train_final, X_val, X_test)
)

In [171]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow random generators before any layer is created,
# so weight initialisation, dropout masks and batch shuffling are repeatable when the
# notebook is run top to bottom. Uses the same seed value as the data splits.
set_random_seed(42)

# Small fully connected classifier: 7 input features -> 32 -> 16 -> 3 class probabilities.
model = Sequential([
    Input(shape=(7,)),
    Dense(32, activation='relu'),
    Dropout(0.3),                    # randomly drops 30% of the first hidden layer's outputs during training
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')   # one output per class; matches the 3-column one-hot labels
])

In [173]:
# Training configuration: Adam optimizer, categorical cross-entropy against the
# one-hot targets, accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 50 epochs with mini-batches of 8 samples; the validation split is passed
# via validation_data so its loss/accuracy are reported alongside the training metrics.
fit_options = dict(
    epochs=50,
    batch_size=8,
    validation_data=(X_val, y_val),
)
history = model.fit(X_train_final, y_train_final, **fit_options)

Epoch 1/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - accuracy: 0.4073 - loss: 1.1041 - val_accuracy: 0.2941 - val_loss: 1.1083
Epoch 2/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.4076 - loss: 1.0707 - val_accuracy: 0.4118 - val_loss: 1.0875
Epoch 3/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.3018 - loss: 1.1053 - val_accuracy: 0.4118 - val_loss: 1.0811
Epoch 4/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.4508 - loss: 1.0663 - val_accuracy: 0.4118 - val_loss: 1.0831
Epoch 5/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4047 - loss: 1.0519 - val_accuracy: 0.4118 - val_loss: 1.1014
Epoch 6/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.3598 - loss: 1.1326 - val_accuracy: 0.4706 - val_loss: 1.0969
Epoch 7/50
[1m19/19[0m [32m━━━━

In [144]:
# Evaluate accuracy on the held-out test split.
# The numbers describe whichever `model` is currently in the kernel, so this cell
# has to be run after the training cell for the output to match the trained network.
test_scores = model.evaluate(X_test, y_test)
loss, accuracy = test_scores
print(f"테스트 정확도: {accuracy:.4f}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.7316 - loss: 0.7470
테스트 정확도: 0.7381


In [175]:
# Prediction example: compare the model's output with the true class for the
# first five test samples.
sample_features = X_test[:5]
sample_targets = y_test[:5]

pred = model.predict(sample_features)
print("예측 확률:", pred)
# argmax over the class axis turns probabilities / one-hot rows into class ids.
print("예측 클래스:", pred.argmax(axis=1))
print("실제 클래스:", sample_targets.argmax(axis=1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
예측 확률: [[0.54599935 0.33028778 0.12371287]
 [0.32686347 0.40459833 0.2685382 ]
 [0.3466132  0.3410088  0.31237802]
 [0.3150471  0.30404285 0.38091004]
 [0.4219598  0.33458054 0.24345967]]
예측 클래스: [0 1 0 2 0]
실제 클래스: [1 2 1 0 1]


In [177]:
# Class balance check: recover the class ids (0~2) from the one-hot labels and
# count how many samples each class has.
y_raw = y_encoded.argmax(axis=1)
unique, counts = np.unique(y_raw, return_counts=True)
class_histogram = {class_id: n_samples for class_id, n_samples in zip(unique, counts)}
print(class_histogram)

{0: 70, 1: 70, 2: 70}


In [179]:
# Distribution of predicted classes over the whole test split.
preds = model.predict(X_test)
pred_classes = preds.argmax(axis=1)
predicted_ids, predicted_counts = np.unique(pred_classes, return_counts=True)
print("예측 클래스 분포:", dict(zip(predicted_ids, predicted_counts)))


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step 
예측 클래스 분포: {0: 16, 1: 13, 2: 13}
