In [None]:
# DT, RF, SVM, LR, K-NN 
# car_evaluation.csv
# labelEncoder()

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

# 0. CSV 파일 불러오기
file = "C:/AI/car_evaluation.csv"  # CSV 파일 경로
df = pd.read_csv(file)
df.columns = ['buying', 'maint', 'doors', 'persons', 'lug', 'safety', 'class']

# 1. 데이터 전처리 (X: 특성, y: 타겟)
print("결측치 확인:\n", df.isnull().sum())
X = df.drop(columns=['class'])
y = df["class"]

print("\n수치화(전):\n", df.value_counts())
label_encoders = {}
encoder = LabelEncoder()
labels = X.columns
for col in labels:
    X[col] = encoder.fit_transform(X[col])
'''
X['buying'] = df['buying'].replace({'vhigh': 3, 'high': 2, 'med': 1, 'low': 0})
X['maint'] = df['buying'].replace({'vhigh': 3, 'high': 2, 'med': 1, 'low': 0})
X['doors'] = df['doors'].replace({'5more': 5})
X['persons'] = df['persons'].replace({'more': 5})
X['lug'] = df['lug'].replace({'big': 2, 'med': 1, 'small': 0})
X['safety'] = df['safety'].replace({'high': 2, 'med': 1, 'low': 0})
'''
print("\n수치화(후):\n", X.value_counts())
print(X)

# 2. 데이터 분할 (훈련 80%, 테스트 20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. 모델 학습
model_DT = DecisionTreeClassifier() 
model_RF = RandomForestClassifier()
model_SVM = SVC()
model_LR = LogisticRegression()

model_DT.fit(X_train, y_train)
model_RF.fit(X_train, y_train)
model_SVM.fit(X_train, y_train)
model_LR.fit(X_train, y_train)

# 5. 예측 및 정확도 평가
print("<<Result>>")
y_pred_DT = model_DT.predict(X_test)
accuracy_DT = accuracy_score(y_test, y_pred_DT)
print(f"1. Decision Tree Accuracy: {accuracy_DT:.4f}")

y_pred_RF = model_RF.predict(X_test)
accuracy_RF = accuracy_score(y_test, y_pred_RF)
print(f"2. Random Forest Accuracy: {accuracy_RF:.4f}")

y_pred_SVM = model_SVM.predict(X_test)
accuracy_SVM = accuracy_score(y_test, y_pred_SVM)
print(f"3. Support Vector Machine Accuracy: {accuracy_SVM:.4f}")

y_pred_LR = model_LR.predict(X_test)
accuracy_LR = accuracy_score(y_test, y_pred_LR)
print(f"4. Logistic Regression Accuracy: {accuracy_LR:.4f}")

# 5. K-NN 모델 학습 (K=3)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X_train, y_train)
y_pred_class = knn_clf.predict(X_test)
accuracy = knn_clf.score(X_test, y_test)
print(f"5-1. K-NN Classification 정확도: {accuracy:.2f}")

k_values = range(1, 21)  # K값 범위 설정
scores = []

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)  # K-NN 모델 생성
    knn.fit(X_train, y_train)  # 학습
    y_pred = knn.predict(X_test)  # 예측
    accuracy = accuracy_score(y_test, y_pred)  # 정확도 계산
    scores.append(accuracy)
    
# 최적의 K 값 찾기
best_k = k_values[np.argmax(scores)]
print(f"5-2. 최적의 K 값: {best_k}")

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("\n6. K-NN 결과의 Confusion Matrix Result:\n", cm)


결측치 확인:
 buying     0
maint      0
doors      0
persons    0
lug        0
safety     0
class      0
dtype: int64

수치화(전):
 buying  maint  doors  persons  lug    safety  class
high    high   2      2        big    high    unacc    1
med     med    4      more     small  med     acc      1
                                      high    acc      1
                               med    med     acc      1
                                      low     unacc    1
                                                      ..
low     low    3      2        med    med     unacc    1
                                      low     unacc    1
                                      high    unacc    1
                               big    med     unacc    1
vhigh   vhigh  5more  more     small  med     unacc    1
Name: count, Length: 1727, dtype: int64

수치화(후):
 buying  maint  doors  persons  lug  safety
0       0      0      0        0    0         1
2       2      2      2        2    2         1
         