In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# 1. 데이터 로딩
file_path = "C:\\Users\\axhtl\\OneDrive\\바탕 화면\\학교\\인공지능개론\\car_evaluation.csv"
df = pd.read_csv(file_path)

# 데이터프레임 확인
print(df.head())
# 컬럼명 확인
print(df.columns)

   vhigh vhigh.1  2 2.1  small   low  unacc
0  vhigh   vhigh  2   2  small   med  unacc
1  vhigh   vhigh  2   2  small  high  unacc
2  vhigh   vhigh  2   2    med   low  unacc
3  vhigh   vhigh  2   2    med   med  unacc
4  vhigh   vhigh  2   2    med  high  unacc
Index(['vhigh', 'vhigh.1', '2', '2.1', 'small', 'low', 'unacc'], dtype='object')


In [2]:
# 2. 결측치 확인 => 결측치 없음
df.isnull().sum()

vhigh      0
vhigh.1    0
2          0
2.1        0
small      0
low        0
unacc      0
dtype: int64

In [3]:
# 3. 레이블 확인
df['unacc'].value_counts()

unacc
unacc    1209
acc       384
good       69
vgood      65
Name: count, dtype: int64

In [4]:
# 4. encoding
# LabelEncoder 객체 생성
label_encoders = {}

# 모든 컬럼에 대해 encoding 수행
for col in df.columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # 저장

# 변환된 데이터 확인
print(df.head())

   vhigh  vhigh.1  2  2.1  small  low  unacc
0      3        3  0    0      2    2      2
1      3        3  0    0      2    0      2
2      3        3  0    0      1    1      2
3      3        3  0    0      1    2      2
4      3        3  0    0      1    0      2


In [5]:
# 특성과 타겟 분리
X = df.iloc[:, :-1]  # 마지막 열 제외 (특성)
y = df.iloc[:, -1]   # 마지막 열 (클래스)

# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# 모델 학습 및 평가
# 결정 트리 학습 및 평가
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train) # 학습

dt_pred = dt_model.predict(X_test) # 예측

print("\n--- Decision Tree ---")
print(accuracy_score(y_test, dt_pred))
print(confusion_matrix(y_test, dt_pred))


--- Decision Tree ---
0.9710982658959537
[[ 73   2   0   2]
 [  2  12   0   1]
 [  1   0 236   0]
 [  0   2   0  15]]


In [7]:
# 랜덤 포레스트 학습 및 평가
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

rf_pred = rf_model.predict(X_test)

print("\n--- Random Forest ---")
print(accuracy_score(y_test, rf_pred))
print(confusion_matrix(y_test, rf_pred))


--- Random Forest ---
0.9682080924855492
[[ 74   1   1   1]
 [  2   9   0   4]
 [  1   0 236   0]
 [  1   0   0  16]]


In [8]:
# SVM 학습 및 평가
svm_model = SVC()
svm_model.fit(X_train, y_train)

svm_pred = svm_model.predict(X_test)

print("\n--- SVM ---")
print(accuracy_score(y_test, svm_pred))
print(confusion_matrix(y_test, svm_pred))


--- SVM ---
0.8988439306358381
[[ 61   0  16   0]
 [ 11   2   0   2]
 [  1   0 236   0]
 [  5   0   0  12]]


In [9]:
# 로지스틱 회귀 학습 및 평가
lr_model = LogisticRegression(max_iter=200)
lr_model.fit(X_train, y_train)

lr_pred = lr_model.predict(X_test)

print("\n--- Logistic regression ---")
print(accuracy_score(y_test, lr_pred))
print(confusion_matrix(y_test, lr_pred))


--- Logistic regression ---
0.684971098265896
[[ 12   0  64   1]
 [  2   0  13   0]
 [ 13   0 224   0]
 [ 11   0   5   1]]


In [10]:
# knn으로 붓꽃 분류

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# 5. 예측 및 평가
y_pred = knn.predict(X_test)

print("\n--- KNN ---")
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


--- KNN ---
0.8757225433526011
[[ 54   1  22   0]
 [ 12   2   1   0]
 [  0   0 237   0]
 [  6   0   1  10]]
