In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# 1. Load the data
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on a machine where this exact file exists.
csv_path = "C:\\Users\\axhtl\\OneDrive\\바탕 화면\\학교\\인공지능개론\\car_evaluation.csv"
# header=None: the CSV has no header row, so pandas labels the columns 0..6.
df = pd.read_csv(csv_path, header=None)

# Display the loaded DataFrame
df

Unnamed: 0,0,1,2,3,4,5,6
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [2]:
# 2. Rename the columns: six feature columns followed by the class label ('output').
column_names = [
    'price',
    'maint',
    'doors',
    'persons',
    'lug_capacity',
    'safety',
    'output',
]
df.columns = column_names

In [3]:
# Confirm the column labels currently set on the frame.
df.columns

Index(['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety',
       'output'],
      dtype='object')

In [4]:
# Display the frame to check the data under the named columns.
df

Unnamed: 0,price,maint,doors,persons,lug_capacity,safety,output
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [5]:
# 3. Check for missing values => there are none (every per-column count is 0)
missing_per_column = df.isna().sum()
missing_per_column

price           0
maint           0
doors           0
persons         0
lug_capacity    0
safety          0
output          0
dtype: int64

In [6]:
# Names of every column that gets label-encoded (all features plus the target).
columns = [
    'price',
    'maint',
    'doors',
    'persons',
    'lug_capacity',
    'safety',
    'output',
]

In [7]:
# 3. Encoding: replace each string category with an integer code.
# One LabelEncoder per column is kept in `label_encoders` so the codes can be
# mapped back to the original labels later (via inverse_transform).
# NOTE(review): LabelEncoder numbers classes in sorted (alphabetical) order,
# e.g. lug_capacity becomes big=0, med=1, small=2, so the natural ordering of
# the feature levels is not preserved. Consider an ordinal or one-hot encoding
# for the feature columns.
label_encoders = {name: LabelEncoder() for name in columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

In [8]:
# Inspect the frame after encoding: every column now holds integer codes.
df

Unnamed: 0,price,maint,doors,persons,lug_capacity,safety,output
0,3,3,0,0,2,1,2
1,3,3,0,0,2,2,2
2,3,3,0,0,2,0,2
3,3,3,0,0,1,1,2
4,3,3,0,0,1,2,2
...,...,...,...,...,...,...,...
1723,1,1,3,2,1,2,1
1724,1,1,3,2,1,0,3
1725,1,1,3,2,0,1,2
1726,1,1,3,2,0,2,1


In [9]:
# 4. Inspect the label distribution (number of rows per encoded class)
label_counts = df['output'].value_counts()
label_counts

output
2    1210
0     384
1      69
3      65
Name: count, dtype: int64

In [10]:
# 5. Separate features from the label: X gets every column except 'output',
#    y gets the 'output' column; both are converted to NumPy arrays.
feature_frame = df.drop(columns='output')
X = feature_frame.to_numpy()
y = df['output'].to_numpy()

In [11]:
# Display the feature matrix (one row per sample, one column per encoded feature).
X

array([[3, 3, 0, 0, 2, 1],
       [3, 3, 0, 0, 2, 2],
       [3, 3, 0, 0, 2, 0],
       ...,
       [1, 1, 3, 2, 0, 1],
       [1, 1, 3, 2, 0, 2],
       [1, 1, 3, 2, 0, 0]])

In [12]:
# Display the target vector of encoded class labels.
y

array([2, 2, 2, ..., 2, 1, 3])

In [13]:
# 6. Train/test split.
# The split is done BEFORE scaling so that no statistics computed from the
# held-out test rows leak into the training data. random_state is unchanged,
# so the rows assigned to each partition are the same as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# 7. Standardization.
# Fit the scaler on the training rows only, then apply the same learned
# mean/std to both partitions. `X` itself is left unscaled, which also makes
# this cell safe to re-run.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
# Sanity-check the partition sizes: features and labels for train and test.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1382, 6), (346, 6), (1382,), (346,))

In [15]:
# 8. Model training and evaluation
# Decision tree: train and evaluate.
# random_state is fixed (same seed as the train/test split) so that ties between
# equally good splits are broken the same way on every run and the reported
# metrics are reproducible after Restart & Run All.
dt_model = DecisionTreeClassifier(random_state=0)
dt_model.fit(X_train, y_train)  # train

dt_pred = dt_model.predict(X_test)  # predict on the held-out set

print("\n--- Decision Tree ---")
print(accuracy_score(y_test, dt_pred))
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test, dt_pred))


--- Decision Tree ---
0.976878612716763
[[ 74   2   3   0]
 [  0  17   0   0]
 [  2   0 238   0]
 [  1   0   0   9]]


In [16]:
# Random forest: train and evaluate.
# random_state is fixed (same seed as the train/test split) because the forest's
# bootstrap sampling and feature sub-sampling are random; without a seed the
# reported metrics change on every run.
rf_model = RandomForestClassifier(random_state=0)
rf_model.fit(X_train, y_train)  # train

rf_pred = rf_model.predict(X_test)  # predict on the held-out set

print("\n--- Random Forest ---")
print(accuracy_score(y_test, rf_pred))
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test, rf_pred))


--- Random Forest ---
0.9739884393063584
[[ 75   2   1   1]
 [  1  15   0   1]
 [  2   0 238   0]
 [  1   0   0   9]]


In [17]:
# SVM: train and evaluate (default SVC hyper-parameters).
svm_model = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator
svm_pred = svm_model.predict(X_test)

svm_accuracy = accuracy_score(y_test, svm_pred)
svm_confusion = confusion_matrix(y_test, svm_pred)  # rows: true, columns: predicted

print("\n--- SVM ---")
print(svm_accuracy)
print(svm_confusion)


--- SVM ---
0.9132947976878613
[[ 73   1   4   1]
 [  7   8   0   2]
 [ 12   0 228   0]
 [  3   0   0   7]]


In [18]:
# Logistic regression: train and evaluate.
# max_iter is raised to 200 to give the solver more iterations to converge.
# NOTE(review): accuracy here is far below the tree-based models. The features
# are arbitrary integer codes fed to a linear model; one-hot encoding may help.
# Confirm before drawing conclusions from this comparison.
lr_model = LogisticRegression(max_iter=200).fit(X_train, y_train)  # fit() returns the estimator
lr_pred = lr_model.predict(X_test)

lr_accuracy = accuracy_score(y_test, lr_pred)
lr_confusion = confusion_matrix(y_test, lr_pred)  # rows: true, columns: predicted

print("\n--- Logistic regression ---")
print(lr_accuracy)
print(lr_confusion)


--- Logistic regression ---
0.6502890173410405
[[  9   0  67   3]
 [  1   0  16   0]
 [ 22   0 214   4]
 [  2   0   6   2]]
