In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/dlsrhdwlsmdrofhs/car_evaluation.csv', header= None)

print(df)

          0      1      2     3      4     5      6
0     vhigh  vhigh      2     2  small   low  unacc
1     vhigh  vhigh      2     2  small   med  unacc
2     vhigh  vhigh      2     2  small  high  unacc
3     vhigh  vhigh      2     2    med   low  unacc
4     vhigh  vhigh      2     2    med   med  unacc
...     ...    ...    ...   ...    ...   ...    ...
1723    low    low  5more  more    med   med   good
1724    low    low  5more  more    med  high  vgood
1725    low    low  5more  more    big   low  unacc
1726    low    low  5more  more    big   med   good
1727    low    low  5more  more    big  high  vgood

[1728 rows x 7 columns]


In [None]:
print(df.columns)

Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')


In [None]:
df.columns = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety', 'output']
print(df.columns)

Index(['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety',
       'output'],
      dtype='object')


In [None]:
print(df.isnull().sum())

price           0
maint           0
doors           0
persons         0
lug_capacity    0
safety          0
output          0
dtype: int64


In [None]:
label_encoders = {}
for column in df.columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])
print(df)

      price  maint  doors  persons  lug_capacity  safety  output
0         3      3      0        0             2       1       2
1         3      3      0        0             2       2       2
2         3      3      0        0             2       0       2
3         3      3      0        0             1       1       2
4         3      3      0        0             1       2       2
...     ...    ...    ...      ...           ...     ...     ...
1723      1      1      3        2             1       2       1
1724      1      1      3        2             1       0       3
1725      1      1      3        2             0       1       2
1726      1      1      3        2             0       2       1
1727      1      1      3        2             0       0       3

[1728 rows x 7 columns]


In [None]:
df['output'].value_counts()

Unnamed: 0_level_0,count
output,Unnamed: 1_level_1
2,1210
0,384
1,69
3,65


In [None]:
X = df.drop('output', axis=1).values
y = df['output'].values

In [None]:
X

array([[3, 3, 0, 0, 2, 1],
       [3, 3, 0, 0, 2, 2],
       [3, 3, 0, 0, 2, 0],
       ...,
       [1, 1, 3, 2, 0, 1],
       [1, 1, 3, 2, 0, 2],
       [1, 1, 3, 2, 0, 0]])

In [None]:
y

array([2, 2, 2, ..., 2, 1, 3])

In [None]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print("학습 데이터 개수:", len(X_train))
print("테스트 데이터 개수:", len(X_test))

학습 데이터 개수: 1382
테스트 데이터 개수: 346


In [None]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1382, 6), (346, 6), (1382,), (346,))

In [None]:
# Decision Tree 모델 생성 및 학습
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)

# 예측 및 평가, confusion matrix 출력
dt_pred = dt_model.predict(X_test)
print(f'Decision Tree Accuracy: {accuracy_score(y_test, dt_pred)}')
print(confusion_matrix(y_test, dt_pred))

Decision Tree Accuracy: 0.9884393063583815
[[ 74   2   0   0]
 [  0  12   0   0]
 [  2   0 243   0]
 [  0   0   0  13]]


In [None]:
# Random Forest 모델 생성 및 학습
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

# 예측 및 평가, confusion matrix 출력
rf_pred = rf_model.predict(X_test)
print(f'Random Forest Accuracy: {accuracy_score(y_test, rf_pred)}')
print(confusion_matrix(y_test, rf_pred))

Random Forest Accuracy: 0.9739884393063584
[[ 71   4   1   0]
 [  1  11   0   0]
 [  3   0 242   0]
 [  0   0   0  13]]


In [None]:
# SVM 모델 생성 및 학습
svm_model = SVC()
svm_model.fit(X_train, y_train)

# 예측 및 평가, confusion matrix 출력
svm_pred = svm_model.predict(X_test)
print(f'SVM Accuracy: {accuracy_score(y_test, svm_pred)}')
print(confusion_matrix(y_test, svm_pred))

SVM Accuracy: 0.9132947976878613
[[ 67   4   5   0]
 [  5   7   0   0]
 [ 15   0 230   0]
 [  1   0   0  12]]


In [None]:
# KNN 모델 생성 및 학습 (k = 5)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# 예측 및 평가, confusion matrix 출력
knn_pred = knn.predict(X_test)
print(f'KNN: {accuracy_score(y_test, knn_pred)}')
print(confusion_matrix(y_test, knn_pred))

KNN: 0.953757225433526
[[ 71   0   5   0]
 [  5   7   0   0]
 [  4   0 241   0]
 [  2   0   0  11]]


In [None]:
# Logistic Regression 모델 생성 및 학습
lr_model =LogisticRegression(max_iter=200)
lr_model.fit(X_train, y_train)

# 예측 및 평가, confusion matrix 출력
lr_pred = lr_model.predict(X_test)
print(f'Logistic Regression: {accuracy_score(y_test, lr_pred)}')
print(confusion_matrix(y_test, lr_pred))

Logistic Regression: 0.6907514450867052
[[ 15   0  56   5]
 [  2   0  10   0]
 [ 23   0 220   2]
 [  3   0   6   4]]
