In [1]:
import numpy as np
import sklearn
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo 

In [2]:
car_df = fetch_ucirepo(id=19) 

X = car_df.data.features 
y = car_df.data.targets
df = pd.concat([X, y], axis=1)
df

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [3]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

# LabelEncoder 객체 생성
label = LabelEncoder()

#LabelEncoder를 적용하여 변환
df['buying'] = label.fit_transform(df['buying'])
df['maint'] = label.fit_transform(df['maint'])
df['doors'] = label.fit_transform(df['doors'])
df['persons'] = label.fit_transform(df['persons'])
df['lug_boot'] = label.fit_transform(df['lug_boot'])
df['safety'] = label.fit_transform(df['safety'])
df['class'] = label.fit_transform(df['class'])
df

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,3,3,0,0,2,1,2
1,3,3,0,0,2,2,2
2,3,3,0,0,2,0,2
3,3,3,0,0,1,1,2
4,3,3,0,0,1,2,2
...,...,...,...,...,...,...,...
1723,1,1,3,2,1,2,1
1724,1,1,3,2,1,0,3
1725,1,1,3,2,0,1,2
1726,1,1,3,2,0,2,1


In [4]:
# 결측치 제거
df = df.dropna()
df.isnull().sum()

buying      0
maint       0
doors       0
persons     0
lug_boot    0
safety      0
class       0
dtype: int64

In [21]:
df_corr=df.corr()

#히트맵->상관관계 확인
plt.figure(figsize=(7,7))
sns.set(font_scale=0.8) 
sns.heatmap(df_corr, norm=LogNorm(), annot=True, cbar=False)
plt.show()

NameError: name 'LogNorm' is not defined

<Figure size 700x700 with 0 Axes>

In [6]:
X=df.drop('class',axis=1)
X.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,3,3,0,0,2,1
1,3,3,0,0,2,2
2,3,3,0,0,2,0
3,3,3,0,0,1,1
4,3,3,0,0,1,2


In [7]:
y=df['class']
y.head()

0    2
1    2
2    2
3    2
4    2
Name: class, dtype: int32

In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.25,random_state=0)

In [9]:
#모델 학습 효율 높이기 위한 StandardScaler
from sklearn.preprocessing import StandardScaler

ss =StandardScaler()
ss.fit(X_train)
ss_train = ss.transform(X_train)
ss_test = ss.transform(X_test)

In [10]:
from sklearn.linear_model import LogisticRegression #선형 분류 모델
from sklearn.metrics import accuracy_score #분류 모델의 정확도 평가, 객관적인 지표 중 하나
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix #모델 성능 평가용 혼동 행렬 생성
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore') #파이썬 경고 메세지 무시

In [11]:
clf_lr = LogisticRegression(random_state=0) #로지스틱 회귀 분석 모델 생성
clf_lr.fit(ss_train, y_train) #모델 훈련

pred_lr = clf_lr.predict(ss_test) #모델 테스트

print ("\n--- Logistic Regression Classifier ---")
print(accuracy_score(y_test,pred_lr))
print(classification_report(y_test, pred_lr))
print (confusion_matrix(y_test, pred_lr)) # 얼마나 맞추고 어디서 틀렸는지


--- Logistic Regression Classifier ---
0.6527777777777778
              precision    recall  f1-score   support

           0       0.27      0.11      0.16        99
           1       0.00      0.00      0.00        21
           2       0.70      0.91      0.79       296
           3       0.30      0.19      0.23        16

    accuracy                           0.65       432
   macro avg       0.32      0.30      0.29       432
weighted avg       0.55      0.65      0.59       432

[[ 11   0  84   4]
 [  1   0  20   0]
 [ 25   0 268   3]
 [  4   0   9   3]]


In [12]:
print ("\n--- Radom Forest ---")
rf_clf = RandomForestClassifier(random_state=0) #랜덤 포레스트 분류기 모델 생성
rf_clf.fit(ss_train, y_train)

pred = rf_clf.predict(ss_test)

print(accuracy_score(y_test,pred_lr))
print(classification_report(y_test, pred_lr))
print (confusion_matrix(y_test, pred))


--- Radom Forest ---
0.6527777777777778
              precision    recall  f1-score   support

           0       0.27      0.11      0.16        99
           1       0.00      0.00      0.00        21
           2       0.70      0.91      0.79       296
           3       0.30      0.19      0.23        16

    accuracy                           0.65       432
   macro avg       0.32      0.30      0.29       432
weighted avg       0.55      0.65      0.59       432

[[ 93   2   4   0]
 [  1  20   0   0]
 [  3   0 293   0]
 [  2   0   0  14]]


In [13]:
clf_nn = MLPClassifier(random_state=0) # 다층 퍼셈트론 분류기 구현 [입력층, 1개 이상의 은닉층, 출력]
clf_nn.fit(ss_train, y_train)

pred_nn = clf_nn.predict(ss_test)

print ("\n--- Neural Network Classifier ---")
print(accuracy_score(y_test,pred_lr))
print(classification_report(y_test, pred_lr))
print (confusion_matrix(y_test, pred_nn))


--- Neural Network Classifier ---
0.6527777777777778
              precision    recall  f1-score   support

           0       0.27      0.11      0.16        99
           1       0.00      0.00      0.00        21
           2       0.70      0.91      0.79       296
           3       0.30      0.19      0.23        16

    accuracy                           0.65       432
   macro avg       0.32      0.30      0.29       432
weighted avg       0.55      0.65      0.59       432

[[ 90   1   8   0]
 [  1  17   0   3]
 [  4   0 292   0]
 [  2   0   0  14]]
