# 분류(Classification)

In [4]:
# Load the iris dataset bundled with scikit-learn; later cells read its
# `.data` (features) and `.target` (labels) attributes.
from sklearn.datasets import load_iris
iris = load_iris()

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set. Stratifying on the labels
# keeps the class proportions the same in both splits, and the fixed seed
# makes the split itself reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    stratify=iris.target,
    random_state=2021,
)

### Logistic Regression

In [8]:
# Create the model (no arguments, so every hyperparameter keeps its default)
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [9]:
# Train the model on the training split.
# fit() returns the estimator itself, which is why the cell echoes its repr.
lr.fit(X_train, y_train)

LogisticRegression()

In [10]:
# Predict class labels for the held-out test samples
pred_lr = lr.predict(X_test)

In [11]:
# Score the predictions: fraction of test samples whose predicted label
# matches the true label.
from sklearn.metrics import accuracy_score
accuracy_score(y_test, pred_lr)

0.9333333333333333

In [22]:
# Inspect the hyperparameters the model was built with
lr.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [12]:
# Inspect the result - weights (coefficients) learned by the model.
# The echoed array has one row per class and one column per feature (3 x 4 here).
lr.coef_

array([[-0.48619865,  0.79125412, -2.35181355, -0.94314285],
       [ 0.50544959, -0.3353252 , -0.10191787, -1.00708912],
       [-0.01925094, -0.45592893,  2.45373142,  1.95023197]])

In [13]:
# Inspect the result - bias (intercept) terms, one per class
lr.intercept_

array([ 10.07398767,   1.92986752, -12.00385518])

### 결정 트리(Decision Tree)

In [14]:
from sklearn.tree import DecisionTreeClassifier

# Build the tree with default settings and train it in one step
# (fit() hands back the fitted estimator).
# NOTE(review): no random_state is passed, so the score may vary between runs.
dtc = DecisionTreeClassifier().fit(X_train, y_train)
pred_dt = dtc.predict(X_test)

# Accuracy on the held-out test split; kept as the last expression so the
# notebook echoes it.
accuracy_score(y_test, pred_dt)

0.9

In [23]:
# Inspect the hyperparameters the decision tree was built with
dtc.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'presort': 'deprecated',
 'random_state': None,
 'splitter': 'best'}

In [15]:
# Inspect the result - importance score of each input feature in the fitted tree
dtc.feature_importances_

array([0.        , 0.01666667, 0.55525328, 0.42808005])

### 서포트 벡터 머신(Support Vector Machine)

In [16]:
from sklearn.svm import SVC

# Build a support vector classifier with default settings and train it in
# one step (fit() hands back the fitted estimator).
svc = SVC().fit(X_train, y_train)
pred_svc = svc.predict(X_test)

# Accuracy on the held-out test split; kept as the last expression so the
# notebook echoes it.
accuracy_score(y_test, pred_svc)

0.9

### 랜덤 포레스트(Random Forest)

In [18]:
from sklearn.ensemble import RandomForestClassifier

# Build a random forest with default settings and train it in one step
# (fit() hands back the fitted estimator).
# NOTE(review): no random_state is passed, so the score may vary between runs.
rf = RandomForestClassifier().fit(X_train, y_train)
pred_rf = rf.predict(X_test)

# Accuracy on the held-out test split; kept as the last expression so the
# notebook echoes it.
accuracy_score(y_test, pred_rf)

0.9

In [24]:
# Inspect the hyperparameters the random forest was built with
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

### K 최근접 이웃(K Nearest Neighbor)

In [19]:
from sklearn.neighbors import KNeighborsClassifier

# Build a k-nearest-neighbours classifier with default settings and train
# it in one step (fit() hands back the fitted estimator).
knn = KNeighborsClassifier().fit(X_train, y_train)
pred_knn = knn.predict(X_test)

# Accuracy on the held-out test split; kept as the last expression so the
# notebook echoes it.
accuracy_score(y_test, pred_knn)

0.9333333333333333

In [25]:
# Inspect the hyperparameters the k-nearest-neighbours model was built with
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

### 결과 비교

In [21]:
import pandas as pd

# Put the true labels and each model's predictions side by side, one column
# per source, so disagreements can be compared row by row.
df = pd.DataFrame(
    {
        'y_test': y_test,
        'lr': pred_lr,
        'dt': pred_dt,
        'sv': pred_svc,
        'rf': pred_rf,
        'knn': pred_knn,
    }
)

# Show the first five rows
df.head()

Unnamed: 0,y_test,lr,dt,sv,rf,knn
0,0,0,0,0,0,0
1,1,1,1,1,1,1
2,1,1,2,1,2,2
3,2,2,2,2,2,2
4,0,0,0,0,0,0
