# 분류 (classification)

In [11]:
from sklearn.datasets import load_iris
iris = load_iris()

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target,test_size=0.2, stratify=iris.target, random_state=2021
)

### Logistic Regression

In [13]:
# 모델 생성
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(verbose=1)

In [14]:
# 모델 학습
lr.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


LogisticRegression(verbose=1)

In [15]:
# 예측
pred_lr = lr.predict(X_test)

In [16]:
#평가(정확도)
from sklearn.metrics import accuracy_score
accuracy_score(y_test, pred_lr)

0.9333333333333333

In [17]:
# 하이퍼 파라미터 확인
print(lr.get_params())

{'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 1, 'warm_start': False}


In [18]:
# 결과확인 - weight(coefficient, 계수)
lr.coef_

array([[-0.4949127 ,  0.79326939, -2.34869935, -0.93928032],
       [ 0.50986843, -0.33916789, -0.10284747, -1.00755976],
       [-0.01495572, -0.4541015 ,  2.45154682,  1.94684008]])

In [19]:
# 결과확인 - Bios(Intercept, 절편)
lr.intercept_

array([ 10.09845541,   1.920151  , -12.01860641])

### 결정 트리(Decision Tree)

In [20]:
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
pred_dt = dtc.predict(X_test)
accuracy_score(y_test, pred_dt)

0.9

In [27]:
# 하이퍼 파라미터 확인
print(dtc.get_params())

{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'presort': 'deprecated', 'random_state': None, 'splitter': 'best'}


In [21]:
# 결과 확인
dtc.feature_importances_

array([0.        , 0.01666667, 0.55525328, 0.42808005])

### 서포트 벡터 머신

In [22]:
from sklearn.svm import SVC
svc = SVC()
svc.fit(X_train, y_train)
pred_svc = svc.predict(X_test)
accuracy_score(y_test, pred_svc)

0.9

In [28]:
# 하이퍼 파라미터 확인
print(svc.get_params())

{'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}


### 랜덤 포레스트

In [23]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
pred_rf = rf.predict(X_test)
accuracy_score(y_test, pred_rf)

0.9

In [29]:
# 하이퍼 파라미터 확인
print(rf.get_params())

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'auto', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_impurity_split': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


### k 최근접 이웃(K Nearest Neighbor)

In [24]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
pred_knn = knn.predict(X_test)
accuracy_score(y_test, pred_knn)

0.9333333333333333

In [30]:
# 하이퍼 파라미터 확인
print(knn.get_params())

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


### 결과비교

In [25]:
import pandas as pd
df = pd.DataFrame({'y_test':y_test, 'lr':pred_lr, 'dt':pred_dt,
                    'sv':pred_svc, 'rf':pred_rf, 'knn':pred_knn})
df

Unnamed: 0,y_test,lr,dt,sv,rf,knn
0,0,0,0,0,0,0
1,1,1,1,1,1,1
2,1,1,2,1,2,2
3,2,2,2,2,2,2
4,0,0,0,0,0,0
5,1,1,1,1,1,1
6,0,0,0,0,0,0
7,1,1,1,1,1,1
8,2,2,2,2,2,2
9,0,0,0,0,0,0
