# 앙상블 학습
## - 1.voting 방식
### - 1.1.hard voting 
### - 로지스틱 회귀
### - 서포트 벡터 머신
### - k-최근접 이웃

In [1]:
# Load the breast-cancer dataset via scikit-learn's dataset loader.
from sklearn.datasets import load_breast_cancer

cancer = load_breast_cancer()

In [4]:
from sklearn.preprocessing import MinMaxScaler
# Rescale the feature matrix with a default-configured MinMaxScaler.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split performed later, so test-set statistics leak into the
# scaling. Consider fitting on the training split only and confirm how the
# recorded scores change.
cancer_scaled = MinMaxScaler().fit_transform(cancer.data)


In [5]:
from sklearn.model_selection import train_test_split

# Stratified hold-out split: 20% of the samples go to the test set, and the
# fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer_scaled,
    cancer.target,
    test_size=0.2,
    stratify=cancer.target,
    random_state=2022,
)

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [7]:
# Base estimators combined by the voting ensembles: logistic regression,
# a support vector classifier and k-nearest neighbours (KNN uses defaults).
lrc = LogisticRegression(random_state=2022)
svc = SVC(random_state=2022)
knn = KNeighborsClassifier()


#### 1.1. 하드 보팅

In [8]:
# Ensemble classifier for hard voting over the three base estimators.
from sklearn.ensemble import VotingClassifier

voc = VotingClassifier(
    estimators=[("LRC", lrc), ("SVC", svc), ("KNN", knn)],
    voting="hard",
)

In [9]:
# Train the hard-voting ensemble, then report its accuracy on the test split.
voc.fit(X_train, y_train)
voc.score(X_test, y_test)

1.0

In [11]:
# Performance of each base classifier on its own.
# Per the recorded output, LRC and KNN fall slightly short of a perfect score
# while SVC alone already reaches 1.0; the ensemble also scored 1.0.
lrc.fit(X_train, y_train)
svc.fit(X_train, y_train)
knn.fit(X_train, y_train)
(
    lrc.score(X_test, y_test),
    svc.score(X_test, y_test),
    knn.score(X_test, y_test),
)

(0.9912280701754386, 1.0, 0.9824561403508771)

#### 1.2. 소프트 보팅

In [None]:
# Ensemble classifier for hard voting (same configuration as the earlier
# hard-voting cell).
# NOTE(review): this cell sits under the soft-voting heading but still builds
# a voting="hard" ensemble; the soft-voting ensemble is created further down
# as voc2. This looks like a leftover copy — confirm whether it can be dropped.
from sklearn.ensemble import VotingClassifier

voc = VotingClassifier(
    estimators=[("LRC", lrc), ("SVC", svc), ("KNN", knn)],
    voting="hard",
)

In [None]:
# Re-train the hard-voting ensemble and report its test accuracy.
voc.fit(X_train, y_train)
voc.score(X_test, y_test)

1.0

In [None]:
# Performance of each base classifier on its own.
# Per the recorded output, LRC and KNN fall slightly short of a perfect score
# while SVC alone already reaches 1.0; the ensemble also scored 1.0.
lrc.fit(X_train, y_train)
svc.fit(X_train, y_train)
knn.fit(X_train, y_train)
(
    lrc.score(X_test, y_test),
    svc.score(X_test, y_test),
    knn.score(X_test, y_test),
)

(0.9912280701754386, 1.0, 0.9824561403508771)

In [12]:
# Predicted class labels from logistic regression for the first five test samples.
lrc.predict(X_test[:5])

array([0, 1, 0, 1, 0])

In [15]:
# Per-class probabilities from logistic regression for the first five test
# samples; soft voting combines probabilities like these.
lrc.predict_proba(X_test[:5])

array([[0.99792166, 0.00207834],
       [0.07775117, 0.92224883],
       [0.9774613 , 0.0225387 ],
       [0.05952966, 0.94047034],
       [0.99554778, 0.00445222]])

In [16]:
# Inspect the SVC hyper-parameters. The recorded output shows
# probability=False, which is why a second SVC with probability=True is
# created afterwards for soft voting.
svc.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': 2022,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [17]:
# A second SVC with probability estimates enabled so that predict_proba is
# available; show its probabilities for the first five test samples.
svc2 = SVC(probability=True, random_state=2022)
svc2.fit(X_train, y_train)
svc2.predict_proba(X_test[:5])

array([[9.99896299e-01, 1.03701492e-04],
       [3.84470713e-03, 9.96155293e-01],
       [9.99896384e-01, 1.03616009e-04],
       [5.97356113e-03, 9.94026439e-01],
       [9.99311796e-01, 6.88204061e-04]])

KNN

In [19]:
# Per-class probabilities from KNN for the last five test samples.
knn.predict_proba(X_test[-5:])

array([[0.8, 0.2],
       [1. , 0. ],
       [0.8, 0.2],
       [0. , 1. ],
       [0. , 1. ]])

soft voting

In [22]:
# Soft-voting ensemble; it uses svc2 (probability=True) instead of svc so that
# every member can supply class probabilities.
voc2 = VotingClassifier(
    estimators=[("LRC", lrc), ("SVC", svc2), ("KNN", knn)],
    voting="soft",
)

In [23]:
# Train the soft-voting ensemble, then report its accuracy on the test split.
voc2.fit(X_train, y_train)
voc2.score(X_test, y_test)

1.0

In [25]:
# Per-class probabilities from the soft-voting ensemble for the first five
# test samples.
voc2.predict_proba(X_test[:5])

array([[9.99272654e-01, 7.27346212e-04],
       [2.71986265e-02, 9.72801374e-01],
       [9.92452563e-01, 7.54743719e-03],
       [2.18344062e-02, 9.78165594e-01],
       [9.98286525e-01, 1.71347474e-03]])

GridSearchCV

In [26]:
# Current C values of the logistic regression and the probability-enabled SVC
# before tuning.
lrc.C, svc2.C

(1.0, 1.0)

In [28]:
# Candidate C values for each estimator. The "<name>__C" keys use the
# estimator names given to the VotingClassifier ("LRC" and "SVC").
params = {
    "LRC__C": [0.1, 1, 10],
    "SVC__C": [0.1, 1, 10],
}

In [29]:
from sklearn.model_selection import GridSearchCV

# 5-fold cross-validated grid search (scored by accuracy) over the
# soft-voting ensemble; the last line displays the best parameter combination.
grid_voc2 = GridSearchCV(voc2, params, scoring="accuracy", cv=5)
grid_voc2.fit(X_train, y_train)
grid_voc2.best_params_

{'LRC__C': 10, 'SVC__C': 0.1}

In [30]:
# Second, narrower search centred on the previous best values
# (LRC C=10, SVC C=0.1 according to the recorded output).
params = {
    "LRC__C": [5, 10, 30],
    "SVC__C": [0.05, 0.1, 0.3],
}
grid_voc2 = GridSearchCV(voc2, params, scoring="accuracy", cv=5)
grid_voc2.fit(X_train, y_train)
grid_voc2.best_params_

{'LRC__C': 10, 'SVC__C': 0.05}

In [31]:
# Test-set accuracy of the best ensemble found by the grid search.
grid_voc2.best_estimator_.score(X_test,y_test)

1.0

### 2. Bagging 방식 - Random Forest

In [33]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with a fixed seed and otherwise default settings; display
# its hyper-parameters.
rfc = RandomForestClassifier(random_state=2022)
rfc.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 2022,
 'verbose': 0,
 'warm_start': False}

In [34]:
# Train the random forest, then report its accuracy on the test split.
rfc.fit(X_train, y_train)
rfc.score(X_test, y_test)

1.0