### Random Forest

In [1]:
from sklearn.datasets import load_iris, load_wine, load_breast_cancer, load_diabetes
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate

In [2]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [3]:
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [4]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

In [5]:
# Load the three built-in datasets used by the classifier comparisons below.
# The loaders are still called in the same order: iris, wine, breast cancer.
iris, wine, cancer = load_iris(), load_wine(), load_breast_cancer()

#### comparison with KNN Bagging

In [6]:
# Baseline: standardize the features, then classify with k-nearest neighbours (default settings).
KB_model = make_pipeline(StandardScaler(), KNeighborsClassifier())

# Bagged variant: 10 copies of the baseline pipeline, each fitted on a random draw of
# 50% of the samples and 50% of the features.
# random_state is fixed so the random draws, and therefore the reported scores,
# are reproducible across kernel restarts.
bagging_model = BaggingClassifier(
    KB_model,
    n_estimators=10,
    max_samples=0.5,
    max_features=0.5,
    random_state=42,
)

#### KNN Bagging with IRIS

In [8]:
# Cross-validate the plain KNN pipeline on iris (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(KB_model, iris.data, iris.target, cv=5)

print("KB_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

KB_model
평균 fit time : 0.0017156600952148438
평균 score time : 0.003604555130004883
평균 test score : 0.96


In [9]:
# Cross-validate the bagged KNN ensemble on iris (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(bagging_model, iris.data, iris.target, cv=5)

print("bagging_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

bagging_model
평균 fit time : 0.01171875
평균 score time : 0.006546592712402344
평균 test score : 0.9466666666666665


#### KNN Bagging with WINE

In [10]:
# Cross-validate the plain KNN pipeline on wine (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(KB_model, wine.data, wine.target, cv=5)

print("KB_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

KB_model
평균 fit time : 0.0016893863677978516
평균 score time : 0.0031976222991943358
평균 test score : 0.9493650793650794


In [11]:
# Cross-validate the bagged KNN ensemble on wine (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(bagging_model, wine.data, wine.target, cv=5)

print("bagging_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

bagging_model
평균 fit time : 0.011741828918457032
평균 score time : 0.00609283447265625
평균 test score : 0.9663492063492063


#### KNN Bagging with Breast_Cancer

In [12]:
# Cross-validate the plain KNN pipeline on the breast cancer data (cv=5) and
# report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(KB_model, cancer.data, cancer.target, cv=5)

print("KB_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

KB_model
평균 fit time : 0.0014884471893310547
평균 score time : 0.6988715171813965
평균 test score : 0.9648501785437045


In [13]:
# Cross-validate the bagged KNN ensemble on the breast cancer data (cv=5) and
# report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(bagging_model, cancer.data, cancer.target, cv=5)

print("bagging_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

bagging_model
평균 fit time : 0.015011215209960937
평균 score time : 0.0070953369140625
평균 test score : 0.9613414066138798


#### comparison with SVC Bagging

In [14]:
# Baseline: standardize the features, then fit a support vector classifier (default settings).
SB_model = make_pipeline(StandardScaler(), SVC())

# Bagged variant: 10 copies of the baseline pipeline. max_samples=1.0 and
# max_features=1.0 let every copy draw a full-size sample and see every feature.
# random_state is fixed so the resampling, and therefore the reported scores,
# are reproducible across kernel restarts.
Sbagging_model = BaggingClassifier(
    SB_model,
    n_estimators=10,
    max_samples=1.0,
    max_features=1.0,
    random_state=42,
)

#### SVC Bagging with IRIS

In [15]:
# Cross-validate the plain SVC pipeline on iris (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(SB_model, iris.data, iris.target, cv=5)

print("SB_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

SB_model
평균 fit time : 0.0032828807830810546
평균 score time : 0.0011794090270996094
평균 test score : 0.9666666666666666


In [16]:
# Cross-validate the bagged SVC ensemble on iris (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(Sbagging_model, iris.data, iris.target, cv=5)

print("Sbagging_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

Sbagging_model
평균 fit time : 0.01695423126220703
평균 score time : 0.0058258056640625
평균 test score : 0.9666666666666666


#### SVC bagging with WINE

In [17]:
# Cross-validate the plain SVC pipeline on wine (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(SB_model, wine.data, wine.target, cv=5)

print("SB_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

SB_model
평균 fit time : 0.0024710178375244142
평균 score time : 0.001197052001953125
평균 test score : 0.9833333333333334


In [18]:
# Cross-validate the bagged SVC ensemble on wine (cv=5) and report the mean
# fit time, score time and test score across the folds.
cross_val = cross_validate(Sbagging_model, wine.data, wine.target, cv=5)

print("Sbagging_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

Sbagging_model
평균 fit time : 0.02301158905029297
평균 score time : 0.0008749485015869141
평균 test score : 0.9833333333333334


#### SVC Bagging with Breast_Cancer

In [19]:
# Cross-validate the plain SVC pipeline on the breast cancer data (cv=5) and
# report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(SB_model, cancer.data, cancer.target, cv=5)

print("SB_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

SB_model
평균 fit time : 0.002946901321411133
평균 score time : 0.0020462512969970704
평균 test score : 0.9736376339077782


In [20]:
# Cross-validate the bagged SVC ensemble on the breast cancer data (cv=5) and
# report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(Sbagging_model, cancer.data, cancer.target, cv=5)

print("Sbagging_model")
print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

Sbagging_model
평균 fit time : 0.02575340270996094
평균 score time : 0.010900068283081054
평균 test score : 0.9736221083682658


#### Comparison with Decision Tree Bagging

In [21]:
# Baseline: standardize the features, then fit a single decision tree.
# random_state pins the tree's internal randomness so repeated runs of the
# baseline cells give the same score.
DB_model = make_pipeline(StandardScaler(), DecisionTreeClassifier(random_state=42))

# Bagged variant: 10 copies of the baseline pipeline, each drawing a full-size
# sample (max_samples=1.0) and seeing every feature (max_features=1.0).
# random_state is fixed so the resampling, and therefore the reported scores,
# are reproducible across kernel restarts.
Dbagging_model = BaggingClassifier(
    DB_model,
    n_estimators=10,
    max_samples=1.0,
    max_features=1.0,
    random_state=42,
)

#### Decision Tree Bagging with IRIS

In [23]:
# Cross-validate the single decision tree pipeline (DB_model) on iris (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(DB_model, iris.data, iris.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.0013933658599853515
평균 score time : 0.0012427330017089843
평균 test score : 0.9600000000000002


In [24]:
# Cross-validate the bagged decision tree ensemble (Dbagging_model) on iris (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(Dbagging_model, iris.data, iris.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.016581630706787108
평균 score time : 0.0002377033233642578
평균 test score : 0.9400000000000001


#### Decision Tree Bagging with WINE

In [25]:
# Cross-validate the single decision tree pipeline (DB_model) on wine (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(DB_model, wine.data, wine.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.0018618106842041016
평균 score time : 0.0012020587921142579
평균 test score : 0.8987301587301587


In [26]:
# Cross-validate the bagged decision tree ensemble (Dbagging_model) on wine (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(Dbagging_model, wine.data, wine.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.020525455474853516
평균 score time : 0.0005774021148681641
평균 test score : 0.9496825396825397


#### Decision Tree Bagging with Breast_Cancer

In [27]:
# Cross-validate the single decision tree pipeline (DB_model) on the breast cancer
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(DB_model, cancer.data, cancer.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.009351491928100586
평균 score time : 0.0005723953247070313
평균 test score : 0.9208818506443098


In [28]:
# Cross-validate the bagged decision tree ensemble (Dbagging_model) on the breast
# cancer data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(Dbagging_model, cancer.data, cancer.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.04588375091552734
평균 score time : 0.007817745208740234
평균 test score : 0.9525694767893185


In [None]:
#### The California housing data takes a long time to process, so run it only if there is time left

#### Regression Bagging

In [29]:
from sklearn.datasets import fetch_california_housing

# Load the two datasets used by the regressor comparisons below.
# The California housing fetch still runs before the diabetes loader.
housing, diabetes = fetch_california_housing(), load_diabetes()

#### KNN Regression Bagging with California

In [30]:
# Baseline: standardize the features, then regress with k-nearest neighbours (default settings).
RB_model = make_pipeline(StandardScaler(), KNeighborsRegressor())

# Bagged variant: 10 copies of the baseline pipeline, each fitted on a random draw of
# 50% of the samples and 50% of the features.
# random_state is fixed so the random draws, and therefore the reported scores,
# are reproducible across kernel restarts.
Rbagging_model = BaggingRegressor(
    RB_model,
    n_estimators=10,
    max_samples=0.5,
    max_features=0.5,
    random_state=42,
)

In [31]:
# Cross-validate the plain KNN regression pipeline (RB_model) on the California
# housing data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(RB_model, housing.data, housing.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.01843400001525879
평균 score time : 0.15346097946166992
평균 test score : 0.5211863022484255


In [32]:
# Cross-validate the bagged KNN regressor (Rbagging_model) on the California
# housing data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(Rbagging_model, housing.data, housing.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.06612548828125
평균 score time : 0.1938253879547119
평균 test score : 0.4786457694823514


#### KNN Regression Bagging with Diabetes

In [34]:
# Cross-validate the plain KNN regression pipeline (RB_model) on the diabetes
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(RB_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.0017603397369384765
평균 score time : 0.0015401363372802735
평균 test score : 0.3689720650295623


In [35]:
# Cross-validate the bagged KNN regressor (Rbagging_model) on the diabetes
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(Rbagging_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.009599447250366211
평균 score time : 0.010251760482788086
평균 test score : 0.37979838466485843


#### SVM Regression Bagging

In [36]:
# Baseline: standardize the features, then fit a support vector regressor (default settings).
SVRB_model = make_pipeline(StandardScaler(), SVR())

# Bagged variant: 10 copies of the baseline pipeline, each fitted on a random draw of
# 50% of the samples and 50% of the features.
# random_state is fixed so the random draws, and therefore the reported scores,
# are reproducible across kernel restarts.
SVRbagging_model = BaggingRegressor(
    SVRB_model,
    n_estimators=10,
    max_samples=0.5,
    max_features=0.5,
    random_state=42,
)

#### SVM Regression Bagging with California house price

In [37]:
# Cross-validate the plain SVR pipeline (SVRB_model) on the California housing
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(SVRB_model, housing.data, housing.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 4.293575954437256
평균 score time : 2.166254425048828
평균 test score : 0.6686837576949693


In [38]:
# Cross-validate the bagged SVR ensemble (SVRbagging_model) on the California
# housing data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(SVRbagging_model, housing.data, housing.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 11.137107133865356
평균 score time : 11.754446935653686
평균 test score : 0.47146352125965957


#### SVM Regression Bagging with Diabetes

In [39]:
# Cross-validate the plain SVR pipeline (SVRB_model) on the diabetes data (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(SVRB_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.0031022071838378907
평균 score time : 0.002146196365356445
평균 test score : 0.14659868748701582


In [40]:
# Cross-validate the bagged SVR ensemble (SVRbagging_model) on the diabetes data
# (cv=5) and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(SVRbagging_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.01959705352783203
평균 score time : 0.006229400634765625
평균 test score : 0.04588375436561987


#### Random Forest

In [41]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

In [42]:
# Standardize the features, then fit a random forest classifier (default settings).
# random_state is fixed so the forest's bootstrap and feature sampling, and
# therefore the reported scores, are reproducible across kernel restarts.
RF_model = make_pipeline(
    StandardScaler(),
    RandomForestClassifier(random_state=42),
)

In [43]:
# Cross-validate the random forest classifier pipeline (RF_model) on iris (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(RF_model, iris.data, iris.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.06251811981201172
평균 score time : 0.010295867919921875
평균 test score : 0.9666666666666668


In [44]:
# Cross-validate the random forest classifier pipeline (RF_model) on wine (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(RF_model, wine.data, wine.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.073870849609375
평균 score time : 0.003404855728149414
평균 test score : 0.9665079365079364


In [45]:
# Cross-validate the random forest classifier pipeline (RF_model) on the breast
# cancer data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(RF_model, cancer.data, cancer.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.12566785812377929
평균 score time : 0.0011131763458251953
평균 test score : 0.956078248719143


#### Random Forest Regression

In [46]:
# Standardize the features, then fit a random forest regressor (default settings).
# random_state is fixed so the forest's bootstrap and feature sampling, and
# therefore the reported scores, are reproducible across kernel restarts.
RFR_model = make_pipeline(
    StandardScaler(),
    RandomForestRegressor(random_state=42),
)

In [47]:
# Cross-validate the random forest regressor pipeline (RFR_model) on the California
# housing data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(RFR_model, housing.data, housing.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 9.999165391921997
평균 score time : 0.05374903678894043
평균 test score : 0.6484190587681129


In [48]:
# Cross-validate the random forest regressor pipeline (RFR_model) on the diabetes
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(RFR_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.16965742111206056
평균 score time : 0.0033438682556152345
평균 test score : 0.41761221857544195


#### Extra Trees Classifier (Extremely Randomized Trees)

In [49]:
# Standardize the features, then fit an extra-trees classifier (default settings).
# random_state is fixed so the randomized splits, and therefore the reported
# scores, are reproducible across kernel restarts.
ETC_model = make_pipeline(
    StandardScaler(),
    ExtraTreesClassifier(random_state=42),
)

In [50]:
# Cross-validate the extra-trees classifier pipeline (ETC_model) on iris (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(ETC_model, iris.data, iris.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.053739070892333984
평균 score time : 0.005111265182495117
평균 test score : 0.9533333333333334


In [51]:
# Cross-validate the extra-trees classifier pipeline (ETC_model) on wine (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(ETC_model, wine.data, wine.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.053258323669433595
평균 score time : 0.0028647422790527345
평균 test score : 0.972063492063492


In [52]:
# Cross-validate the extra-trees classifier pipeline (ETC_model) on the breast
# cancer data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(ETC_model, cancer.data, cancer.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.06360702514648438
평균 score time : 0.007068538665771484
평균 test score : 0.9666200900481291


#### Extra Tree Regressor

In [53]:
# Standardize the features, then fit an extra-trees regressor (default settings).
# random_state is fixed so the randomized splits, and therefore the reported
# scores, are reproducible across kernel restarts.
ETR_model = make_pipeline(
    StandardScaler(),
    ExtraTreesRegressor(random_state=42),
)

In [54]:
from sklearn.model_selection import cross_validate, train_test_split

# To keep processing time down, only 50% of the California housing data goes into the training split.
# random_state is fixed so X_train / y_train, which later cells reuse, contain
# the same rows on every run; without it each kernel restart scores a different subset.
X_train, X_test, y_train, y_test = train_test_split(
    housing.data,
    housing.target,
    train_size=0.5,
    random_state=42,
)

In [55]:
# Cross-validate the extra-trees regressor pipeline (ETR_model) on the California
# housing data (cv=5) and report the mean fit time, score time and test score.
# NOTE(review): this uses the full housing arrays, not the 50% X_train / y_train
# split created in the previous cell; confirm that is intended.
cross_val = cross_validate(ETR_model, housing.data, housing.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 3.0569358348846434
평균 score time : 0.05836491584777832
평균 test score : 0.6755411599563632


In [56]:
# Cross-validate the extra-trees regressor pipeline (ETR_model) on the diabetes
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(ETR_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.10213246345520019
평균 score time : 0.005986452102661133
평균 test score : 0.430452745713616


#### ADA Boost Classifier

In [57]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor

In [58]:
# Standardize the features, then fit an AdaBoost classifier (default settings).
# random_state is fixed so the boosting run, and therefore the reported scores,
# are reproducible across kernel restarts.
ADA_model = make_pipeline(
    StandardScaler(),
    AdaBoostClassifier(random_state=42),
)

In [59]:
# Cross-validate the AdaBoost classifier pipeline (ADA_model) on iris (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(ADA_model, iris.data, iris.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.04316344261169434
평균 score time : 0.002000570297241211
평균 test score : 0.9533333333333334


In [60]:
# Cross-validate the AdaBoost classifier pipeline (ADA_model) on wine (cv=5)
# and report the mean fit time, score time and test score across the folds.
cross_val = cross_validate(ADA_model, wine.data, wine.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.053311824798583984
평균 score time : 0.0024951934814453126
평균 test score : 0.9333333333333332


In [61]:
# Cross-validate the AdaBoost classifier pipeline (ADA_model) on the breast cancer
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(ADA_model, cancer.data, cancer.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.10466852188110351
평균 score time : 0.0026703357696533205
평균 test score : 0.9683744760130415


In [62]:
# Standardize the features, then fit an AdaBoost regressor (default settings).
# random_state is fixed so the boosting run, and therefore the reported scores,
# are reproducible across kernel restarts.
ADAR_model = make_pipeline(
    StandardScaler(),
    AdaBoostRegressor(random_state=42),
)

In [63]:
# Cross-validate the AdaBoost regressor pipeline (ADAR_model) with cv=5 on the
# X_train / y_train half of the California housing data, then report the mean
# fit time, score time and test score.
# NOTE(review): the other California cells in this notebook pass the full
# housing arrays, so this score may not be directly comparable with theirs.
cross_val = cross_validate(ADAR_model, X_train, y_train, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.36344265937805176
평균 score time : 0.007728385925292969
평균 test score : 0.4461062070581871


In [64]:
# Cross-validate the AdaBoost regressor pipeline (ADAR_model) on the diabetes
# data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(ADAR_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.05445361137390137
평균 score time : 0.0014202117919921875
평균 test score : 0.42171664903641143


#### Gradient Tree Boosting Classifier

In [65]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor

In [66]:
# Standardize the features, then fit a gradient boosting classifier (default settings).
# random_state is fixed so the boosted trees, and therefore the reported scores,
# are reproducible across kernel restarts.
GRAC_model = make_pipeline(
    StandardScaler(),
    GradientBoostingClassifier(random_state=42),
)

In [67]:
# Cross-validate the gradient boosting classifier pipeline (GRAC_model) on iris
# (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(GRAC_model, iris.data, iris.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.13029799461364747
평균 score time : 0.003695964813232422
평균 test score : 0.9666666666666668


In [68]:
# Cross-validate the gradient boosting classifier pipeline (GRAC_model) on wine
# (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(GRAC_model, wine.data, wine.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.23796467781066893
평균 score time : 0.0006236553192138672
평균 test score : 0.9385714285714286


In [69]:
# Cross-validate the gradient boosting classifier pipeline (GRAC_model) on the
# breast cancer data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(GRAC_model, cancer.data, cancer.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.3892977237701416
평균 score time : 0.00046677589416503905
평균 test score : 0.9631268436578171


#### Gradient Tree Boosting Regressor

In [70]:
# Standardize the features, then fit a gradient boosting regressor (default settings).
# random_state is fixed so the boosted trees, and therefore the reported scores,
# are reproducible across kernel restarts.
GRAR_model = make_pipeline(
    StandardScaler(),
    GradientBoostingRegressor(random_state=42),
)

In [71]:
# Cross-validate the gradient boosting regressor pipeline (GRAR_model) with cv=5
# on the X_train / y_train half of the California housing data, then report the
# mean fit time, score time and test score.
# NOTE(review): the other California cells in this notebook pass the full
# housing arrays, so this score may not be directly comparable with theirs.
cross_val = cross_validate(GRAR_model, X_train, y_train, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 1.4720391750335693
평균 score time : 0.003227043151855469
평균 test score : 0.770982928064017


In [72]:
# Cross-validate the gradient boosting regressor pipeline (GRAR_model) on the
# diabetes data (cv=5) and report the mean fit time, score time and test score.
cross_val = cross_validate(GRAR_model, diabetes.data, diabetes.target, cv=5)

print(
    f"평균 fit time : {cross_val['fit_time'].mean()}",
    f"평균 score time : {cross_val['score_time'].mean()}",
    f"평균 test score : {cross_val['test_score'].mean()}",
    sep="\n",
)

평균 fit time : 0.07579250335693359
평균 score time : 0.000344085693359375
평균 test score : 0.40893250640854023


#### Voting Classifier - hard voting

In [73]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score

In [74]:
# Three base classifiers plus a hard-voting ensemble built from them.
# The tree and the forest are seeded so the accuracies printed by the next cell
# are reproducible; SVC() is left at its defaults.
model_s = SVC()
model_d = DecisionTreeClassifier(random_state=42)
model_r = RandomForestClassifier(random_state=42)

# voting='hard' combines the predicted class labels of the three estimators.
voting_model = VotingClassifier(
    estimators=[('svc', model_s), ('DecisionTree', model_d), ('forest', model_r)],
    voting='hard'
)

In [75]:
# Score each base model and the hard-voting ensemble on iris (cv=5) and print
# the mean accuracy next to the estimator's class name.
for model in (model_s, model_d, model_r, voting_model):
    # type(model).__name__ yields the bare class name directly, instead of
    # slicing it out of the "<class '...'>" repr string.
    model_name = type(model).__name__
    scores = cross_val_score(model, iris.data, iris.target, cv=5)
    print(f'Accuracy : {scores.mean():.2f} [{model_name}]')

Accuracy : 0.97 [SVC]
Accuracy : 0.96 [DecisionTreeClassifier]
Accuracy : 0.97 [RandomForestClassifier]
Accuracy : 0.97 [VotingClassifier]


![image.png](attachment:image.png)

#### Voting Classifier - soft voting

In [76]:
# Three base classifiers plus a soft-voting ensemble built from them.
# probability=True lets the SVC produce the probability values that the
# weighted average needs.
# All three estimators are seeded so the accuracies printed below are reproducible.
model_s = SVC(probability=True, random_state=42)
model_d = DecisionTreeClassifier(random_state=42)
model_r = RandomForestClassifier(random_state=42)
voting_model = VotingClassifier(
    estimators=[('svc', model_s), ('DecisionTree', model_d), ('forest', model_r)],
    voting='soft',
    weights=[1, 1, 5]  # per-model weights used when averaging, in the same order as `estimators`
)

#### 가중치는 리스트 형태로 제공되며, 리스트의 순서는 estimators에 정의된 모델 순서('svc', 'DecisionTree', 'forest')와 동일해야 한다

In [77]:
# Score each base model and the soft-voting ensemble on iris and print the mean
# accuracy next to the estimator's class name.
for model in (model_s, model_d, model_r, voting_model):
    # type(model).__name__ yields the bare class name directly, instead of
    # slicing it out of the "<class '...'>" repr string.
    model_name = type(model).__name__
    # cv=5 is spelled out so this loop visibly matches the hard-voting loop above.
    scores = cross_val_score(model, iris.data, iris.target, cv=5)
    print(f'Accuracy : {scores.mean():.2f} [{model_name}]')

Accuracy : 0.97 [SVC]
Accuracy : 0.96 [DecisionTreeClassifier]
Accuracy : 0.97 [RandomForestClassifier]
Accuracy : 0.97 [VotingClassifier]
