In [6]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import BaggingClassifier 
from sklearn.model_selection import train_test_split 

from sklearn.datasets import load_breast_cancer 

breast_cancer = load_breast_cancer()
data = breast_cancer.data 
target = breast_cancer.target 

X_train, X_test, y_train, y_test = train_test_split(
  data, 
  target, 
  test_size=0.2, 
  random_state=2205, 
  stratify=target 
)

dtr = DecisionTreeClassifier(
  max_depth=3, 
  min_samples_leaf=10
)

bag_bin = BaggingClassifier(
  estimator=dtr, 
  n_estimators=500, 
  random_state=2022
)

bag_bin.fit(X_train, y_train)
y_score = bag_bin.predict_proba(X_test)[:, 1]

from sklearn.metrics import roc_curve, auc 

fpr, tpr, thresholds = roc_curve(y_test, y_score) 
AUC = auc(fpr, tpr)
print(AUC)

0.9973544973544974


In [7]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import BaggingClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_iris 

iris = load_iris() 
data = iris.data 
target = iris.target 

X_train, X_test, y_train, y_test = train_test_split(
  data, 
  target, 
  test_size=0.2, 
  random_state=2205, 
  stratify=target
)

dtr = DecisionTreeClassifier(
  max_depth=3, 
  min_samples_leaf=10
)

bag_multi = BaggingClassifier(
  estimator=dtr, 
  n_estimators=500, 
  random_state=2022 
)

bag_multi.fit(X_train, y_train)
y_pred = bag_multi.predict(X_test)

from sklearn.metrics import f1_score 
macro_f1 = f1_score(y_test, y_pred, average="macro")
macro_f1

0.9665831244778613

In [10]:
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import BaggingRegressor 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_diabetes 

diabetes = load_diabetes() 
data = diabetes.data 
target = diabetes.target 

X_train, X_test, y_train, y_test = train_test_split(
  data, 
  target, 
  test_size=0.2, 
  random_state=2205 
)

dtr = DecisionTreeClassifier(
  max_depth=3, 
  min_samples_leaf=10 
)

bag_conti = BaggingRegressor(
  estimator=dtr, 
  n_estimators=500, 
  random_state=2022
)

model_bag_conti =bag_conti.fit(X_train, y_train)
y_pred = bag_conti.predict(X_test)

from sklearn.metrics import root_mean_squared_error

rmse = root_mean_squared_error(y_test, y_pred)
print(rmse)


76.8334087628055


In [15]:
### 랜덤포레스트(이진분류모형)

from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_breast_cancer

breast_cancer = load_breast_cancer()
data = breast_cancer.data 
target = breast_cancer.target 

X_train, X_test, y_train, y_test = train_test_split(
  data, 
  target, 
  test_size=0.2, 
  random_state=2205, 
  stratify=target 
)

rf_bin = RandomForestClassifier(
  n_estimators=500, 
  max_depth=3, 
  min_samples_leaf=10, 
  max_features="sqrt", 
  random_state=2022
)

rf_bin.fit(X_train, y_train)
y_score = rf_bin.predict_proba(X_test)[:, 1]
print(y_score)

from sklearn.metrics import roc_curve, auc 

fpr, tpr, thresholds = roc_curve(y_test, y_score)
print(thresholds)
AUC = auc(fpr, tpr) 
print(AUC)

[0.98099818 0.09079731 0.98484473 0.98004614 0.98677052 0.98945006
 0.08129826 0.987916   0.97716151 0.96929995 0.12100348 0.97620381
 0.0160499  0.02146784 0.37395488 0.88674269 0.45630911 0.97958596
 0.82602614 0.98444679 0.00445563 0.98210796 0.64622102 0.96642203
 0.98187091 0.06033479 0.15449038 0.98446604 0.02067586 0.93320562
 0.97213289 0.96813578 0.36962848 0.98637854 0.97894816 0.00662002
 0.23310394 0.97539262 0.86963843 0.9658736  0.93432569 0.27447534
 0.00953479 0.0796946  0.00626846 0.01550761 0.71673568 0.00407486
 0.5597179  0.98024057 0.04878536 0.97414184 0.78880321 0.96031096
 0.09091248 0.14475582 0.03316477 0.98732336 0.12838329 0.98176835
 0.0258579  0.91625961 0.01166216 0.13466088 0.39041401 0.98887271
 0.01776098 0.98363002 0.75415932 0.98976089 0.98896912 0.98878326
 0.02088798 0.05014248 0.23984069 0.91701141 0.50110812 0.55888534
 0.94219923 0.02763924 0.00473159 0.02560635 0.03585786 0.97706196
 0.98519591 0.90730211 0.0079546  0.97465816 0.02369729 0.0134

In [16]:
### 랜덤포레스트(다지분류)

from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_iris 

iris = load_iris() 
data = iris.data 
target = iris.target 

X_train, X_test, y_train, y_test = train_test_split(
  data, 
  target, 
  test_size=0.2, 
  random_state=2205, 
  stratify=target 
)

rf_multi = RandomForestClassifier(
  n_estimators=500, 
  max_depth=3, 
  min_samples_leaf=15, 
  max_features="sqrt", 
  random_state=2022
)

rf_multi.fit(X_train, y_train) 
y_pred = rf_multi.predict(X_test)

from sklearn.metrics import f1_score 
macro_f1 = f1_score(y_test, y_pred, average="macro")
macro_f1

0.9665831244778613

In [17]:
### 랜덤포레스트(연속형)
from sklearn.ensemble import RandomForestRegressor 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_diabetes

diabetes = load_diabetes() 
data = diabetes.data 
target = diabetes.target 

X_train, X_test, y_train, y_test = train_test_split(
  data, 
  target, 
  test_size=0.2, 
  random_state=2205 
)

rf_conti = RandomForestRegressor(
  n_estimators=500, 
  max_depth=3, 
  min_samples_leaf=10, 
  max_features=3, 
  random_state=2022 
)

rf_conti.fit(X_train, y_train)
y_pred = rf_conti.predict(X_test)

from sklearn.metrics import root_mean_squared_error 

rmse = root_mean_squared_error(y_test, y_pred)
print(rmse)

63.50376483948605


In [31]:
### AdaBoost(이진분류)

from sklearn.ensemble import AdaBoostClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_breast_cancer 

breast_cancer = load_breast_cancer()
data = breast_cancer.data 
target = breast_cancer.target 

X_train, X_test, y_train, y_test = train_test_split(
  data, 
  target, 
  test_size=0.2, 
  random_state=2205, 
  stratify=target 
)

ada_bin = AdaBoostClassifier(
  n_estimators=100, 
  learning_rate=0.5, 
  random_state=2022, 
  algorithm="SAMME"
)

ada_bin.fit(X_train, y_train)
y_score = ada_bin.predict_proba(X_test)[:, 1]

from sklearn.metrics import roc_curve, auc 

fpr, tpr, thresholds = roc_curve(y_test, y_score)
AUC= auc(fpr, tpr)
print(AUC)

1.0
