<a href="https://colab.research.google.com/github/ashu433/Machine-Learning-Book-Practice-Q-A/blob/main/Chapter_7_Ensemble_Meathod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# **Voting Classifier**

In [2]:
log_clf=LogisticRegression()
rnd_fore_clf=RandomForestClassifier()
svc_clf=SVC()

voting_clf=VotingClassifier(estimators=[("log_clf",log_clf),("rand_forest",rnd_fore_clf),("SVC_Clf",svc_clf)])

In [3]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [4]:
X,Y=make_moons(n_samples=1000,noise=0.4,random_state=42)

X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)

In [5]:
voting_clf.fit(X_train,Y_train)

In [6]:
Y_predict=voting_clf.predict(X_test)

In [7]:
from sklearn.metrics import accuracy_score

In [8]:
accuracy_score(Y_test,Y_predict)

0.86

In [9]:
for clf in (log_clf,rnd_fore_clf,svc_clf,voting_clf):
  clf.fit(X_train,Y_train)
  Y_predict=clf.predict(X_test)
  print(clf.__class__.__name__,accuracy_score(Y_test,Y_predict))

LogisticRegression 0.82
RandomForestClassifier 0.84
SVC 0.855
VotingClassifier 0.865


# **Bagging and Pasting**

In [10]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [11]:
dec_clf=DecisionTreeClassifier()

In [12]:
bag_clf=BaggingClassifier(estimator=DecisionTreeClassifier(),n_estimators=500,max_samples=100,bootstrap=True,n_jobs=-1)

In [13]:
bag_clf.fit(X_train,Y_train)

In [14]:
Y_predict=bag_clf.predict(X_test)

In [15]:
accuracy_score(Y_test,Y_predict)

0.845

# **Out of bag sampling**

In [16]:
bag_clf=BaggingClassifier(estimator=DecisionTreeClassifier(),n_estimators=500,max_samples=100,bootstrap=True,n_jobs=-1,oob_score=True)

In [17]:
bag_clf.fit(X_train,Y_train)

In [18]:
Y_predict=bag_clf.predict(X_test)

In [19]:
bag_clf.oob_score_

0.85625

In [20]:
accuracy_score(Y_test,Y_predict)

0.855

In [21]:
M=bag_clf.oob_decision_function_

In [22]:
M.shape

(800, 2)

In [23]:
from sklearn.ensemble import RandomForestClassifier

In [24]:
rnf_clf=RandomForestClassifier(n_estimators=500,max_leaf_nodes=16,n_jobs=-1)

In [25]:
rnf_clf.fit(X_train,Y_train)

In [26]:
Y_predict=rnf_clf.predict(X_test)

In [27]:
accuracy_score(Y_test,Y_predict)

0.845

# **Feature importance using Random forest**

In [28]:
from sklearn.datasets import load_iris

In [29]:
iris_dataset=load_iris()

rnf_clf=RandomForestClassifier(n_estimators=500,n_jobs=-1)
rnf_clf.fit(iris_dataset["data"],iris_dataset["target"])
for name,score in zip(iris_dataset["feature_names"],rnf_clf.feature_importances_):
  print(name,score)

sepal length (cm) 0.10507565636456265
sepal width (cm) 0.023228599983778398
petal length (cm) 0.4197480570746191
petal width (cm) 0.45194768657703993


# **Boosting**

Ada Boost

In [30]:
from sklearn.ensemble import AdaBoostClassifier

In [31]:
ada_boost=AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),n_estimators=200,algorithm="SAMME",learning_rate=0.5)

In [32]:
ada_boost.fit(X_train,Y_train)



In [33]:
ada_boost=AdaBoostClassifier(n_estimators=200,algorithm="SAMME",learning_rate=0.5)

In [34]:
ada_boost.fit(X_train,Y_train)



Gradient Boosting

In [35]:
from sklearn.ensemble import GradientBoostingRegressor

In [36]:
gbr=GradientBoostingRegressor(max_depth=2,n_estimators=3,learning_rate=1)

In [37]:
gbr.fit(X_train,Y_train)

In [38]:
Y_predicted=gbr.predict(X_test)

In [39]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [40]:
X_train,X_teat,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)

In [41]:
gbrt=GradientBoostingRegressor(max_depth=2,n_estimators=120)
gbrt.fit(X_train,Y_train)

In [42]:
error_list=[]
for y_pred in gbrt.staged_predict(X_test):
  error=mean_squared_error(Y_test,y_pred)
  error_list.append(error)

In [43]:
len(error_list)

120

In [44]:
best_no_tree=np.argmin(error_list)+1

In [45]:
best_no_tree

78

In [46]:
gbrt=GradientBoostingRegressor(max_depth=2,n_estimators=best_no_tree)
gbrt.fit(X_train,Y_train)

In [47]:
gbrt=GradientBoostingRegressor(max_depth=2,warm_start=True)

min_val=float("inf")
error_up=0

In [48]:
for n_estimator in range(1,120):
  gbrt.n_estimators=n_estimator
  gbrt.fit(X_train,Y_train)
  y_predict=gbrt.predict(X_test)
  error_vale=mean_squared_error(Y_test,y_predict)
  if error_vale<min_val:
    min_val=error_vale
  else:
    error_up=error_up+1
    if error_up==5:
      break

In [49]:
import xgboost

In [50]:
xgbr=xgboost.XGBRegressor()
xgbr.fit(X_train,Y_train)

In [51]:
y_predict=xgbr.predict(X_test)

In [52]:
import xgboost as xgb

dtrain = xgb.DMatrix(X_train, label=Y_train)
dtest = xgb.DMatrix(X_test, label=Y_test)

params = {
    'objective': 'reg:squarederror',  # For regression tasks
    'max_depth': 3,
    'learning_rate': 0.1,
    'eval_metric': 'rmse'
}

model = xgb.train(
    params,
    dtrain,
    num_boost_round=100,
    evals=[(dtest, 'validation')],
    early_stopping_rounds=2
)

y_pred = model.predict(dtest)

[0]	validation-rmse:0.47323
[1]	validation-rmse:0.45044
[2]	validation-rmse:0.43094
[3]	validation-rmse:0.41435
[4]	validation-rmse:0.40037
[5]	validation-rmse:0.38854
[6]	validation-rmse:0.37866
[7]	validation-rmse:0.37101
[8]	validation-rmse:0.36427
[9]	validation-rmse:0.35820
[10]	validation-rmse:0.35331
[11]	validation-rmse:0.34957
[12]	validation-rmse:0.34595
[13]	validation-rmse:0.34370
[14]	validation-rmse:0.34102
[15]	validation-rmse:0.33905
[16]	validation-rmse:0.33683
[17]	validation-rmse:0.33547
[18]	validation-rmse:0.33415
[19]	validation-rmse:0.33259
[20]	validation-rmse:0.33042
[21]	validation-rmse:0.32902
[22]	validation-rmse:0.32721
[23]	validation-rmse:0.32598
[24]	validation-rmse:0.32589
[25]	validation-rmse:0.32571
[26]	validation-rmse:0.32568
[27]	validation-rmse:0.32503
[28]	validation-rmse:0.32370
[29]	validation-rmse:0.32378
[30]	validation-rmse:0.32357
[31]	validation-rmse:0.32334
[32]	validation-rmse:0.32221
[33]	validation-rmse:0.32212
[34]	validation-rmse:0.3