In [1]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Generate the synthetic classification dataset used throughout the notebook:
# 10,000 samples, 12 features, 4 of them informative.
# random_state is fixed so that Restart Kernel -> Run All regenerates exactly
# the same data; without it every score below changes from run to run.
X, y = make_classification(
    n_samples=10000,
    n_features=12,
    n_informative=4,
    random_state=42,
)

In [3]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [4]:
# Baseline: a single decision tree, scored on the held-out test set.
# fit() returns the estimator itself, so construction and fitting are chained.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))

accuracy: 0.9445


## Bagging using Decision Tree

In [5]:
# Bagging: 500 decision trees, each fitted on a bootstrap sample (drawn with
# replacement) sized at half of the training rows.
model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,
    max_samples=0.5,
    n_jobs=-1,  # use every available core
    random_state=42,
)

In [6]:
# Fit the bagging ensemble on the training split.
model.fit(X_train, y_train)

In [7]:
# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)

In [8]:
# Report test-set accuracy of the bagged trees.
print("accuracy:", accuracy_score(y_test, y_pred))

accuracy: 0.9585


In [9]:
# Shape of the feature-index array recorded for the first fitted base
# estimator (this ensemble did not restrict max_features).
model.estimators_features_[0].shape

(12,)

In [10]:
# Shape of the sample-index array drawn for the first fitted base estimator;
# its length shows how many training rows each estimator was given.
model.estimators_samples_[0].shape

(4000,)

## Bagging using SVM Classifier

In [11]:
# Bagging with a support-vector classifier as the base estimator: 500 SVCs,
# each fitted on a bootstrap sample sized at 25% of the training rows.
model = BaggingClassifier(
    estimator=SVC(),
    n_estimators=500,
    bootstrap=True,
    max_samples=0.25,
    n_jobs=-1,  # use every available core
    random_state=42,
)

In [12]:
# Fit the SVC ensemble, predict on the held-out rows and report accuracy.
# fit() returns the estimator, so the prediction is chained onto it.
y_pred = model.fit(X_train, y_train).predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))

accuracy: 0.922


# Types of Bagging Ensembles

## Pasting

In [13]:
# Pasting: same ensemble idea as bagging, but each tree's subset (25% of the
# training rows) is drawn WITHOUT replacement (bootstrap=False).
model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=False,
    max_samples=0.25,
    n_jobs=-1,  # use every available core
    random_state=42,
    verbose=True,  # emit joblib progress messages during fit/predict
)

In [14]:
# Fit the pasting ensemble, predict on the held-out rows and report accuracy.
y_pred = model.fit(X_train, y_train).predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    4.6s remaining:   13.9s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    5.4s finished
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.2s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.1s finished


accuracy: 0.9555


## Random Subspaces

In [15]:
# Random Subspaces: every tree is given all training rows (max_samples=1.0,
# bootstrap=False); only the features are sampled -- half the feature count,
# drawn with replacement (bootstrap_features=True).
model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    # row sampling: disabled
    bootstrap=False,
    max_samples=1.0,
    # feature sampling: enabled
    bootstrap_features=True,
    max_features=0.5,
    n_jobs=-1,  # use every available core
    random_state=42,
    verbose=True,  # emit joblib progress messages during fit/predict
)

In [16]:
# Fit the random-subspaces ensemble, predict on the held-out rows and report
# accuracy.
y_pred = model.fit(X_train, y_train).predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:   16.3s remaining:   49.1s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:   18.9s finished
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.2s remaining:    0.9s


accuracy: 0.938


[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.6s finished


## Random Patches

In [17]:
# Random Patches: both rows and features are sampled for each tree --
# 25% of the training rows and half the feature count, each drawn with
# replacement.
model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    # row sampling
    bootstrap=True,
    max_samples=0.25,
    # feature sampling
    bootstrap_features=True,
    max_features=0.5,
    n_jobs=-1,  # use every available core
    random_state=42,
    verbose=True,  # emit joblib progress messages during fit/predict
)

In [18]:
# Fit the random-patches ensemble, predict on the held-out rows and report
# accuracy.
y_pred = model.fit(X_train, y_train).predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    2.8s remaining:    8.6s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    3.3s finished
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.2s


accuracy: 0.9335


[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.2s finished


# OOB Score

In [19]:
# Bagging with out-of-bag evaluation: oob_score=True asks the ensemble to
# score itself on the training rows left out of each tree's bootstrap sample,
# which requires bootstrap=True.
model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,
    max_samples=0.25,
    oob_score=True,
    n_jobs=-1,  # use every available core
    random_state=42,
    verbose=True,  # emit joblib progress messages during fit/predict
)

In [20]:
# Fit the OOB-enabled ensemble, predict on the held-out rows and report
# test-set accuracy (the out-of-bag estimate is read separately afterwards).
y_pred = model.fit(X_train, y_train).predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    4.1s remaining:   12.6s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    4.7s finished


accuracy: 0.9535


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:    0.1s finished


In [21]:
# Out-of-bag score computed during fit; the attribute exists because the
# ensemble was built with oob_score=True.
model.oob_score_

0.963125

# GRID SEARCH CV

In [22]:
from sklearn.model_selection import GridSearchCV

In [23]:
# Search space for the bagging ensemble: 2 values for each of 4 parameters,
# i.e. 16 candidate combinations.
hyper_parameters = {
    'n_estimators': [100, 500],   # ensemble size
    'max_samples': [0.4, 0.7],    # fraction of training rows per estimator
    'bootstrap': [True, False],   # rows drawn with / without replacement
    'max_features': [0.4, 0.7],   # fraction of features per estimator
}

In [24]:
# Grid-search the bagging hyper-parameters with 5-fold cross-validation.
# Parallelism is applied at the search level only (n_jobs=-1 on GridSearchCV).
# The inner BaggingClassifier is kept single-job: requesting all cores at both
# levels makes every search worker spawn its own pool and oversubscribes the
# CPU. n_jobs is not part of the grid, so the candidates evaluated are
# unchanged.
search = GridSearchCV(
    estimator=BaggingClassifier(random_state=42, n_jobs=1),
    param_grid=hyper_parameters,
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [25]:
%%time
# Run the full grid search on the training split (timed by the cell magic
# on the first line of this cell).
search.fit(X_train, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
Wall time: 4min 4s


In [26]:
# Mean cross-validated score of the best parameter combination found.
search.best_score_

0.9648749999999999

In [27]:
# Parameter combination from hyper_parameters that achieved the best
# cross-validated score.
search.best_params_

{'bootstrap': False,
 'max_features': 0.7,
 'max_samples': 0.7,
 'n_estimators': 100}