# Voting

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# Load the breast cancer dataset (features in `data`, class labels in `target`).
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

# Split BEFORE scaling: the test set must not influence any fitted statistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Normalize: learn the per-feature min/max from the training split only, then
# apply the same transformation to the test split. Fitting the scaler on the
# full dataset would leak test-set information into training.
# Test values may therefore fall slightly outside [0, 1]; that is expected.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
# Three different base models to combine in the ensemble.
log_clf = LogisticRegression()
svm_clf = SVC()
# Decision trees permute the candidate features randomly at each split, so the
# fitted tree (and its score) can vary between runs unless the seed is fixed.
tree_clf = DecisionTreeClassifier(random_state=42)

# Ensemble of the three models. voting='hard' predicts the class that receives
# the majority of the individual models' votes.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('svc', svm_clf), ('tree_clf', tree_clf)],
    voting='hard',
)

In [17]:
# Train every individual model and the voting ensemble on the training split,
# then report each one's accuracy on the held-out test split.
classifiers = (log_clf, svm_clf, tree_clf, voting_clf)
for clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(type(clf).__name__, accuracy)

LogisticRegression 0.986013986013986
SVC 0.9790209790209791
DecisionTreeClassifier 0.951048951048951
VotingClassifier 0.986013986013986


In [None]:
# Fit here as well so the cell does not depend on an earlier cell having
# trained the model.
log_clf.fit(X_train, y_train)
# predict_proba returns one row per test sample and one column per class.
# Show only the first few rows instead of dumping the whole array.
print(log_clf.predict_proba(X_test)[:5])

# Bootstrapping

In [9]:
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of 500 decision trees.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,     # draw the training samples with replacement
    max_samples=100,    # an int means: draw this many samples per tree
    oob_score=True,     # also compute the out-of-bag score during fit
    random_state=42,    # fix the bootstrap draws so results are reproducible
)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)

In [90]:
# Out-of-bag score of the fitted bagging ensemble; the attribute only exists
# when the ensemble was created with oob_score=True.
bag_clf.oob_score_


0.9483568075117371

In [91]:
# Accuracy on the test split of the predictions currently held in y_pred.
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.965034965034965


In [80]:
# Shape of the array of training-sample indices drawn for the first base
# estimator, i.e. how many samples that estimator was trained on.
# NOTE(review): the value depends on the max_samples setting of whichever
# bag_clf was fitted last; a saved output that disagrees with that setting
# comes from an earlier run - re-run the notebook top to bottom to refresh it.
bag_clf.estimators_samples_[0].shape

(300,)

In [55]:
# (number of training samples, number of features) of the training split.
X_train.shape

(426, 30)

# Random Forest

In [7]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 500 trees, each capped at depth 20 and at most 20 leaves.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_depth=20,
    max_leaf_nodes=20,
    n_jobs=-1,          # train the trees in parallel on all available cores
    random_state=42,    # fix the seed so the reported accuracy is reproducible
)
rnd_clf.fit(X_train, y_train)
y_pred = rnd_clf.predict(X_test)
# Accuracy on the test split (displayed as the cell output).
accuracy_score(y_test, y_pred)

0.972027972027972

In [92]:
# Random forest of 500 trees, each capped at 16 leaves (no explicit depth cap).
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,          # train the trees in parallel on all available cores
    random_state=42,    # fix the seed so the reported accuracy is reproducible
)
rnd_clf.fit(X_train, y_train)
y_pred_rf = rnd_clf.predict(X_test)
# Accuracy on the test split (displayed as the cell output).
accuracy_score(y_test, y_pred_rf)

0.972027972027972

In [15]:
# Bagging ensemble of 500 decision trees grown with splitter="random", each
# capped at depth 20 and at most 20 leaves.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(splitter="random", max_depth=20, max_leaf_nodes=20),
    n_estimators=500,
    max_samples=1.0,    # a float is a fraction: each tree draws as many samples as the training set has
    bootstrap=True,     # ...drawn with replacement
    random_state=42,    # fix the bootstrap draws and tree seeds for reproducibility
)
bag_clf.fit(X_train, y_train)
# NOTE: despite the "_rf" suffix, these are the bagging ensemble's predictions.
y_pred_rf = bag_clf.predict(X_test)
# Accuracy on the test split (displayed as the cell output).
accuracy_score(y_test, y_pred_rf)

0.972027972027972