In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as ply
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,log_loss
from sklearn.model_selection import train_test_split,StratifiedKFold,RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,StackingClassifier,GradientBoostingClassifier

In [2]:
bcancer=pd.read_csv('BreastCancer.csv',index_col=0)

In [3]:
lbl=LabelEncoder()

In [4]:
bcancer['Class']=lbl.fit_transform(bcancer['Class'])

In [5]:
X=bcancer.drop('Class',axis=1)
y=bcancer['Class']

In [6]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23,stratify=y)

In [7]:
lr=LogisticRegression()

In [8]:
svm=SVC(kernel='linear',random_state=23,probability=True)
dtc=DecisionTreeClassifier(random_state=23)
rf=RandomForestClassifier(random_state=23)

In [9]:
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=rf)

In [10]:
stack.fit(X_train,y_train)

In [11]:
y_pred=stack.predict(X_test)
accuracy_score(y_test,y_pred)

0.9523809523809523

# passthrough

In [12]:
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=rf,passthrough=True)

In [13]:
stack.fit(X_train,y_train)

In [14]:
y_pred=stack.predict(X_test)
accuracy_score(y_test,y_pred)

0.9523809523809523

#Grid search CV

In [15]:
gbm=GradientBoostingClassifier(random_state=23)

In [16]:
stack=StackingClassifier([('LR',lr),('SVM',svm),('TREE',dtc)],final_estimator=gbm,passthrough=True)

In [17]:
stack.get_params()

{'cv': None,
 'estimators': [('LR', LogisticRegression()),
  ('SVM', SVC(kernel='linear', probability=True, random_state=23)),
  ('TREE', DecisionTreeClassifier(random_state=23))],
 'final_estimator__ccp_alpha': 0.0,
 'final_estimator__criterion': 'friedman_mse',
 'final_estimator__init': None,
 'final_estimator__learning_rate': 0.1,
 'final_estimator__loss': 'log_loss',
 'final_estimator__max_depth': 3,
 'final_estimator__max_features': None,
 'final_estimator__max_leaf_nodes': None,
 'final_estimator__min_impurity_decrease': 0.0,
 'final_estimator__min_samples_leaf': 1,
 'final_estimator__min_samples_split': 2,
 'final_estimator__min_weight_fraction_leaf': 0.0,
 'final_estimator__n_estimators': 100,
 'final_estimator__n_iter_no_change': None,
 'final_estimator__random_state': 23,
 'final_estimator__subsample': 1.0,
 'final_estimator__tol': 0.0001,
 'final_estimator__validation_fraction': 0.1,
 'final_estimator__verbose': 0,
 'final_estimator__warm_start': False,
 'final_estimator': G

In [18]:
params={'TREE__max_depth':[2,3,4,5,6,None],'TREE__min_samples_split':[2,5,10],
        'TREE__min_samples_leaf':[1,2,5,7,10,15],'LR__penalty': ['l1','l2',None],'SVM__C':np.linspace(0.001,3,5),
        'final_estimator__learning_rate':[0.1,0.2],'final_estimator__max_depth':[2,3,4,5,6,None]}

In [19]:
kfold=StratifiedKFold(n_splits=5,shuffle=True,random_state=23)

In [21]:
rgcv_stack=RandomizedSearchCV(stack,param_distributions=params,verbose=3,cv=kfold,scoring='neg_log_loss',n_iter=50)

In [22]:
rgcv_stack.fit(X,y)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV 1/5] END LR__penalty=l2, SVM__C=2.25025, TREE__max_depth=5, TREE__min_samples_leaf=7, TREE__min_samples_split=10, final_estimator__learning_rate=0.1, final_estimator__max_depth=4;, score=-0.193 total time=   0.7s
[CV 2/5] END LR__penalty=l2, SVM__C=2.25025, TREE__max_depth=5, TREE__min_samples_leaf=7, TREE__min_samples_split=10, final_estimator__learning_rate=0.1, final_estimator__max_depth=4;, score=-0.127 total time=   0.7s
[CV 3/5] END LR__penalty=l2, SVM__C=2.25025, TREE__max_depth=5, TREE__min_samples_leaf=7, TREE__min_samples_split=10, final_estimator__learning_rate=0.1, final_estimator__max_depth=4;, score=-0.065 total time=   0.7s
[CV 4/5] END LR__penalty=l2, SVM__C=2.25025, TREE__max_depth=5, TREE__min_samples_leaf=7, TREE__min_samples_split=10, final_estimator__learning_rate=0.1, final_estimator__max_depth=4;, score=-0.236 total time=   0.6s
[CV 5/5] END LR__penalty=l2, SVM__C=2.25025, TREE__max_depth=5, TREE__

[CV 1/5] END LR__penalty=l2, SVM__C=1.5005, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=5, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.337 total time=   0.6s
[CV 2/5] END LR__penalty=l2, SVM__C=1.5005, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=5, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.121 total time=   0.6s
[CV 3/5] END LR__penalty=l2, SVM__C=1.5005, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=5, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.077 total time=   0.6s
[CV 4/5] END LR__penalty=l2, SVM__C=1.5005, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=5, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.425 total time=   0.6s
[CV 5/5] END LR__penalty=l2, SVM__C=1.5005, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=5, final_estimator__learnin

[CV 4/5] END LR__penalty=None, SVM__C=1.5005, TREE__max_depth=2, TREE__min_samples_leaf=5, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=4;, score=-0.269 total time=   0.5s
[CV 5/5] END LR__penalty=None, SVM__C=1.5005, TREE__max_depth=2, TREE__min_samples_leaf=5, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=4;, score=-0.286 total time=   0.5s
[CV 1/5] END LR__penalty=l1, SVM__C=0.75075, TREE__max_depth=5, TREE__min_samples_leaf=15, TREE__min_samples_split=10, final_estimator__learning_rate=0.1, final_estimator__max_depth=2;, score=nan total time=   0.0s
[CV 2/5] END LR__penalty=l1, SVM__C=0.75075, TREE__max_depth=5, TREE__min_samples_leaf=15, TREE__min_samples_split=10, final_estimator__learning_rate=0.1, final_estimator__max_depth=2;, score=nan total time=   0.0s
[CV 3/5] END LR__penalty=l1, SVM__C=0.75075, TREE__max_depth=5, TREE__min_samples_leaf=15, TREE__min_samples_split=10, final_estimator__

[CV 2/5] END LR__penalty=None, SVM__C=1.5005, TREE__max_depth=None, TREE__min_samples_leaf=15, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.129 total time=   0.6s
[CV 3/5] END LR__penalty=None, SVM__C=1.5005, TREE__max_depth=None, TREE__min_samples_leaf=15, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.078 total time=   0.6s
[CV 4/5] END LR__penalty=None, SVM__C=1.5005, TREE__max_depth=None, TREE__min_samples_leaf=15, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.355 total time=   0.6s
[CV 5/5] END LR__penalty=None, SVM__C=1.5005, TREE__max_depth=None, TREE__min_samples_leaf=15, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=5;, score=-0.379 total time=   0.5s
[CV 1/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=6, TREE__min_samples_leaf=7, TREE__min_samples_split=2,

[CV 1/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=6, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.2, final_estimator__max_depth=6;, score=-0.961 total time=   0.5s
[CV 2/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=6, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.2, final_estimator__max_depth=6;, score=-0.262 total time=   0.5s
[CV 3/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=6, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.2, final_estimator__max_depth=6;, score=-0.175 total time=   0.5s
[CV 4/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=6, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__learning_rate=0.2, final_estimator__max_depth=6;, score=-0.982 total time=   0.5s
[CV 5/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=6, TREE__min_samples_leaf=1, TREE__min_samples_split=2, final_estimator__le

[CV 4/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=4, TREE__min_samples_leaf=15, TREE__min_samples_split=5, final_estimator__learning_rate=0.2, final_estimator__max_depth=4;, score=-0.363 total time=   0.5s
[CV 5/5] END LR__penalty=l2, SVM__C=0.75075, TREE__max_depth=4, TREE__min_samples_leaf=15, TREE__min_samples_split=5, final_estimator__learning_rate=0.2, final_estimator__max_depth=4;, score=-0.472 total time=   0.4s
[CV 1/5] END LR__penalty=None, SVM__C=3.0, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=10, final_estimator__learning_rate=0.2, final_estimator__max_depth=4;, score=-0.389 total time=   0.6s
[CV 2/5] END LR__penalty=None, SVM__C=3.0, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=10, final_estimator__learning_rate=0.2, final_estimator__max_depth=4;, score=-0.141 total time=   0.7s
[CV 3/5] END LR__penalty=None, SVM__C=3.0, TREE__max_depth=3, TREE__min_samples_leaf=2, TREE__min_samples_split=10, final_estimator__lea

[CV 3/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=5, TREE__min_samples_leaf=1, TREE__min_samples_split=10, final_estimator__learning_rate=0.2, final_estimator__max_depth=3;, score=-0.066 total time=   0.4s
[CV 4/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=5, TREE__min_samples_leaf=1, TREE__min_samples_split=10, final_estimator__learning_rate=0.2, final_estimator__max_depth=3;, score=-0.240 total time=   0.4s
[CV 5/5] END LR__penalty=None, SVM__C=0.001, TREE__max_depth=5, TREE__min_samples_leaf=1, TREE__min_samples_split=10, final_estimator__learning_rate=0.2, final_estimator__max_depth=3;, score=-0.343 total time=   0.4s
[CV 1/5] END LR__penalty=l1, SVM__C=0.001, TREE__max_depth=4, TREE__min_samples_leaf=7, TREE__min_samples_split=2, final_estimator__learning_rate=0.1, final_estimator__max_depth=6;, score=nan total time=   0.0s
[CV 2/5] END LR__penalty=l1, SVM__C=0.001, TREE__max_depth=4, TREE__min_samples_leaf=7, TREE__min_samples_split=2, final_estimator__learni

90 fits failed out of a total of 250.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
90 fits failed with the following error:
Traceback (most recent call last):
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/ensemble/_stacking.py", line 658, in fit
    return super().fit(X, y_encoded, sample_weight)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dai/anaconda3/lib/python3.11/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  

In [23]:
rgcv_stack.best_params_

{'final_estimator__max_depth': 2,
 'final_estimator__learning_rate': 0.1,
 'TREE__min_samples_split': 10,
 'TREE__min_samples_leaf': 1,
 'TREE__max_depth': 6,
 'SVM__C': 3.0,
 'LR__penalty': None}

In [24]:
rgcv_stack.best_score_

-0.12341388732490091