##Bagging Decision Tree for Classification

In [2]:
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [8]:
filename='/content/pima-indians-diabetes.data.csv'
names=['preg','plas','pres','skin','test','mass','pedi','age','class']
dataframe=read_csv(filename,names=names)

In [10]:
array=dataframe.values
X=array[:,0:8]
Y=array[:,8]
seed=7

In [12]:
kfold= KFold(n_splits=10,random_state=seed,shuffle=True)
cart= DecisionTreeClassifier()
num_trees=100

model=BaggingClassifier(estimator=cart,n_estimators=num_trees,random_state=seed)
results=cross_val_score(model,X,Y,cv=kfold)

In [13]:
results

array([0.76623377, 0.75324675, 0.74025974, 0.77922078, 0.80519481,
       0.79220779, 0.66233766, 0.75324675, 0.78947368, 0.73684211])

In [15]:
print(results.mean())

0.7578263841421736


##Random Forest Classifier

In [17]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [19]:
array=dataframe.values
X=array[:,0:8]
Y=array[:,8]

num_trees=100
max_features=3
kfold= KFold(n_splits=10)

In [21]:
model=RandomForestClassifier(n_estimators=num_trees,max_features=max_features)
results=cross_val_score(model,X,Y,cv=kfold)

In [23]:
results

array([0.71428571, 0.84415584, 0.74025974, 0.64935065, 0.79220779,
       0.81818182, 0.80519481, 0.87012987, 0.71052632, 0.77631579])

In [25]:
print(results.mean())

0.7720608339029391


##Boosting Classification(AdaBoost)

In [27]:
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier

In [29]:
filename='/content/pima-indians-diabetes.data.csv'
names=['preg','plas','pres','skin','test','mass','pedi','age','class']
dataframe=read_csv(filename,names=names)

In [31]:
array=dataframe.values
X=array[:,0:8]
Y=array[:,8]

num_trees=100
seed=7
kfold= KFold(n_splits=10,random_state=seed,shuffle=True)

In [33]:
model=AdaBoostClassifier(n_estimators=num_trees,random_state=seed)
results=cross_val_score(model,X,Y,cv=kfold)

In [35]:
results

array([0.76623377, 0.75324675, 0.7012987 , 0.79220779, 0.81818182,
       0.74025974, 0.66233766, 0.79220779, 0.80263158, 0.75      ])

In [37]:
print(results.mean())

0.7578605604921395


##Stacking Classification

In [39]:
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [41]:
filename='/content/pima-indians-diabetes.data.csv'
names=['preg','plas','pres','skin','test','mass','pedi','age','class']
dataframe=read_csv(filename,names=names)

In [43]:
array=dataframe.values
X=array[:,0:8]
Y=array[:,8]

num_trees=100
seed=7
kfold= KFold(n_splits=10)

In [47]:
estimators=[]

model1=LogisticRegression(max_iter=500)
estimators.append(('logistic',model1))

model2=DecisionTreeClassifier()
estimators.append(('cart',model2))

model3=SVC()
estimators.append(('svm',model3))

In [49]:
ensemble=VotingClassifier(estimators)
results=cross_val_score(ensemble,X,Y,cv=kfold)

In [51]:
estimators

[('logistic', LogisticRegression(max_iter=500)),
 ('cart', DecisionTreeClassifier()),
 ('svm', SVC())]

In [53]:
results

array([0.64935065, 0.80519481, 0.72727273, 0.64935065, 0.77922078,
       0.80519481, 0.84415584, 0.85714286, 0.75      , 0.77631579])

In [54]:
print(results.mean())

0.7643198906356801
