In [1]:
import warnings
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): a blanket 'ignore' also hides solver-convergence and
# deprecation warnings emitted by the modelling cells below -- consider
# narrowing the filter to specific categories once the notebook is stable.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [3]:
# Load the wine dataset from a CSV located next to the notebook
# (the path is relative to the kernel's working directory).
df = pd.read_csv("Wine.csv")

In [4]:
# Preview the first five rows to check the column names and sample values.
df.head()

Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline,Customer_Segment
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,1
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,1
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185,1
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480,1
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735,1


In [5]:
# Features are every column except the target; the target is Customer_Segment.
x = df.drop(columns='Customer_Segment')
y = df['Customer_Segment']

# Hold out 30% of the rows for evaluation. The seed is fixed so that
# "Restart Kernel -> Run All" reproduces the same partition; without it every
# run scores the models below on a different test set and the accuracies are
# not comparable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

In [6]:
# Baseline 1: a single logistic regression on the raw (unscaled) features.
# The iteration cap is raised from the default of 100 because the features
# live on very different scales (Proline is in the hundreds/thousands while
# Hue is around 1 in the head() preview), which makes the default solver slow
# to converge; with the notebook-wide warnings filter any ConvergenceWarning
# would be silently dropped and the score would come from an unfinished fit.
lr = LogisticRegression(max_iter=10000)
lr.fit(x_train, y_train)
lr.score(x_test, y_test)

0.9629629629629629

In [7]:
# Baseline 2: a single decision tree with default hyper-parameters.
# fit() returns the estimator itself, so construction and training are chained.
dt = DecisionTreeClassifier().fit(x_train, y_train)
# Mean accuracy on the held-out rows (displayed as the cell output).
dt.score(x_test, y_test)

0.9074074074074074

In [8]:
# Naive aggregation: combine heterogeneous models by voting
from sklearn.ensemble import VotingClassifier

# Each entry is a (label, estimator) tuple, as VotingClassifier expects.
estimator_list = [
    ('Logistic Reg', LogisticRegression()),
    ('DT gini', DecisionTreeClassifier(criterion='gini')),
    ('DT entropy', DecisionTreeClassifier(criterion='entropy')),
]

# Hard voting: the predicted class is the majority vote of the members
# ("hard" is the default, spelled out here for symmetry with the soft case).
hard = VotingClassifier(estimators=estimator_list, voting="hard")
hard.fit(x_train, y_train)
hard_accuracy = hard.score(x_test, y_test)
print("Hard Voting = ", hard_accuracy)

# Soft voting: the members' predicted class probabilities are combined instead.
soft = VotingClassifier(estimators=estimator_list, voting="soft")
soft.fit(x_train, y_train)
soft_accuracy = soft.score(x_test, y_test)
print("Soft Voting = ", soft_accuracy)

Hard Voting =  0.9629629629629629
Soft Voting =  0.9629629629629629


In [9]:
# (rows, columns) of the loaded frame, shown as the cell output.
df.shape

(178, 14)

In [10]:
# Stacking (mlxtend implementation)
from mlxtend.classifier import StackingClassifier

# Level-0 learners: four decision trees with default settings.
classifier_list = [DecisionTreeClassifier() for _ in range(4)]

# Level-1 (meta) learner that is trained on the base learners' outputs;
# use_probas=True feeds it class probabilities rather than predicted labels.
meta_class = LogisticRegression()
stack = StackingClassifier(
    classifiers=classifier_list,
    meta_classifier=meta_class,
    use_probas=True,
)
stack.fit(x_train, y_train)
stack.score(x_test, y_test)

0.9444444444444444

In [11]:
# Bootstrap aggregation
from sklearn.ensemble import BaggingClassifier

# Settings shared by all four ensembles below: 4 members, each trained on
# 40 rows drawn from the training set.
subsample_kwargs = {"n_estimators": 4, "max_samples": 40}

# Bagging: rows are drawn with replacement (BaggingClassifier's default).
bagging = BaggingClassifier(LogisticRegression(), **subsample_kwargs)
bagging.fit(x_train, y_train)
print("Bagging with Logistic Regression = ", bagging.score(x_test, y_test))

bagging = BaggingClassifier(DecisionTreeClassifier(), **subsample_kwargs)
bagging.fit(x_train, y_train)
print("Bagging with Decision Tree = ", bagging.score(x_test, y_test))

# Pasting: same setup, but rows are drawn without replacement.
pasting = BaggingClassifier(LogisticRegression(), bootstrap=False, **subsample_kwargs)
pasting.fit(x_train, y_train)
print("pasting with Logistic Regression = ", pasting.score(x_test, y_test))

pasting = BaggingClassifier(DecisionTreeClassifier(), bootstrap=False, **subsample_kwargs)
pasting.fit(x_train, y_train)
print("pasting with DT = ", pasting.score(x_test, y_test))

Bagging with Logistic Regression =  0.9259259259259259
Bagging with Decision Tree =  0.9074074074074074
pasting with Logistic Regression =  0.9629629629629629
pasting with DT =  0.9814814814814815


In [12]:
# AdaBoost with logistic regression as the weak learner
from sklearn.ensemble import AdaBoostClassifier

# The weak learner is passed positionally on purpose: scikit-learn renamed the
# keyword from `base_estimator` to `estimator` (deprecated in 1.2, old name
# removed in 1.4), but it has always been the first positional parameter, so
# this form runs on releases from either side of the rename. It also matches
# how the BaggingClassifier cells in this notebook pass their base model.
ada = AdaBoostClassifier(LogisticRegression())
ada.fit(x_train, y_train)
ada.score(x_test, y_test)

0.9444444444444444

In [13]:
# Gradient tree boosting with scikit-learn's default hyper-parameters
from sklearn.ensemble import GradientBoostingClassifier

# fit() returns the fitted estimator, so it can be bound in one expression.
gb = GradientBoostingClassifier().fit(x_train, y_train)
# Mean accuracy on the held-out rows (displayed as the cell output).
gb.score(x_test, y_test)

0.9814814814814815

In [14]:
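# Setup note: the next cell needs the xgboost package. If it is missing, run
# `%pip install xgboost` in a cell so it installs into this kernel's environment.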

In [15]:
# Extreme gradient boosting (XGBoost)
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# Recent XGBoost releases require class labels to be 0..n_classes-1, while
# Customer_Segment starts at 1 (see the head() preview), so the labels are
# mapped to consecutive integers first. Accuracy is unaffected by the mapping.
# Use label_encoder.inverse_transform(...) on predictions to recover the
# original segment ids.
label_encoder = LabelEncoder().fit(y)

# XGBClassifier has no `base_estimator` parameter -- its boosters are
# XGBoost's own, not scikit-learn estimators -- so the LogisticRegression
# previously passed under that name never acted as a base learner and has
# been dropped.
xgb = XGBClassifier()
xgb.fit(x_train, label_encoder.transform(y_train))
xgb.score(x_test, label_encoder.transform(y_test))

0.9814814814814815

In [16]:
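# Setup note: if the imbalanced-learn package is missing, run
# `%pip install imbalanced-learn` in a cell so it installs into this kernel's environment.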