[View in Colaboratory](https://colab.research.google.com/github/duakaran96/ML-AcadView/blob/master/Ensembles.ipynb)

## Imports

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Importing the wine dataset from sklearn

In [0]:
from sklearn.datasets import load_wine

In [0]:
data = load_wine()

### Description of Dataset

In [64]:
print(data.DESCR)

Wine Data Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- 1) Alcohol
 		- 2) Malic acid
 		- 3) Ash
		- 4) Alcalinity of ash  
 		- 5) Magnesium
		- 6) Total phenols
 		- 7) Flavanoids
 		- 8) Nonflavanoid phenols
 		- 9) Proanthocyanins
		- 10)Color intensity
 		- 11)Hue
 		- 12)OD280/OD315 of diluted wines
 		- 13)Proline
        	- class:
                - class_0
                - class_1
                - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:     

### Splitting into features and target

In [0]:
X = data.data
y = data.target

### Standardizing

In [0]:
from sklearn.preprocessing import StandardScaler

In [0]:
scaler = StandardScaler()

In [68]:
scaler.fit(X)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [0]:
X = scaler.transform(X)

### Train Test Split

In [0]:
from sklearn.model_selection import train_test_split

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=101)

In [72]:
print(X_train.shape)
print(X_test.shape)

(106, 13)
(72, 13)


## Trying various ensemble methods

### Bagging Tree Classifier

In [0]:
from sklearn.ensemble import BaggingClassifier

In [0]:
baggingClassifier = BaggingClassifier()

In [75]:
baggingClassifier.fit(X_train, y_train)

BaggingClassifier(base_estimator=None, bootstrap=True,
         bootstrap_features=False, max_features=1.0, max_samples=1.0,
         n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
         verbose=0, warm_start=False)

In [0]:
predictionsBagging = baggingClassifier.predict(X_test)

In [0]:
from sklearn.metrics import accuracy_score

In [78]:
print(accuracy_score(y_test, predictionsBagging))

0.9027777777777778


### Random Forest Classifier

In [0]:
from sklearn.ensemble import RandomForestClassifier

In [0]:
randomForest = RandomForestClassifier()

In [81]:
randomForest.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [0]:
predictionsRandomForest = randomForest.predict(X_test)

In [83]:
print(accuracy_score(y_test, predictionsRandomForest))

0.9861111111111112


## Voting ensemble

In [0]:
from sklearn.ensemble import VotingClassifier

In [0]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [0]:
votingClassifier = VotingClassifier([('tree', DecisionTreeClassifier()), ('log', LogisticRegression()), ('svc', SVC())])

In [87]:
votingClassifier.fit(X_train, y_train)

VotingClassifier(estimators=[('tree', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_le...,
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))],
         flatten_transform=None, n_jobs=1, voting='hard', weights=None)

In [88]:
predictionsVoting = votingClassifier.predict(X_test)

  if diff:


In [89]:
print(accuracy_score(y_test, predictionsVoting))

0.9861111111111112


### Gradient Boosting

In [0]:
from sklearn.ensemble import GradientBoostingClassifier

In [0]:
gbc = GradientBoostingClassifier()

In [92]:
gbc.fit(X_train, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)

In [0]:
predictionsGBC = gbc.predict(X_test)

In [94]:
print(accuracy_score(y_test, predictionsGBC))

0.8333333333333334
