## Example (Naive Bayes vs Trees for Iris)

In [1]:
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn import tree
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the cleaned Iris measurements from disk and preview the first rows.
df = pd.read_csv(filepath_or_buffer='Data/Iris/Iris-cleaned.csv')
df.head(n=5)

Unnamed: 0,sepal length,sepal width,petal length,petal width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# Feature columns: everything except the class label.
# They are continuous, so they are binned here and one-hot encoded afterwards.
attributes = df.drop(columns=['species'])
# Class label as a pandas categorical.
target = df.species.astype('category')

# Bin every attribute into three quantile-based categories:
# (S) short, (M) medium, (L) large.
# A new frame is built from the binned columns instead of assigning through
# `.iloc[:, k] = ...`: writing categorical values into float64 columns in place
# is an incompatible-dtype assignment (warned about / rejected by recent pandas)
# and can silently degrade the columns to plain object dtype, losing the
# S < M < L category order.
attributes = pd.DataFrame(
    {
        name: pd.qcut(column, q=3, labels=["S", "M", "L"])
        for name, column in attributes.items()
    }
)
attributes.head(10)

Unnamed: 0,sepal length,sepal width,petal length,petal width
0,S,L,S,S
1,S,M,S,S
2,S,M,S,S
3,S,M,S,S
4,S,L,S,S
5,S,L,S,S
6,S,L,S,S
7,S,L,S,S
8,S,S,S,S
9,S,M,S,S


In [4]:
# One-hot encode the binned attributes: one indicator column per
# (attribute, bin) pair, named "<attribute>_<bin>".
# dtype=int pins the indicators to 0/1 integers; without it, pandas >= 2.0
# returns booleans (True/False) and the table below would not reproduce.
attributes = pd.get_dummies(attributes, dtype=int)
attributes.head(3)

Unnamed: 0,sepal length_S,sepal length_M,sepal length_L,sepal width_S,sepal width_M,sepal width_L,petal length_S,petal length_M,petal length_L,petal width_S,petal width_M,petal width_L
0,1,0,0,0,0,1,1,0,0,1,0,0
1,1,0,0,0,1,0,1,0,0,1,0,0
2,1,0,0,0,1,0,1,0,0,1,0,0


In [5]:
# Candidate smoothing strengths for the grid search: the integers 0 through 10.
# NOTE: alpha is the additive smoothing parameter of MultinomialNB; alpha = 0
# means no smoothing at all.
param_grid = {'alpha': list(range(11))}

In [6]:
# Base estimator for the search: a multinomial Naive Bayes classifier with
# default settings (alpha is overridden by the grid search).
NBayes = MultinomialNB()

In [7]:
# Cross-validated grid search over the alpha values in `param_grid`,
# using all available CPU cores (n_jobs=-1).
NBayesCV = GridSearchCV(
    estimator=NBayes,
    param_grid=param_grid,
    n_jobs=-1,
)

In [8]:
# Run the grid search: fit on the one-hot encoded attributes against the
# species labels.
NBayesCV.fit(attributes,target)



GridSearchCV(estimator=MultinomialNB(), n_jobs=-1,
             param_grid={'alpha': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})

In [9]:
# Show the parameter setting selected by the grid search.
NBayesCV.best_params_

{'alpha': 0}

In [10]:
# Error rate = 1 - best mean cross-validated score found by the search
# (the score is the estimator's default metric, since no `scoring` was given).
nb_error_rate = 1 - NBayesCV.best_score_
print('Naive Bayes error rate:', nb_error_rate.round(2))

Naive Bayes error rate: 0.04
