# Classification with Generative Models

In [6]:
# Practice problems.
# 1. In the code stub, train a Gaussian Naive Bayes model and report its accuracy on the iris test data (use 2 features).
# 2. In the code stub, train a Multinomial Naive Bayes model and report its accuracy on the iris test data (use 2 features).

In [7]:
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import numpy as np

# Fetch the bundled iris data; only the first two feature columns are kept
# so that the exercise is a 2-feature classification problem.
iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target

# Hold out 20% of the samples as a test set. The fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [8]:
# Build the Gaussian Naive Bayes classifier and fit it on the training split
# (fit() hands back the fitted estimator, so construction and training chain).
gnb = GaussianNB().fit(X_train, y_train)

# Predicted labels for the held-out test split and for the training split
y_pred = gnb.predict(X_test)
y_train_pred = gnb.predict(X_train)

# Accuracy = fraction of predictions that agree with the true labels
train_accuracy = np.mean(y_train_pred == y_train)
test_accuracy = np.mean(y_pred == y_test)
print('Training accuracy = ' + str(train_accuracy))
print('Test accuracy = ' + str(test_accuracy))

Training accuracy = 0.775
Test accuracy = 0.766666666667


Now let's repeat this with Multinomial Naive Bayes, using a validation set held out from the training data to choose the smoothing parameter alpha.

In [9]:
from sklearn.naive_bayes import MultinomialNB

# Hold out 20% of the training data as a validation set for choosing alpha.
# The split is seeded so the selected alpha and the printed accuracies are
# reproducible, and it is stored under new names so that X_train / y_train are
# left intact (re-running this cell always starts from the same training set).
X_fit, X_valid, y_fit, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=5
)

# NOTE(review): MultinomialNB is documented for count-like, non-negative
# features; the iris measurements are continuous, so this is an exercise in
# model selection rather than a recommended model for this data.

# Candidate values for the additive smoothing parameter
alphas = [0.1, 0.5, 1, 2, 3, 4, 5, 10, 100]
best_alpha = alphas[0]
best_acc = 0.0

for alpha in alphas:
    # Train a candidate on the reduced training set only
    candidate = MultinomialNB(alpha=alpha).fit(X_fit, y_fit)
    # Score it on the validation samples, which it has not seen
    y_valid_pred = candidate.predict(X_valid)
    accuracy = np.mean(y_valid_pred == y_valid)
    print('Validation accuracy = ' + str(accuracy) + ' at alpha = ' + str(alpha))
    # Strict '>' keeps the earliest (smallest) alpha when accuracies tie
    if accuracy > best_acc:
        best_acc = accuracy
        best_alpha = alpha

print('Best alpha = ' + str(best_alpha))

# Refit with the chosen alpha on the full training set, i.e. the samples used
# for fitting the candidates plus the validation samples.
clf = MultinomialNB(alpha=best_alpha)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_train_pred = clf.predict(X_train)

# print the accuracy
print('Training accuracy = ' + str(np.mean(y_train_pred == y_train)))
print('Test accuracy = ' + str(np.mean(y_pred == y_test)))

Validation accuracy = 0.208333333333 at alpha = 0.1
Validation accuracy = 0.208333333333 at alpha = 0.5
Validation accuracy = 0.208333333333 at alpha = 1
Validation accuracy = 0.208333333333 at alpha = 2
Validation accuracy = 0.208333333333 at alpha = 3
Validation accuracy = 0.208333333333 at alpha = 4
Validation accuracy = 0.208333333333 at alpha = 5
Validation accuracy = 0.166666666667 at alpha = 10
Validation accuracy = 0.166666666667 at alpha = 100
Best alpha = 0.1
Training accuracy = 0.616666666667
Test accuracy = 0.7


Multi-class Discriminant Analysis using LinearDiscriminantAnalysis