# Classification with Generative Models

This notebook covers the following topics:
- Gaussian Naive Bayes
- Multinomial Naive Bayes
- Multi-class discriminant analysis using Linear and Quadratic Discriminant Analysis

Again, we will be using the Iris dataset.

In [None]:
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import numpy as np

# Fetch the Iris data: X holds the feature matrix, y the class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

In [None]:
# Build a Gaussian Naive Bayes classifier and fit it on the training split
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Predict labels for the training and the test samples
y_train_pred = gnb.predict(X_train)
y_pred = gnb.predict(X_test)

# Accuracy = number of correct predictions / number of samples, per split
for split_name, predicted, actual in (
    ('Training', y_train_pred, y_train),
    ('Test', y_pred, y_test),
):
    print('{} accuracy = {}'.format(split_name, np.sum(predicted == actual)/len(actual)))

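Now let's repeat this with Multinomial Naive Bayes, choosing the smoothing parameter `alpha` by its accuracy on a validation set held out from the training data.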

In [None]:
from sklearn.naive_bayes import MultinomialNB

# Hold out 20% of the training data as a validation set for tuning alpha.
# The sub-split gets its own names (X_fit / y_fit) so X_train / y_train are never
# overwritten: the cell can be re-run safely and later cells keep seeing the
# original training split. The fixed random_state makes the chosen alpha and the
# printed accuracies reproducible across runs.
X_fit, X_valid, y_fit, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=5
)

# Candidate smoothing values to try
alphas = [0.1, 0.5, 1, 2, 3, 4, 5, 10, 100]
best_alpha = 0.1
best_acc = 0.0

for alpha in alphas:
    # Fit on the reduced training portion only
    clf = MultinomialNB(alpha=alpha)
    clf.fit(X_fit, y_fit)
    # Score on the held-out validation portion
    y_valid_pred = clf.predict(X_valid)
    accuracy = np.sum(y_valid_pred == y_valid)/len(y_valid)
    print ('Validation accuracy = ' + str(accuracy) + ' at alpha = ' + str(alpha))
    # Strict '>' means that among tied candidates the first one in `alphas` wins
    if accuracy > best_acc:
        best_acc = accuracy
        best_alpha = alpha

print ('Best alpha = ' + str(best_alpha))

# Refit on the complete training split (fit + validation portions) with the chosen alpha
clf = MultinomialNB(alpha=best_alpha)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
y_train_pred = clf.predict(X_train)

# print the accuracy
print ('Training accuracy = ' + str(np.sum(y_train_pred == y_train)/len(y_train)))
print ('Test accuracy = ' + str(np.sum(y_pred == y_test)/len(y_test)))

Multi-class Discriminant Analysis using LinearDiscriminantAnalysis

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Create the linear discriminant classifier and fit it on the training split
clf = LinearDiscriminantAnalysis().fit(X_train, y_train)

# Predict labels for the training and the test samples
y_train_pred = clf.predict(X_train)
y_pred = clf.predict(X_test)

# Accuracy = number of correct predictions / number of samples, per split
for split_name, predicted, actual in (
    ('Training', y_train_pred, y_train),
    ('Test', y_pred, y_test),
):
    print('{} accuracy = {}'.format(split_name, np.sum(predicted == actual)/len(actual)))

Multi-class Discriminant Analysis using QuadraticDiscriminantAnalysis

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Create the quadratic discriminant classifier and fit it on the training split
clf = QuadraticDiscriminantAnalysis().fit(X_train, y_train)

# Predict labels for the training and the test samples
y_train_pred = clf.predict(X_train)
y_pred = clf.predict(X_test)

# Accuracy = number of correct predictions / number of samples, per split
for split_name, predicted, actual in (
    ('Training', y_train_pred, y_train),
    ('Test', y_pred, y_test),
):
    print('{} accuracy = {}'.format(split_name, np.sum(predicted == actual)/len(actual)))