In [36]:
import numpy as np
from sklearn.neighbors import NearestCentroid
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC 

def run(x_train, y_train, x_test, y_test, clf):
    """Fit ``clf`` on the training data and print how it does on the test data.

    Three report lines are printed, followed by an empty line:
    the predictions for ``x_test``, the reference labels ``y_test``, and
    the value of ``clf.score(x_test, y_test)`` with seven decimal places.

    ``clf`` only needs to provide ``fit``, ``predict`` and ``score``.
    Nothing is returned.
    """
    clf.fit(x_train, y_train)

    # Predictions and reference labels are printed one above the other so
    # mismatches can be spotted by eye.
    report = (
        ("    predictions  :", clf.predict(x_test)),
        ("    actual labels:", y_test),
    )
    for caption, values in report:
        print(caption, values)

    test_score = clf.score(x_test, y_test)
    print("    score = %0.7f" % test_score)
    print()


In [37]:

# Original iris arrays: the first N rows are used for training and the
# remaining rows for testing.
x = np.load("../data/iris/iris_features.npy")
y = np.load("../data/iris/iris_labels.npy")
N = 120
x_train, x_test = x[:N], x[N:]
y_train, y_test = y[:N], y[N:]

# Augmented arrays are stored already split into train and test files.
xa_train = np.load("../data/iris/iris_train_features_augmented.npy")
ya_train = np.load("../data/iris/iris_train_labels_augmented.npy")
xa_test = np.load("../data/iris/iris_test_features_augmented.npy")
ya_test = np.load("../data/iris/iris_test_labels_augmented.npy")



In [38]:


# Nearest-centroid classifier on the original (non-augmented) split.
print("Nearest centroid:")
run(
    x_train, y_train,
    x_test, y_test,
    clf=NearestCentroid(),
)


Nearest centroid:
    predictions  : [0 0 1 1 0 1 2 1 1 1 0 2 1 0 0 1 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9000000



In [39]:

# k-nearest neighbours with k=3 on the original (non-augmented) split.
print("k-NN classifier (k=3):")
run(
    x_train, y_train,
    x_test, y_test,
    clf=KNeighborsClassifier(n_neighbors=3),
)



k-NN classifier (k=3):
    predictions  : [0 0 1 1 0 1 2 1 1 1 0 2 2 0 0 2 2 1 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9333333



In [40]:

# Gaussian Naive Bayes on the original (non-augmented) split.
print("Naive Bayes classifier (Gaussian):")
run(
    x_train, y_train,
    x_test, y_test,
    clf=GaussianNB(),
)



Naive Bayes classifier (Gaussian):
    predictions  : [0 0 1 1 0 1 2 1 1 1 0 2 2 0 0 2 2 1 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9333333



In [41]:
# Multinomial Naive Bayes on the same original split, for comparison with
# the Gaussian variant.
print("Naive Bayes classifier (Multinomial):")
run(
    x_train, y_train,
    x_test, y_test,
    clf=MultinomialNB(),
)



Naive Bayes classifier (Multinomial):
    predictions  : [0 0 1 2 0 1 2 1 1 1 0 1 2 0 0 2 2 1 1 1 0 2 2 0 1 0 1 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9000000



Gaussian Naive Bayes (`GaussianNB`) and Multinomial Naive Bayes (`MultinomialNB`) are two variants of the Naive Bayes algorithm used for classification tasks. **GaussianNB is designed for continuous data, while MultinomialNB is designed for discrete count data.** GaussianNB models the likelihood of each feature within a class as a Gaussian (normal) distribution, while MultinomialNB models the feature counts within a class as a multinomial distribution. GaussianNB is often used for image recognition or text classification, while MultinomialNB is often used for text classification with word counts. Other variants of the Naive Bayes algorithm, such as Bernoulli Naive Bayes and Complement Naive Bayes, are designed to handle different types of data and make different assumptions about the data distribution. In the two runs above, GaussianNB scored 0.9333333 and MultinomialNB scored 0.9000000 on the same test split.

Sources:
- [Scikit-learn documentation on Naive Bayes](https://scikit-learn.org/stable/modules/naive_bayes.html)

In [42]:

# Decision tree on the original (non-augmented) split.
# random_state is fixed so that re-running the cell rebuilds the same tree;
# without it scikit-learn may break ties between equally good splits
# differently on each run, which can change the reported score.
print("Decision Tree classifier:")
run(
    x_train, y_train,
    x_test, y_test,
    DecisionTreeClassifier(random_state=0),
)


Decision Tree classifier:
    predictions  : [0 0 1 1 0 1 2 1 1 1 0 2 2 0 0 2 2 1 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9333333



In [28]:


# Random forest with 5 trees, trained and scored on the augmented arrays.
# random_state is fixed so the forest (and therefore the score) is the same
# on every run; with only 5 unseeded trees the result can vary noticeably
# from one execution to the next.
print("Random Forest classifier (estimators=5):")
run(
    xa_train, ya_train,
    xa_test, ya_test,
    RandomForestClassifier(n_estimators=5, random_state=0),
)


Random Forest classifier (estimators=5):
    predictions  : [0 0 1 1 0 1 2 1 1 1 0 2 2 0 0 2 2 1 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9333333



In [29]:

# Four SVM configurations, evaluated in order with the shared `run` helper.
# NOTE: the first two runs also use the augmented arrays even though their
# titles do not say so; only the last run uses the original split.
augmented_split = (xa_train, ya_train, xa_test, ya_test)
original_split = (x_train, y_train, x_test, y_test)

svm_experiments = [
    ("SVM (linear, C=1.0):",
     augmented_split, SVC(kernel="linear", C=1.0)),
    ("SVM (RBF, C=1.0, gamma=0.25):",
     augmented_split, SVC(kernel="rbf", C=1.0, gamma=0.25)),
    ("SVM (RBF, C=1.0, gamma=0.001, augmented)",
     augmented_split, SVC(kernel="rbf", C=1.0, gamma=0.001)),
    ("SVM (RBF, C=1.0, gamma=0.001, original)",
     original_split, SVC(kernel="rbf", C=1.0, gamma=0.001)),
]

for title, split, svm in svm_experiments:
    print(title)
    run(*split, svm)





SVM (linear, C=1.0):
    predictions  : [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 1 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9666667

SVM (RBF, C=1.0, gamma=0.25):
    predictions  : [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 1 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9666667

SVM (RBF, C=1.0, gamma=0.001, augmented)
    predictions  : [0 0 1 1 0 1 2 1 1 1 0 2 2 0 0 1 2 1 1 1 0 2 2 0 1 0 2 1 2 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.9000000

SVM (RBF, C=1.0, gamma=0.001, original)
    predictions  : [0 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 0 1 1 0 1 0 1 1 1 0]
    actual labels: [0 0 1 2 0 1 2 1 1 1 0 2 2 0 0 2 2 2 1 1 0 2 2 0 1 0 2 1 2 0]
    score = 0.6000000

