In [1]:
from sklearn.model_selection import train_test_split
import sklearn.metrics
import sklearn.naive_bayes
from sklearn import preprocessing, datasets
import complement_nb
import negation_nb
import universalset_nb
import selective_nb

# Benchmark corpora, loaded eagerly and in this order when the cell runs.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22 (fetch_openml is its replacement) -- confirm the pinned scikit-learn
# version before re-running this cell on a fresh kernel.
DATASETS = tuple(
    fetch()
    for fetch in (
        datasets.load_iris,
        lambda: datasets.fetch_mldata('MNIST original'),
        datasets.fetch_20newsgroups_vectorized,
        lambda: datasets.fetch_mldata("yahoo-web-directory-topics"),
    )
)

In [2]:
def benchmark(clf):
    """Fit ``clf`` on every entry of ``DATASETS`` and print a classification report.

    For each dataset a title line is printed first: the stripped first line of
    its ``DESCR`` attribute when it has one, otherwise the literal
    ``'20newsgroups'``. The data is then split 80/20 with a fixed seed, the
    classifier is fitted on the training part, and the report for its
    predictions on the held-out part is printed.

    Parameters
    ----------
    clf : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. The same instance
        is refitted on each dataset in turn.

    Returns
    -------
    None
        Results are only written to stdout.
    """
    for bunch in DATASETS:
        # Datasets without a DESCR attribute fall back to a fixed label.
        title = (
            bunch.DESCR.splitlines()[0].strip()
            if hasattr(bunch, 'DESCR')
            else '20newsgroups'
        )
        print(title)

        # Fixed random_state keeps the split identical across classifiers.
        parts = train_test_split(bunch.data, bunch.target,
                                 test_size=0.2, random_state=42)
        features_train, features_test, labels_train, labels_test = parts

        clf.fit(features_train, labels_train)
        predicted = clf.predict(features_test)
        report = sklearn.metrics.classification_report(labels_test, predicted)
        print(report)

In [3]:
# Run the benchmark over every entry of DATASETS with scikit-learn's MultinomialNB.
benchmark(sklearn.naive_bayes.MultinomialNB())

Iris Plants Database
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        10
          1       0.75      1.00      0.86         9
          2       1.00      0.73      0.84        11

avg / total       0.93      0.90      0.90        30

mldata.org dataset: mnist-original
             precision    recall  f1-score   support

        0.0       0.92      0.90      0.91      1349
        1.0       0.88      0.95      0.91      1581
        2.0       0.88      0.83      0.86      1400
        3.0       0.79      0.82      0.81      1434
        4.0       0.83      0.74      0.79      1328
        5.0       0.85      0.67      0.75      1286
        6.0       0.89      0.92      0.90      1407
        7.0       0.95      0.83      0.88      1476
        8.0       0.66      0.78      0.71      1391
        9.0       0.68      0.82      0.74      1348

avg / total       0.84      0.83      0.83     14000

20newsgroups
             precision  

In [4]:
# Same benchmark, using ComplementNB from the local complement_nb module.
benchmark(complement_nb.ComplementNB())

Iris Plants Database
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        10
          1       0.00      0.00      0.00         9
          2       0.55      1.00      0.71        11

avg / total       0.54      0.70      0.59        30

mldata.org dataset: mnist-original


  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

        0.0       0.61      0.91      0.73      1349
        1.0       0.70      0.97      0.81      1581
        2.0       0.82      0.73      0.77      1400
        3.0       0.67      0.78      0.72      1434
        4.0       0.93      0.58      0.72      1328
        5.0       0.78      0.46      0.58      1286
        6.0       0.81      0.86      0.83      1407
        7.0       0.65      0.90      0.75      1476
        8.0       0.85      0.38      0.52      1391
        9.0       0.64      0.54      0.58      1348

avg / total       0.74      0.72      0.71     14000

20newsgroups
             precision    recall  f1-score   support

          0       0.97      0.61      0.75        93
          1       0.88      0.79      0.83       118
          2       0.91      0.85      0.88       128
          3       0.74      0.79      0.77       120
          4       0.91      0.84      0.87       102
          5       0.86      0

In [5]:
# Same benchmark, using NegationNB from the local negation_nb module.
# NOTE(review): the recorded Iris and MNIST reports for this cell are identical
# to the ComplementNB cell's -- confirm that this is expected.
benchmark(negation_nb.NegationNB())

Iris Plants Database
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        10
          1       0.00      0.00      0.00         9
          2       0.55      1.00      0.71        11

avg / total       0.54      0.70      0.59        30

mldata.org dataset: mnist-original


  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

        0.0       0.61      0.91      0.73      1349
        1.0       0.70      0.97      0.81      1581
        2.0       0.82      0.73      0.77      1400
        3.0       0.67      0.78      0.72      1434
        4.0       0.93      0.58      0.72      1328
        5.0       0.78      0.46      0.58      1286
        6.0       0.81      0.86      0.83      1407
        7.0       0.65      0.90      0.75      1476
        8.0       0.85      0.38      0.52      1391
        9.0       0.64      0.54      0.58      1348

avg / total       0.74      0.72      0.71     14000

20newsgroups
             precision    recall  f1-score   support

          0       0.90      0.81      0.85        93
          1       0.86      0.79      0.82       118
          2       0.90      0.88      0.89       128
          3       0.74      0.78      0.76       120
          4       0.93      0.84      0.89       102
          5       0.87      0

In [6]:
# Same benchmark, using UniversalSetNB from the local universalset_nb module.
benchmark(universalset_nb.UniversalSetNB())

Iris Plants Database
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        10
          1       1.00      0.11      0.20         9
          2       0.58      1.00      0.73        11

avg / total       0.85      0.73      0.66        30

mldata.org dataset: mnist-original
             precision    recall  f1-score   support

        0.0       0.90      0.91      0.90      1349
        1.0       0.88      0.95      0.91      1581
        2.0       0.88      0.84      0.86      1400
        3.0       0.78      0.83      0.80      1434
        4.0       0.84      0.74      0.79      1328
        5.0       0.87      0.64      0.74      1286
        6.0       0.88      0.92      0.90      1407
        7.0       0.93      0.85      0.89      1476
        8.0       0.68      0.76      0.72      1391
        9.0       0.68      0.81      0.74      1348

avg / total       0.83      0.83      0.83     14000

20newsgroups
             precision  

In [7]:
# Same benchmark, using SelectiveNB from the local selective_nb module.
# NOTE(review): the recorded Iris and MNIST reports for this cell are identical
# to the ComplementNB cell's -- confirm that this is expected.
benchmark(selective_nb.SelectiveNB())

Iris Plants Database
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        10
          1       0.00      0.00      0.00         9
          2       0.55      1.00      0.71        11

avg / total       0.54      0.70      0.59        30

mldata.org dataset: mnist-original


  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

        0.0       0.61      0.91      0.73      1349
        1.0       0.70      0.97      0.81      1581
        2.0       0.82      0.73      0.77      1400
        3.0       0.67      0.78      0.72      1434
        4.0       0.93      0.58      0.72      1328
        5.0       0.78      0.46      0.58      1286
        6.0       0.81      0.86      0.83      1407
        7.0       0.65      0.90      0.75      1476
        8.0       0.85      0.38      0.52      1391
        9.0       0.64      0.54      0.58      1348

avg / total       0.74      0.72      0.71     14000

20newsgroups
             precision    recall  f1-score   support

          0       0.89      0.82      0.85        93
          1       0.86      0.79      0.82       118
          2       0.90      0.88      0.89       128
          3       0.74      0.78      0.76       120
          4       0.93      0.83      0.88       102
          5       0.87      0