In [25]:
import ast 
import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import CategoricalNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import ComplementNB

from sklearn.metrics import recall_score, accuracy_score, precision_score, f1_score, confusion_matrix

In [3]:
%ls data/

[0m[01;32madult.data[0m*  [01;32madult.names[0m*  [01;32madult.test[0m*  [01;32mtest.csv[0m*  [01;32mtrain.csv[0m*


In [6]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

x = train.iloc[:,:-2].to_numpy()
y = train.iloc[:,-1].to_numpy()

x_test = test.iloc[:,:-2].to_numpy()
y_test = test.iloc[:,-1].to_numpy()

# Support Vector Machine

In [13]:
clf = SVC(kernel='rbf')
clf.fit(x, y)
y_pred = clf.predict(x_test)
print('accuracy score:   \t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:   \t', recall_score(y_pred, y_test),
      '\nf1 score:       \t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:   	 0.8228610036238561 
precision score:	 0.4209568382735309 
recall score:   	 0.711335676625659 
f1 score:       	 0.5289121202221495
confusion matrix:
 [[11778  2227]
 [  657  1619]]


In [12]:
clf = SVC(kernel='linear')
clf.fit(x, y)
y_pred = clf.predict(x_test)
print('accuracy score:\t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:\t', recall_score(y_pred, y_test),
      '\nf1 score:\t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:	 0.8032061912658928 
precision score:	 0.31357254290171604 
recall score:	 0.6813559322033899 
f1 score:	 0.4294871794871794
confusion matrix:
 [[11871  2640]
 [  564  1206]]


In [14]:
clf = SVC(kernel='poly')
clf.fit(x, y)
y_pred = clf.predict(x_test)
print('accuracy score:\t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:\t', recall_score(y_pred, y_test),
      '\nf1 score:\t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:	 0.8223082120262883 
precision score:	 0.40977639105564223 
recall score:	 0.7166894042746703 
f1 score:	 0.5214226633581472
confusion matrix:
 [[11812  2270]
 [  623  1576]]


# Gaussian Naive Bayes

In [15]:
clf = GaussianNB()
clf.fit(x, y)
ry_pred = clf.predict(x_test)
print('accuracy score:\t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:\t', recall_score(y_pred, y_test),
      '\nf1 score:\t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:	 0.8223082120262883 
precision score:	 0.40977639105564223 
recall score:	 0.7166894042746703 
f1 score:	 0.5214226633581472
confusion matrix:
 [[11812  2270]
 [  623  1576]]


# Bernoulli Naive Bayes

In [18]:
clf = BernoulliNB()
clf.fit(x, y)
y_pred = clf.predict(x_test)
print('accuracy score:\t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:\t', recall_score(y_pred, y_test),
      '\nf1 score:\t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:	 0.7913518825624961 
precision score:	 0.608164326573063 
recall score:	 0.5530858358950106 
f1 score:	 0.5793188854489164
confusion matrix:
 [[10545  1507]
 [ 1890  2339]]


# Categorical Naive Bayes

In [21]:
clf = CategoricalNB()
clf.fit(x, y)
y_pred = clf.predict(x_test)
print('accuracy score:\t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:\t', recall_score(y_pred, y_test),
      '\nf1 score:\t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:	 0.8158589767213316 
precision score:	 0.7438897555902236 
recall score:	 0.586992203528929 
f1 score:	 0.6561926605504587
confusion matrix:
 [[10422   985]
 [ 2013  2861]]


# Multinomial Naive Bayes

In [23]:
clf = MultinomialNB()
clf.fit(x, y)
y_pred = clf.predict(x_test)
print('accuracy score:\t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:\t', recall_score(y_pred, y_test),
      '\nf1 score:\t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:	 0.7742767643265156 
precision score:	 0.5390015600624025 
recall score:	 0.5215094339622641 
f1 score:	 0.5301112389719984
confusion matrix:
 [[10533  1773]
 [ 1902  2073]]


# Complement Naive Bayes

In [26]:
clf = ComplementNB()
clf.fit(x, y)
y_pred = clf.predict(x_test)
print('accuracy score:\t', accuracy_score(y_pred, y_test),
      '\nprecision score:\t', precision_score(y_pred, y_test),
      '\nrecall score:\t', recall_score(y_pred, y_test),
      '\nf1 score:\t', f1_score(y_pred, y_test))
print('confusion matrix:\n',confusion_matrix(y_pred, y_test))

accuracy score:	 0.6604016952275659 
precision score:	 0.8244929797191888 
recall score:	 0.39514018691588787 
f1 score:	 0.5342431134698004
confusion matrix:
 [[7581  675]
 [4854 3171]]
