Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel

  from numpy.core.umath_tests import inner1d


Load data

In [6]:
data = pd.read_csv("preprocessed_data_two.csv")

Split dataset

In [7]:
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:,1:34], data["class"], random_state=69, test_size=0.33)

Naive Bayes

In [8]:
clf_Bayes = Pipeline([
  ('feature_selection', SelectFromModel(LinearSVC())),
  ('classification', BernoulliNB())
])
clf_Bayes.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('feature_selection', SelectFromModel(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
        norm_order=1, prefit=False, threshold=None)), ('classification', BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True))])

In [9]:
y_preds = clf_Bayes.predict(X_test)
report = classification_report(y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.72      1.00      0.83       755
          1       0.67      0.01      0.01       301

avg / total       0.70      0.72      0.60      1056



Linear SVC

In [10]:
clf_lns = Pipeline([
  ('feature_selection', SelectFromModel(LinearSVC())),
  ('classification', LinearSVC(C=0.05,random_state=0))
])
clf_lns.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('feature_selection', SelectFromModel(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
        norm_order=1, prefit=..., max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
     verbose=0))])

In [11]:
y_preds = clf_lns.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.82      0.95      0.88       755
          1       0.79      0.50      0.61       301

avg / total       0.81      0.82      0.80      1056



Logistic Regression

In [12]:
clf_log = Pipeline([
  ('feature_selection', SelectFromModel(LinearSVC(C=0.05,max_iter=2000))),
  ('classification', LogisticRegression(class_weight='balanced',penalty='l2'))
])
clf_log.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('feature_selection', SelectFromModel(estimator=LinearSVC(C=0.05, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=2000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
        norm_order=1, prefit...ty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False))])

In [13]:
y_preds = clf_log.predict(X_test)
report = classification_report( y_test, y_preds )
print(report)

             precision    recall  f1-score   support

          0       0.91      0.80      0.85       755
          1       0.62      0.79      0.69       301

avg / total       0.82      0.80      0.81      1056



Decision Tree

In [14]:
clf_tree = Pipeline([
  ('feature_selection', SelectFromModel(LinearSVC(C=0.05,max_iter=1500))),
  ('classification', DecisionTreeClassifier())
])
clf_tree.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('feature_selection', SelectFromModel(estimator=LinearSVC(C=0.05, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1500,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
        norm_order=1, prefit...      min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'))])

In [15]:
y_preds = clf_tree.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.87      0.86      0.87       755
          1       0.67      0.68      0.68       301

avg / total       0.81      0.81      0.81      1056



Random Forest

In [16]:
clf_forest = Pipeline([
  ('feature_selection', SelectFromModel(LinearSVC(C=0.05,max_iter=2000))),
  ('classification', RandomForestClassifier(n_estimators=300, random_state=0))
])
clf_forest.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('feature_selection', SelectFromModel(estimator=LinearSVC(C=0.05, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=2000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
        norm_order=1, prefit...stimators=300, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False))])

In [17]:
y_preds = clf_forest.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.89      0.93      0.91       755
          1       0.80      0.70      0.75       301

avg / total       0.86      0.87      0.86      1056



Neural Network - Perceptron

In [18]:
clf_neural = Pipeline([
  ('feature_selection', SelectFromModel(LinearSVC(C=0.05,max_iter=2000))),
  ('classification', MLPClassifier())
])
clf_neural.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('feature_selection', SelectFromModel(estimator=LinearSVC(C=0.05, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=2000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
        norm_order=1, prefit...=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False))])

In [19]:
y_preds = clf_neural.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.87      0.89      0.88       755
          1       0.70      0.66      0.68       301

avg / total       0.82      0.82      0.82      1056

