Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier

  from numpy.core.umath_tests import inner1d


Load data

In [2]:
data =pd.read_csv("preprocessed_data.csv")

Split dataset

In [3]:
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:,1:34], data["class"], test_size=0.33, random_state=42)

Naive Bayes

In [4]:
clf_Bayes = BernoulliNB()
clf_Bayes.fit(X_train, y_train)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [5]:
y_preds = clf_Bayes.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.39      0.21      0.27       290
          1       0.66      0.86      0.75       691
          2       0.00      0.00      0.00        75

avg / total       0.54      0.62      0.57      1056



  'precision', 'predicted', average, warn_for)


SVM

In [6]:
clf_svm = SVC(C=0.1)
clf_svm.fit(X_train,y_train)

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [7]:
y_preds = clf_svm.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.75      0.03      0.06       290
          1       0.66      1.00      0.79       691
          2       0.00      0.00      0.00        75

avg / total       0.64      0.66      0.54      1056



  'precision', 'predicted', average, warn_for)


Linear SVC

In [26]:
clf_lns = LinearSVC(C = 0.75)
clf_lns.fit(X_train,y_train)

LinearSVC(C=0.75, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [27]:
y_preds = clf_lns.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.35      0.67      0.46       290
          1       0.78      0.03      0.05       691
          2       0.11      0.73      0.20        75

avg / total       0.62      0.25      0.17      1056



Logistic Regression

In [30]:
clf_Logistic = LogisticRegression(C = 0.75)
clf_Logistic.fit(X_train,y_train)

LogisticRegression(C=0.75, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [31]:
y_preds = clf_Logistic.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.77      0.56      0.65       290
          1       0.76      0.93      0.84       691
          2       0.00      0.00      0.00        75

avg / total       0.71      0.76      0.72      1056



  'precision', 'predicted', average, warn_for)


Decision Tree

In [12]:
clf_tree = DecisionTreeClassifier()
clf_tree.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [13]:
y_preds = clf_tree.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.64      0.64      0.64       290
          1       0.79      0.78      0.78       691
          2       0.36      0.41      0.39        75

avg / total       0.72      0.71      0.72      1056



Random Forest

In [14]:
clf_Forest = RandomForestClassifier()
clf_Forest.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [15]:
y_preds = clf_Forest.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.77      0.80      0.78       290
          1       0.84      0.90      0.87       691
          2       0.90      0.24      0.38        75

avg / total       0.83      0.82      0.81      1056



Neural Network - Perceptron

In [16]:
clf_neural = MLPClassifier()
clf_neural.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [17]:
y_preds = clf_neural.predict(X_test)
report = classification_report( y_test, y_preds)
print(report)

             precision    recall  f1-score   support

          0       0.53      0.06      0.10       290
          1       0.66      0.97      0.78       691
          2       0.08      0.01      0.02        75

avg / total       0.58      0.65      0.54      1056

