Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel

  from numpy.core.umath_tests import inner1d


Load data

In [2]:
data = pd.read_csv("preprocessed_data.csv")

Split dataset

In [5]:
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:,1:34], data["class"], random_state=69, test_size=0.33)

Naive Bayes

In [18]:
pipe = Pipeline(
        [('select', SelectFromModel(LogisticRegression(C=1.5, penalty='l1', dual=False))),
        ('model', BernoulliNB())])
param_grid = [{}] # Optionally add parameters here
grid_search = GridSearchCV(pipe, param_grid,cv=StratifiedKFold(n_splits=5, random_state=69).split(X_train, y_train), verbose=2)
model = grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV]  ................................................................
[CV] ................................................. , total=   0.8s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s


[CV] ................................................. , total=   1.0s
[CV]  ................................................................
[CV] ................................................. , total=   0.8s
[CV]  ................................................................
[CV] ................................................. , total=   0.9s
[CV]  ................................................................
[CV] ................................................. , total=   0.9s


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    4.7s finished


In [19]:
y_preds = model.predict(X_test)
report = classification_report( y_test, y_preds )
print(report)

             precision    recall  f1-score   support

          0       0.40      0.07      0.11       302
          1       0.65      0.96      0.77       683
          2       0.00      0.00      0.00        71

avg / total       0.53      0.64      0.53      1056



  'precision', 'predicted', average, warn_for)


Linear SVC

In [20]:
pipe = Pipeline(
        [('select', SelectFromModel(LogisticRegression(C=2, penalty='l1', dual=False))),
        ('model', LinearSVC(C=0.75))])
param_grid = [{}] # Optionally add parameters here
grid_search = GridSearchCV(pipe, param_grid,cv=StratifiedKFold().split(X_train, y_train), verbose=2)
model = grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV]  ................................................................
[CV] ................................................. , total=   0.7s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV] ................................................. , total=   0.5s
[CV]  ................................................................
[CV] ................................................. , total=   0.8s


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.3s finished


In [21]:
y_preds = model.predict(X_test)
report = classification_report( y_test, y_preds )
print(report)

             precision    recall  f1-score   support

          0       0.78      0.67      0.72       302
          1       0.79      0.92      0.85       683
          2       0.50      0.01      0.03        71

avg / total       0.77      0.78      0.75      1056



Logistic Regression

In [38]:
pipe = Pipeline(
        [('select', SelectFromModel(LogisticRegression(C=2, penalty='l1',dual=False))),
        ('model', LogisticRegression(C=2,penalty='l2',dual=False))])
param_grid = [{}] # Optionally add parameters here
grid_search = GridSearchCV(pipe, param_grid,cv=StratifiedKFold().split(X_train, y_train), verbose=2)
model = grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV]  ................................................................
[CV] ................................................. , total=   0.7s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV] ................................................. , total=   0.4s
[CV]  ................................................................
[CV] ................................................. , total=   0.8s


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.1s finished


In [39]:
y_preds = model.predict(X_test)
report = classification_report( y_test, y_preds )
print(report)

             precision    recall  f1-score   support

          0       0.80      0.62      0.70       302
          1       0.78      0.93      0.84       683
          2       0.50      0.01      0.03        71

avg / total       0.76      0.78      0.75      1056



Decision Tree

In [40]:
pipe = Pipeline(
        [('select', SelectFromModel(LogisticRegression(C=0.75, penalty='l2', dual=False))),
        ('model', DecisionTreeClassifier())])
param_grid = [{}] # Optionally add parameters here
grid_search = GridSearchCV(pipe, param_grid,cv=StratifiedKFold().split(X_train, y_train), verbose=2)
model = grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV]  ................................................................
[CV] ................................................. , total=   0.0s
[CV]  ................................................................
[CV] ................................................. , total=   0.0s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s finished


[CV] ................................................. , total=   0.0s


In [41]:
y_preds = model.predict(X_test)
report = classification_report( y_test, y_preds )
print(report)

             precision    recall  f1-score   support

          0       0.71      0.70      0.71       302
          1       0.82      0.82      0.82       683
          2       0.48      0.49      0.49        71

avg / total       0.77      0.77      0.77      1056



Random Forest

In [42]:
pipe = Pipeline(
        [('select', SelectFromModel(LogisticRegression(C=0.75,penalty='l1', dual=False))),
        ('model', RandomForestClassifier(n_estimators=300,random_state=0))])
param_grid = [{}] # Optionally add parameters here
grid_search = GridSearchCV(pipe, param_grid,cv=StratifiedKFold().split(X_train, y_train), verbose=2)
model = grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV]  ................................................................
[CV] ................................................. , total=   2.4s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.5s remaining:    0.0s


[CV] ................................................. , total=   2.1s
[CV]  ................................................................
[CV] ................................................. , total=   2.4s


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    7.5s finished


In [43]:
y_preds = model.predict(X_test)
report = classification_report( y_test, y_preds )
print(report)

             precision    recall  f1-score   support

          0       0.85      0.78      0.81       302
          1       0.84      0.93      0.89       683
          2       0.88      0.30      0.44        71

avg / total       0.85      0.85      0.83      1056



Neural Network - Perceptron

In [46]:
pipe = Pipeline(
        [('select', SelectFromModel(LinearSVC(C=0.05,max_iter=2000))),
        ('model', MLPClassifier(activation='relu',solver='lbfgs'))])
param_grid = [{}] # Optionally add parameters here
grid_search = GridSearchCV(pipe, param_grid,cv=StratifiedKFold().split(X_train, y_train), verbose=2)
model = grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[CV]  ................................................................
[CV] ................................................. , total=   1.3s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.3s remaining:    0.0s


[CV] ................................................. , total=   1.3s
[CV]  ................................................................
[CV] ................................................. , total=   1.3s


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    4.0s finished


In [47]:
y_preds = model.predict(X_test)
report = classification_report( y_test, y_preds )
print(report)

             precision    recall  f1-score   support

          0       0.75      0.76      0.76       302
          1       0.84      0.87      0.85       683
          2       0.64      0.38      0.48        71

avg / total       0.80      0.80      0.80      1056

