In [6]:
import numpy as np
import pandas as pd
import random
import os
#from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import precision_score as precision
from sklearn.metrics import recall_score as recall
from sklearn.metrics import f1_score as f1

from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier

import autokeras as ak
import keras
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf

In [7]:
import warnings
warnings.filterwarnings("ignore")  # "error", "ignore", "always", "default", "module" or "once"

# Binary Classification

In [8]:
#declear path to your data
krono_data_path1 = 'data/kronodroid.csv'
# Importing the dataset
Krono_data = pd.read_csv(krono_data_path1)
Krono_data = Krono_data.sample(frac = 1)
X = Krono_data.iloc[:,range(1,Krono_data.shape[1]-1)].values
y = Krono_data.iloc[:, -1].values

lb = LabelBinarizer()
y = lb.fit_transform(y)

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y.astype(int), test_size = 0.3, random_state = 0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [9]:
# "GaussianNB": GaussianNB(), "DT": DecisionTreeClassifier(), "MLP": MLPClassifier(random_state=1, max_iter=300),
# 47.9%, 65.8%, 79%,  
classifiers = {"RFC": RandomForestClassifier(), "SVM": SVC(kernel = 'poly', degree=3), "SGD": SGDClassifier(), "XGB": XGBClassifier(), "LGBM": LGBMClassifier()} 

for classifier_pair in classifiers.items():
    print("---------------------------")
    print(classifier_pair[0])
    
    classifier = classifier_pair[1]
    classifier.fit(X_train, y_train)
    
    # Predicting the Test set results
    y_pred = classifier.predict(X_test)
    print(y_pred)

    # Making the Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print('Confusion Matrix', cm)

    #compute accuracy_score
    accuracy = acc(y_test, y_pred)
    print('accuracy', accuracy)

    #compute precision score
    precision_score = precision(y_test, y_pred, average='micro')
    print('precision', precision_score)

    #compute recall score
    recall_score = recall(y_test, y_pred)
    print('recall', recall_score)

    #compute f1 score
    f1_score = f1(y_test, y_pred)
    print('f1', f1_score)
    
print("---------------------------")

---------------------------
RFC
[0 1 0 ... 1 0 1]
Confusion Matrix [[10744   303]
 [  811 11584]]
accuracy 0.9524784574694992
precision 0.9524784574694992
recall 0.9345703912868092
f1 0.9541223951898525
---------------------------
SVM
[0 1 0 ... 1 0 0]
Confusion Matrix [[10875   172]
 [ 1959 10436]]
accuracy 0.9090947871342036
precision 0.9090947871342036
recall 0.8419524001613554
f1 0.9073599095770117
---------------------------
SGD
[0 1 0 ... 1 0 1]
Confusion Matrix [[ 9966  1081]
 [  717 11678]]
accuracy 0.9233000597218667
precision 0.9233000597218667
recall 0.9421540943929003
f1 0.9285203148604596
---------------------------
XGB
[0 1 0 ... 1 0 1]
Confusion Matrix [[10365   682]
 [  474 11921]]
accuracy 0.9506868014674515
precision 0.9506868014674515
recall 0.9617587736990723
f1 0.9537563005040404
---------------------------
LGBM
[0 1 0 ... 1 0 1]
Confusion Matrix [[ 9895  1152]
 [  369 12026]]
accuracy 0.9351164576401331
precision 0.9351164576401331
recall 0.9702299314239613
f1 0.9