In [1]:
import numpy as np
import pandas as pd
import random
import os
#from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import precision_score as precision
from sklearn.metrics import recall_score as recall
from sklearn.metrics import f1_score as f1

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier

In [2]:
import warnings
warnings.filterwarnings("ignore")  # "error", "ignore", "always", "default", "module" or "once"

# Binary Classification

In [3]:
#declear path to your data
krono_data_path1 = 'data/kronodroid.csv'
# Importing the dataset
Krono_data = pd.read_csv(krono_data_path1)
Krono_data = Krono_data.sample(frac = 1)
X = Krono_data.iloc[:,range(1,Krono_data.shape[1]-1)].values
y = Krono_data.iloc[:, -1].values

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y.astype(int), test_size = 0.3, random_state = 0)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

In [4]:
classifiers = {"RFC": RandomForestClassifier(), "SVM": SVC(kernel = 'poly', degree=3), "SGD": SGDClassifier(), "XGB": XGBClassifier(), "LGBM": LGBMClassifier()} 

for classifier_pair in classifiers.items():
    print("---------------------------")
    print(classifier_pair[0])
    
    classifier = classifier_pair[1]
    classifier.fit(X_train, y_train)
    
    # Predicting the Test set results
    y_pred = classifier.predict(X_test)
    print(y_pred)

    # Making the Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    print('Confusion Matrix', cm)

    #compute accuracy_score
    accuracy = acc(y_test, y_pred)
    print('accuracy', accuracy)

    #compute precision score
    precision_score = precision(y_test, y_pred, average='micro')
    print('precision', precision_score)

    #compute recall score
    recall_score = recall(y_test, y_pred)
    print('recall', recall_score)

    #compute f1 score
    f1_score = f1(y_test, y_pred)
    print('f1', f1_score)
    
print("---------------------------")

---------------------------
RFC
[0 1 1 ... 1 1 1]
Confusion Matrix [[10726   335]
 [  795 11586]]
accuracy 0.9517959218496715
precision 0.9517959218496715
recall 0.9357887085049673
f1 0.9535017694016953
---------------------------
SVM
[0 1 1 ... 1 1 1]
Confusion Matrix [[10943   118]
 [ 2080 10301]]
accuracy 0.9062366692261753
precision 0.9062366692261753
recall 0.8320006461513609
f1 0.9035964912280702
---------------------------
SGD
[0 1 1 ... 1 1 1]
Confusion Matrix [[10273   788]
 [  731 11650]]
accuracy 0.9352017745926116
precision 0.9352017745926116
recall 0.9409579193926177
f1 0.938796889479834
---------------------------
XGB
[0 1 1 ... 1 1 1]
Confusion Matrix [[ 9889  1172]
 [  211 12170]]
accuracy 0.9410033273611467
precision 0.9410033273611467
recall 0.9829577578547775
f1 0.94623488706605
---------------------------
LGBM
[0 1 1 ... 1 1 1]
Confusion Matrix [[ 9830  1231]
 [  238 12143]]
accuracy 0.937334698404573
precision 0.937334698404573
recall 0.98077699701155
f1 0.94296253

# Ensemble Learning - Majority Voting

In [5]:
def majority_voting(classifiers):
    y_preds = np.ndarray(shape=(5,len(y_test)))
    i=0
    for classifier_pair in classifiers.items():
        classifier = classifier_pair[1]
        # Predicting the Test set results
        y_preds[i] = classifier.predict(X_test)
        i += 1
    y_preds = np.transpose(y_preds)
    y_pred = []
    for preds in y_preds:
        if sum(preds) >= 3:
            y_pred.append(1)
        else:
            y_pred.append(0)
    #compute accuracy_score
    accuracy = acc(y_test, y_pred)
    print('accuracy', accuracy)
    return accuracy

In [7]:
majority_voting(classifiers)

accuracy 0.9626311748144356


0.9626311748144356