In [1]:
from numpy import mean
from numpy import std
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import StackingClassifier
from matplotlib import pyplot

In [2]:
import pandas as pd

In [3]:
def get_dataset(path='sample4.csv'):
    """Load the headerless CSV and split it into features and target.

    Parameters
    ----------
    path : str or path-like, default 'sample4.csv'
        Location of the CSV file. The file is read with ``header=None``, so
        its columns are named positionally from ``col_names`` below.

    Returns
    -------
    X : pandas.DataFrame
        Every named column except ``Verdict``.
    y : pandas.Series
        The ``Verdict`` column.
    """
    col_names = ['Direction', 'Saved', 'Verified', 'NoOfArtifacts', 'NoOfSensitiveInformation', 'URLReputationScore', 'DomainReputationScore', 'IPReputationScore', 'EmailReputationScore','PhoneNumbersReputationScore','CryptoWalletReputationScore','ContextualAnalysisScore','Verdict']
    data = pd.read_csv(path, header=None, names=col_names)
    # The target is the last named column; everything before it is a feature.
    # Deriving the list keeps it from drifting out of sync with col_names.
    feature_cols = col_names[:-1]
    X = data[feature_cols] # Features
    y = data.Verdict
    return X, y

In [4]:
# build the stacking ensemble of models
def get_stacking():
    """Return an unfitted StackingClassifier over five base learners.

    The base (level-0) estimators are a logistic regression, a k-nearest
    neighbours classifier, a decision tree, an SVC and a Gaussian naive Bayes
    model, each constructed with its default arguments. A second logistic
    regression is the final (level-1) estimator, and the ensemble is created
    with ``cv=5``.
    """
    # level-0: (name, estimator) pairs, in the order they are stacked
    base_learners = [
        ('lr', LogisticRegression()),
        ('knn', KNeighborsClassifier()),
        ('cart', DecisionTreeClassifier()),
        ('svm', SVC()),
        ('bayes', GaussianNB()),
    ]
    # level-1: meta learner that combines the base learners' predictions
    meta_learner = LogisticRegression()
    return StackingClassifier(estimators=base_learners, final_estimator=meta_learner, cv=5)

In [5]:
def get_models():
    """Return the models to compare, keyed by short name.

    The dict holds the five stand-alone classifiers (each with default
    arguments) followed by the stacking ensemble from ``get_stacking()``;
    insertion order is the evaluation order.
    """
    return {
        'lr': LogisticRegression(),
        'knn': KNeighborsClassifier(),
        'cart': DecisionTreeClassifier(),
        'svm': SVC(),
        'bayes': GaussianNB(),
        'stacking': get_stacking(),
    }

In [6]:
def evaluate_model(model, X, y):
    """Cross-validate ``model`` on ``X``/``y`` and return the accuracy scores.

    Uses repeated stratified k-fold (10 splits, 3 repeats, ``random_state=1``)
    and hands the work to ``cross_val_score`` with ``n_jobs=-1``.
    ``error_score='raise'`` means a failure while fitting or scoring
    propagates instead of being recorded as a score.
    """
    folds = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    return cross_val_score(
        model, X, y,
        scoring='accuracy',
        cv=folds,
        n_jobs=-1,
        error_score='raise',
    )

In [7]:
# load the features and the target
X, y = get_dataset()
# build the candidate models (stand-alone learners plus the stacked ensemble)
models = get_models()
# cross-validate each model, keeping its scores alongside its name
results = []
names = []
for name, model in models.items():
    scores = evaluate_model(model, X, y)
    names.append(name)
    results.append(scores)
    # report mean accuracy with its standard deviation in parentheses
    print('>%s %.3f (%.3f)' % (name, mean(scores), std(scores)))

>lr 0.684 (0.040)
>knn 0.979 (0.014)
>cart 0.980 (0.012)
>svm 0.919 (0.028)
>bayes 0.595 (0.051)
>stacking 0.980 (0.012)
