In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import javabridge
from weka.classifiers import Classifier, Evaluation
from weka.core.converters import Loader
import weka.filters as filters
from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection
from weka.core.classes import Random

In [2]:
import weka.core.jvm as jvm
jvm.start()

DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['/usr/local/lib/python3.8/dist-packages/javabridge/jars/rhino-1.7R4.jar', '/usr/local/lib/python3.8/dist-packages/javabridge/jars/runnablequeue.jar', '/usr/local/lib/python3.8/dist-packages/javabridge/jars/cpython.jar', '/usr/local/lib/python3.8/dist-packages/weka/lib/weka.jar', '/usr/local/lib/python3.8/dist-packages/weka/lib/python-weka-wrapper.jar']
DEBUG:weka.core.jvm:MaxHeapSize=default
DEBUG:weka.core.jvm:Package support disabled


In [3]:
# create a list of dataset filenames
dataset_filenames = ['data1.csv', 'data2.csv', 'data3.csv', 'data4.csv', 'data5.csv',
                     'data6.csv', 'data7.csv', 'data8.csv', 'data9.csv', 'data10.csv',
                     'data11.csv', 'data12.csv', 'data13.csv', 'data14.csv', 'data15.csv']

In [4]:
dataset_filenames = ["Class/binaryAllNaturalPlusNormalVsAttacks/" + str(filename) for filename in dataset_filenames]

In [5]:
jrip_accuracy_on_triple = []
loader = Loader(classname="weka.core.converters.CSVLoader")
for filename in dataset_filenames:
    data = loader.load_file(filename)
    data.class_is_last()
    remove_filter = filters.Filter(classname="weka.filters.unsupervised.attribute.RemoveType", options=["-T", "string"])
    remove_filter.inputformat(data)
    data = remove_filter.filter(data)

    # Perform attribute selection
    # Select the best 4 attributes using BestFirst search and CfsSubsetEval evaluation
    search = ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "1", "-N", "5"])
    eval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "-E", "1"])
    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(eval)
    attsel.select_attributes(data)
    selected_data = attsel.reduce_dimensionality(data)

    # split the selected dataset into training and testing sets
    train, test = selected_data.train_test_split(85)

    # create a new Evaluation object for the selected attributes
    eval = Evaluation(train)

    # build the classifier on the training data
    base_cls = Classifier(classname="weka.classifiers.rules.JRip", options=["-F", "3", "-N", "2.0", "-O", "2"])
    cls = Classifier(classname="weka.classifiers.meta.AdaBoostM1", options=["-P", "100", "-S", "1", "-I", "10", "-W", base_cls.classname, "--"])
    cls.build_classifier(train)

    # make predictions on the test data using the new Evaluation object
    predictions = eval.test_model(cls, test)

    # print the evaluation summary
    accuracy = eval.percent_correct
    jrip_accuracy_on_triple.append(accuracy)
    print("Accuracy:", accuracy, "%")

Accuracy: 12.751677852348994 %
Accuracy: 16.710526315789473 %
Accuracy: 9.975369458128078 %
Accuracy: 12.179487179487179 %
Accuracy: 8.010335917312661 %
Accuracy: 8.053691275167786 %
Accuracy: 6.751592356687898 %
Accuracy: 15.683814303638645 %
Accuracy: 9.363295880149813 %
Accuracy: 13.293413173652695 %
Accuracy: 2.918781725888325 %
Accuracy: 13.64795918367347 %
Accuracy: 3.034134007585335 %
Accuracy: 9.647979139504564 %
Accuracy: 13.274336283185841 %
