# Run Weka Experiment

In [1]:
import PxW
import pandas as pd
import arff
import os
import numpy as np
from os.path import dirname

RANDOM_STATE = 42

def filename(basename, step=1, prefix="", suffix="", extension="arff", check=True):
    
    filename = "-".join([x for x in (prefix, basename, suffix) if len(x) > 0])+".{}".format(extension)
    
    root_dir = dirname(os.getcwd())
    data_dir = os.path.relpath(os.path.join(root_dir, 'data'))
    step_dir = os.path.join(data_dir, "step-"+str(step).zfill(2))

    if check:
        if not os.path.exists(step_dir):
            os.makedirs(step_dir)
    

    return os.path.join(step_dir, filename)

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import f1_score

def f1_weka(out, average='macro'):
    out = out.apply(LabelEncoder().fit_transform)
    f1 = f1_score(out['actual'], out['predicted'], average=average)
    return f1

In [2]:
root_dir = dirname(os.getcwd())
data_dir = os.path.relpath(os.path.join(root_dir, 'data'))

step = 2 # Where final datasets reside.
step_dir = os.path.join(data_dir, "step-"+str(step).zfill(2))

datasets = ['iris']

In [3]:
res = []
for ds in datasets:
    
    # Train
    fn_train = filename(ds, step=1, suffix='train')
    clf = PxW.J48()
    clf.fit(fn_train, verbose=False)
    
    # Test
    fn_qry = [os.path.join(step_dir, fn) for fn in os.listdir(step_dir) if ds in fn]
    fn_qry.sort()
    
    for q_idx, fn in enumerate(fn_qry):
        print(q_idx)
        print(fn)
        out = clf.predict(fn, verbose=True)
        f1 = f1_weka(out, average='macro')
        res.append(f1)
    

[2019-08-28 10:26:40,630] INFO - prefect.FlowRunner | Beginning Flow run for 'fit'
[2019-08-28 10:26:40,632] INFO - prefect.FlowRunner | Starting flow run.
[2019-08-28 10:26:40,641] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-08-28 10:26:40,643] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-08-28 10:26:40,644] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-08-28 10:26:41,090] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-08-28 10:26:41,093] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-08-28 10:26:41,096] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-08-28 10:26:41,097] INFO - prefect.FlowRunner | Starting flow run.
[2019-08-28 10:26:41,102] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-08-28 10:26:41,103] INFO - pref

0
../data/step-02/iris-q_000.arff


[2019-08-28 10:26:41,321] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-08-28 10:26:41,322] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-08-28 10:26:41,341] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-08-28 10:26:41,342] INFO - prefect.FlowRunner | Starting flow run.
[2019-08-28 10:26:41,346] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-08-28 10:26:41,348] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-08-28 10:26:41,349] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...


1
../data/step-02/iris-q_001.arff


[2019-08-28 10:26:41,565] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-08-28 10:26:41,568] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
  'precision', 'predicted', average, warn_for)
[2019-08-28 10:26:41,580] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-08-28 10:26:41,581] INFO - prefect.FlowRunner | Starting flow run.
[2019-08-28 10:26:41,586] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-08-28 10:26:41,588] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-08-28 10:26:41,589] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...


2
../data/step-02/iris-q_002.arff


[2019-08-28 10:26:41,819] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-08-28 10:26:41,820] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
  'precision', 'predicted', average, warn_for)
[2019-08-28 10:26:41,834] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-08-28 10:26:41,835] INFO - prefect.FlowRunner | Starting flow run.
[2019-08-28 10:26:41,842] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-08-28 10:26:41,845] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-08-28 10:26:41,846] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...


3
../data/step-02/iris-q_003.arff


[2019-08-28 10:26:42,078] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-08-28 10:26:42,079] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
  'precision', 'predicted', average, warn_for)


In [4]:
res

[0.9665831244778613,
 0.2501414827391058,
 0.2501414827391058,
 0.2501414827391058]