# Weka Experiment

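The definitive notebook for the Weka experiment: for each dataset it trains a Weka J48 classifier (through `PxW`), predicts on every query file, and records the F1 score per query.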

# Preliminaries

## Imports

In [1]:
import pandas as pd
import arff
import os
import numpy as np
import aaai20
import PxW

from os.path import dirname
from aaai20.io import filename_dataset, filename_query
from aaai20.exp import collect_results, process_outcomes, save_outcome
from sklearn.model_selection import train_test_split
from modulo.utils.encoding import query_to_code, code_to_query, encode_attribute

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score

RANDOM_STATE = 42

## Helpers

In [2]:
def f1_weka(out, average='macro'):
    """
    Compute the F1 score between the 'actual' and 'predicted' columns of `out`.

    A single LabelEncoder is fitted on every value found in `out`, so both
    columns are mapped to integers with the same coding before scoring.

    Parameters
    ----------
    out:
        Table of predictions with 'actual' and 'predicted' columns
        (presumably a pandas DataFrame -- it must support `.values` and `.apply`).
    average:
        Averaging mode forwarded to sklearn's `f1_score`.

    Returns
    -------
    The value returned by `f1_score` for the encoded columns.
    """
    # Fit one encoder on the flattened table so labels share a single coding.
    every_label = out.values.reshape(-1,)
    encoder = LabelEncoder()
    encoder.fit(every_label)

    # Encode column by column, then score.
    encoded = out.apply(encoder.transform)
    return f1_score(encoded['actual'], encoded['predicted'], average=average)

## Functions

In [3]:
def fit_weka(ds):
    """
    Train a PxW J48 classifier on the training file of dataset `ds`.

    The training file is the one returned by
    `filename_dataset(ds, step=1, suffix='train')`.

    Parameters
    ----------
    ds:
        Dataset identifier handed to `filename_dataset`.

    Returns
    -------
    The `PxW.J48` instance after `fit` has been called on it.
    """
    train_path = filename_dataset(ds, step=1, suffix='train')

    model = PxW.J48()
    model.fit(train_path, verbose=False)
    return model

In [4]:
def predict_weka(dataset, classifier, f1_average='macro'):
    """
    Score a fitted classifier on every query of a dataset.

    The query codes are loaded with `np.load` from the file returned by
    `filename_query(dataset, suffix="default")`. For the query at position
    `i`, predictions are made on the step-2 dataset file whose suffix is
    `q_<i>` (index zero-padded to three digits) and scored with `f1_weka`.

    Parameters
    ----------
    dataset:
        Dataset identifier handed to `filename_query` and `filename_dataset`.
    classifier:
        Fitted model exposing `predict(filename, verbose=...)`; the value it
        returns is passed straight to `f1_weka`.
    f1_average:
        Averaging mode forwarded to `f1_weka`.

    Returns
    -------
    q_codes:
        The query codes exactly as loaded from disk.
    result: list
        One F1 score per query, in the same order as `q_codes`.
    """
    result = []

    # Get queries
    fn_qry = filename_query(dataset, suffix="default")
    q_codes = np.load(fn_qry)

    # Only the position of each query is needed to locate its file.
    for q_idx, _ in enumerate(q_codes):
        fn = filename_dataset(dataset, step=2, suffix='q_{}'.format(str(q_idx).zfill(3)))

        # Predict with the classifier that was passed in, not with whatever
        # object happens to be bound to a notebook-level name.
        out = classifier.predict(fn, verbose=True)

        f1 = f1_weka(out, average=f1_average)
        result.append(f1)

    return q_codes, result

# Actual Flow

In [5]:
# Names of the datasets to run; each one is passed to fit_weka and predict_weka.
datasets = ['glass',
             'credit-g',
             'ionosphere',
             'lymph',
             'vehicle',
             'iris',
             'splice',
             'sonar',
             'vowel',
             'segment',
             'zoo',
             'heart-statlog',
             'waveform-5000',
             'kr-vs-kp',
             'diabetes',
             'letter',
             'balance-scale']

# Show how many datasets will be processed.
print(len(datasets))

# One entry per dataset; the empty-list placeholders are overwritten in the loop.
dataframes = {k:[] for k in datasets}

for ds in datasets:
    # Print the dataset name as a progress marker.
    print(ds)
    clf = fit_weka(ds)
    
    # Note: micro-averaged F1 is requested here, overriding predict_weka's 'macro' default.
    q_codes, results = predict_weka(ds, clf, f1_average='micro')

    # Store whatever collect_results builds from the query codes and scores.
    dataframes[ds] = collect_results(ds, q_codes, results, algorithm='weka')
    
# Hand the per-dataset results to process_outcomes and save its output under the name 'weka'.
df = process_outcomes(dataframes)
save_outcome(df, filename='weka')
   

[2019-10-04 08:09:53,376] INFO - prefect.FlowRunner | Beginning Flow run for 'fit'
[2019-10-04 08:09:53,377] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:09:53,384] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:09:53,385] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:09:53,386] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...


17
glass


[2019-10-04 08:09:55,015] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:09:55,017] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:09:55,027] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:09:55,028] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:09:55,031] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:09:55,032] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:09:55,033] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:09:55,239] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:09:55,240] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:09:55,390] INFO - prefect.FlowRunner | Beginni

credit-g


[2019-10-04 08:09:57,480] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:09:57,480] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:09:57,483] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:09:57,483] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:09:57,486] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:09:57,487] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:09:57,487] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:09:57,693] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:09:57,694] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:09:57,700] INFO - prefect.FlowRunner | Beginni

ionosphere


[2019-10-04 08:10:00,103] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:00,104] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:00,106] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:00,107] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:00,109] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:00,110] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:00,111] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:00,307] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:00,308] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:00,314] INFO - prefect.FlowRunner | Beginni

lymph


[2019-10-04 08:10:02,549] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:02,550] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:02,552] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:02,553] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:02,555] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:02,556] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:02,557] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:02,748] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:02,750] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:02,756] INFO - prefect.FlowRunner | Beginni

vehicle


[2019-10-04 08:10:04,958] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:04,959] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:04,962] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:04,962] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:04,965] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:04,966] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:04,966] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:05,168] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:05,170] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:05,176] INFO - prefect.FlowRunner | Beginni

iris


[2019-10-04 08:10:07,519] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:07,520] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:07,522] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:07,523] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:07,525] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:07,526] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:07,527] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:07,719] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:07,720] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:07,727] INFO - prefect.FlowRunner | Beginni

splice


[2019-10-04 08:10:09,248] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:09,249] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:09,251] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:09,252] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:09,256] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:09,257] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:09,259] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:09,527] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:09,529] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:09,536] INFO - prefect.FlowRunner | Beginni

sonar


[2019-10-04 08:10:12,533] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:12,534] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:12,536] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:12,536] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:12,539] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:12,540] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:12,541] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:12,739] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:12,740] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:12,746] INFO - prefect.FlowRunner | Beginni

vowel


[2019-10-04 08:10:15,053] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:15,054] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:15,056] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:15,056] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:15,059] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:15,060] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:15,061] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:15,266] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:15,267] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:15,274] INFO - prefect.FlowRunner | Beginni

segment


[2019-10-04 08:10:17,807] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:17,808] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:17,810] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:17,811] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:17,813] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:17,814] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:17,815] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:18,039] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:18,040] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:18,047] INFO - prefect.FlowRunner | Beginni

zoo


[2019-10-04 08:10:20,480] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:20,481] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:20,483] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:20,483] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:20,486] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:20,487] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:20,487] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:20,672] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:20,674] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:20,686] INFO - prefect.FlowRunner | Beginni

heart-statlog


[2019-10-04 08:10:22,863] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:22,864] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:22,866] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:22,867] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:22,870] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:22,871] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:22,871] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:23,062] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:23,063] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:23,069] INFO - prefect.FlowRunner | Beginni

waveform-5000


[2019-10-04 08:10:25,720] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:25,721] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:25,723] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:25,724] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:25,729] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:25,730] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:25,731] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:25,987] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:25,988] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:25,995] INFO - prefect.FlowRunner | Beginni

kr-vs-kp


[2019-10-04 08:10:29,016] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:29,017] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:29,019] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:29,020] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:29,022] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:29,023] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:29,024] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:29,247] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:29,248] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:29,255] INFO - prefect.FlowRunner | Beginni

diabetes


[2019-10-04 08:10:31,759] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:31,760] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:31,762] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:31,763] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:31,766] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:31,767] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:31,768] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:31,957] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:31,958] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:31,965] INFO - prefect.FlowRunner | Beginni

letter


[2019-10-04 08:10:34,892] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:34,893] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:34,895] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:34,896] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:34,899] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:34,900] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:34,901] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:35,216] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:35,217] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:35,228] INFO - prefect.FlowRunner | Beginni

balance-scale


[2019-10-04 08:10:48,313] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:48,314] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:48,316] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-10-04 08:10:48,317] INFO - prefect.FlowRunner | Starting flow run.
[2019-10-04 08:10:48,319] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-10-04 08:10:48,320] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-10-04 08:10:48,321] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-10-04 08:10:48,516] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-10-04 08:10:48,517] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-10-04 08:10:48,523] INFO - prefect.FlowRunner | Beginni