# Weka Experiment

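The definitive notebook for the Weka baseline: for each dataset, a J48 classifier is trained through `PxW`, every query is predicted, and the per-query macro-F1 scores are collected and saved.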

# Preliminaries

## Imports

In [1]:
import pandas as pd
import arff
import os
import numpy as np
import aaai20
import PxW

from os.path import dirname
from aaai20.io import filename_dataset, filename_query
from aaai20.exp import collect_results, process_outcomes, save_outcome
from sklearn.model_selection import train_test_split
from modulo.utils.encoding import query_to_code, code_to_query, encode_attribute

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score

RANDOM_STATE = 42

## Helpers

In [2]:
def f1_weka(out, average='macro'):
    """Compute the F1 score between the 'actual' and 'predicted' columns of `out`.

    Parameters
    ----------
    out :
        Table with (at least) the columns 'actual' and 'predicted'; it must
        expose `.values` and `.apply` (e.g. a pandas DataFrame).
    average : str, default 'macro'
        Averaging mode forwarded to `f1_score`.

    Returns
    -------
    The value returned by `f1_score` on the encoded columns.
    """
    # One encoder is fitted on every cell of the table, so that all columns
    # share the same label-to-integer mapping.
    encoder = LabelEncoder()
    encoder.fit(out.values.ravel())

    # Column-wise encoding; the caller's table is left untouched.
    encoded = out.apply(encoder.transform)

    return f1_score(encoded['actual'], encoded['predicted'], average=average)

## Functions

In [3]:
def fit_weka(ds):
    """Train a `PxW.J48` classifier on the training file of dataset `ds`.

    Parameters
    ----------
    ds :
        Dataset identifier, forwarded to `filename_dataset`.

    Returns
    -------
    The `PxW.J48` instance after `fit` has been called on it.
    """
    # Locate the step-1 training file for this dataset.
    training_file = filename_dataset(ds, step=1, suffix='train')

    # Build and fit the model (non-verbose).
    model = PxW.J48()
    model.fit(training_file, verbose=False)

    return model

In [4]:
def predict_weka(dataset, classifier, f1_average='macro'):
    """Score a fitted classifier on every query of a dataset.

    Parameters
    ----------
    dataset :
        Dataset identifier, forwarded to `filename_query` and
        `filename_dataset`.
    classifier :
        Fitted model exposing `predict(filename, verbose=...)`; its return
        value is handed to `f1_weka`.
    f1_average : str, default 'macro'
        Averaging mode forwarded to `f1_weka`.

    Returns
    -------
    q_codes :
        The query codes loaded with `np.load` from the dataset's "default"
        query file.
    result : list
        One F1 score per entry of `q_codes`, in the same order.
    """
    # Get queries
    fn_qry = filename_query(dataset, suffix="default")
    q_codes = np.load(fn_qry)

    result = []
    for q_idx, _ in enumerate(q_codes):
        # One step-2 file per query, identified by the zero-padded query index.
        fn = filename_dataset(dataset, step=2, suffix='q_{}'.format(str(q_idx).zfill(3)))

        # Use the classifier that was passed in; relying on a notebook-global
        # `clf` here would score whatever model happens to be bound globally.
        out = classifier.predict(fn, verbose=True)

        result.append(f1_weka(out, average=f1_average))

    return q_codes, result

# Actual Flow

In [5]:
# Datasets to evaluate, in processing order.
datasets = [
    'glass',
    'credit-g',
    'ionosphere',
    'lymph',
    'vehicle',
    'iris',
    'splice',
    'sonar',
    'vowel',
    'segment',
    'zoo',
    'heart-statlog',
    'waveform-5000',
    'kr-vs-kp',
    'diabetes',
    'letter',
    'balance-scale',
]

print(len(datasets))

# Every dataset starts with an empty placeholder that is replaced in the loop.
dataframes = {name: [] for name in datasets}

for ds in datasets:
    print(ds)

    # Train on the dataset, then score the fitted model on each of its queries.
    clf = fit_weka(ds)
    q_codes, results = predict_weka(ds, clf, f1_average='macro')

    dataframes[ds] = collect_results(ds, q_codes, results, algorithm='weka')

# Combine the per-dataset results and store them under the name 'weka'.
df = process_outcomes(dataframes)
save_outcome(df, filename='weka')
   

[2019-09-04 13:25:54,546] INFO - prefect.FlowRunner | Beginning Flow run for 'fit'
[2019-09-04 13:25:54,548] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:25:54,554] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:25:54,556] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:25:54,557] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...


17
glass


[2019-09-04 13:25:55,316] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:25:55,320] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:25:55,324] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:25:55,325] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:25:55,329] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:25:55,331] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:25:55,332] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:25:55,566] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:25:55,567] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
  'precision', 'predicted', average, warn_for)
  'recall', 't

credit-g


[2019-09-04 13:25:58,446] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:25:58,447] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:25:58,450] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:25:58,452] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:25:58,458] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:25:58,460] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:25:58,461] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:25:58,743] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:25:58,744] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:25:58,754] INFO - prefect.FlowRunner | Beginni

ionosphere


[2019-09-04 13:26:02,008] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:02,009] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:02,013] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:02,014] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:02,020] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:02,022] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:02,023] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:02,262] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:02,263] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:02,273] INFO - prefect.FlowRunner | Beginni

lymph


[2019-09-04 13:26:05,176] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:05,177] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:05,183] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:05,185] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:05,198] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:05,203] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:05,205] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:05,458] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:05,460] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
  'recall', 'true', average, warn_for)
[2019-09-04 13:26:05,4

vehicle


[2019-09-04 13:26:08,712] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:08,713] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:08,718] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:08,719] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:08,725] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:08,728] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:08,729] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:09,057] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:09,058] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:09,070] INFO - prefect.FlowRunner | Beginni

iris


[2019-09-04 13:26:12,330] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:12,331] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:12,335] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:12,337] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:12,342] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:12,344] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:12,346] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:12,566] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:12,568] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:12,580] INFO - prefect.FlowRunner | Beginni

splice


[2019-09-04 13:26:14,838] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:14,839] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:14,844] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:14,845] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:14,852] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:14,854] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:14,856] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:15,528] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:15,529] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:15,545] INFO - prefect.FlowRunner | Beginni

sonar


[2019-09-04 13:26:20,457] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:20,458] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:20,461] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:20,463] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:20,468] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:20,470] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:20,471] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:20,704] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:20,705] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:20,714] INFO - prefect.FlowRunner | Beginni

vowel


[2019-09-04 13:26:23,829] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:23,830] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:23,834] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:23,835] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:23,845] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:23,846] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:23,847] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:24,107] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:24,108] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:24,119] INFO - prefect.FlowRunner | Beginni

segment


[2019-09-04 13:26:27,611] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:27,612] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:27,616] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:27,617] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:27,620] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:27,623] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:27,624] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:27,917] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:27,918] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:27,929] INFO - prefect.FlowRunner | Beginni

zoo


[2019-09-04 13:26:31,318] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:31,319] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:31,323] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:31,325] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:31,332] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:31,333] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:31,334] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:31,586] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:31,587] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
  'precision', 'predicted', average, warn_for)
[2019-09-04 13

heart-statlog


[2019-09-04 13:26:34,453] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:34,454] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:34,458] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:34,459] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:34,463] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:34,465] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:34,465] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:34,691] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:34,693] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:34,702] INFO - prefect.FlowRunner | Beginni

waveform-5000


[2019-09-04 13:26:38,426] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:38,427] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:38,431] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:38,432] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:38,439] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:38,440] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:38,441] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:38,842] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:38,844] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:38,857] INFO - prefect.FlowRunner | Beginni

kr-vs-kp


[2019-09-04 13:26:43,242] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:43,244] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:43,248] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:43,249] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:43,253] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:43,255] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:43,256] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:43,569] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:43,571] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:43,581] INFO - prefect.FlowRunner | Beginni

diabetes


[2019-09-04 13:26:47,180] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:47,181] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:47,185] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:47,186] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:47,193] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:47,194] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:47,196] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:47,483] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:47,484] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:47,494] INFO - prefect.FlowRunner | Beginni

letter


[2019-09-04 13:26:52,315] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:52,316] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:52,320] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:26:52,322] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:26:52,326] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:26:52,327] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:26:52,328] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:26:52,899] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:26:52,901] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:26:52,916] INFO - prefect.FlowRunner | Beginni

balance-scale


[2019-09-04 13:27:07,828] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:27:07,829] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:27:07,833] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-04 13:27:07,835] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-04 13:27:07,845] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-04 13:27:07,846] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-04 13:27:07,847] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-04 13:27:08,092] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-04 13:27:08,093] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-04 13:27:08,103] INFO - prefect.FlowRunner | Beginni