# Weka Experiment

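The definitive notebook for the Weka experiment: for each dataset it fits a J48 classifier (via `PxW`), computes one F1 score per query, and saves the collected results.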

# Preliminaries

## Imports

In [1]:
import pandas as pd
import arff
import os
import numpy as np
import aaai20
import PxW

from os.path import dirname
from aaai20.io import filename_dataset, filename_query
from aaai20.exp import collect_results, process_outcomes, save_outcome
from sklearn.model_selection import train_test_split
from modulo.utils.encoding import query_to_code, code_to_query, encode_attribute

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score

RANDOM_STATE = 42

## Helpers

In [2]:
def f1_weka(out, average='macro'):
    """Compute the F1 score between the 'actual' and 'predicted' columns of `out`.

    Parameters
    ----------
    out :
        Prediction table; the code reads `out.values`, calls `out.apply`, and
        indexes the 'actual' and 'predicted' columns (presumably a pandas
        DataFrame -- verify against what the classifier's `predict` returns).
    average : str, default 'macro'
        Averaging mode forwarded to `f1_score`.

    Returns
    -------
    The value returned by `f1_score` on the integer-encoded columns.
    """
    # One shared encoder, fitted on every value in the table, so that a given
    # label maps to the same integer in all columns.
    encoder = LabelEncoder()
    encoder.fit(out.values.reshape(-1,))

    encoded = out.apply(encoder.transform)
    return f1_score(encoded['actual'], encoded['predicted'], average=average)

## Functions

In [3]:
def fit_weka(ds):
    """Fit a `PxW.J48` classifier on the training file of dataset `ds`.

    Parameters
    ----------
    ds :
        Dataset identifier passed to `filename_dataset` (a dataset name string
        in this notebook's driver loop).

    Returns
    -------
    The `PxW.J48` instance after `fit` has been called on it.
    """
    # Resolve the training file (step 1, 'train' suffix) for this dataset.
    train_path = filename_dataset(ds, suffix='train', step=1)

    # Train quietly.
    model = PxW.J48()
    model.fit(train_path, verbose=False)
    return model

In [4]:
def predict_weka(dataset, classifier, f1_average='macro'):
    """Score a fitted classifier on every query file of a dataset.

    Parameters
    ----------
    dataset :
        Dataset identifier passed to `filename_query` and `filename_dataset`.
    classifier :
        Fitted model exposing `predict(filename, verbose=...)`; its output is
        handed to `f1_weka`.
    f1_average : str, default 'macro'
        Averaging mode forwarded to `f1_weka`.

    Returns
    -------
    q_codes :
        The query codes as loaded by `np.load` from the dataset's query file.
    result : list
        One F1 score per query, in the same order as `q_codes`.
    """
    # Get queries
    fn_qry = filename_query(dataset, suffix="default")
    q_codes = np.load(fn_qry)

    result = []
    for q_idx, _ in enumerate(q_codes):
        # Query files are addressed by a zero-padded, three-digit query index.
        fn = filename_dataset(dataset, step=2, suffix='q_{}'.format(str(q_idx).zfill(3)))

        # Predict with the classifier that was passed in, so the function does
        # not depend on a notebook-level variable being defined.
        out = classifier.predict(fn, verbose=True)

        result.append(f1_weka(out, average=f1_average))

    return q_codes, result

# Actual Flow

In [5]:
# Datasets included in the experiment.
datasets = [
    'glass',
    'credit-g',
    'ionosphere',
    'lymph',
    'vehicle',
    'iris',
    'splice',
    'sonar',
    'vowel',
    'segment',
    'zoo',
    'heart-statlog',
    'waveform-5000',
    'kr-vs-kp',
    'diabetes',
    'letter',
    'balance-scale',
]

print(len(datasets))

# One entry per dataset; each placeholder list is replaced by that dataset's
# collected results in the loop below.
dataframes = {name: [] for name in datasets}

for ds in datasets:
    print(ds)

    # Train on the dataset, then score every query with micro-averaged F1.
    clf = fit_weka(ds)
    q_codes, results = predict_weka(ds, clf, f1_average='micro')

    dataframes[ds] = collect_results(ds, q_codes, results, algorithm='weka')

# Combine the per-dataset results and save them under the name 'weka'.
df = process_outcomes(dataframes)
save_outcome(df, filename='weka')
   

[2019-09-26 08:20:17,779] INFO - prefect.FlowRunner | Beginning Flow run for 'fit'
[2019-09-26 08:20:17,780] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:17,783] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:17,784] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:17,785] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...


17
glass


[2019-09-26 08:20:19,323] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:19,327] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:19,352] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:19,353] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:19,357] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:19,359] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:19,359] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:19,565] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:19,566] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:19,684] INFO - prefect.FlowRunner | Beginni

credit-g


[2019-09-26 08:20:21,855] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:21,856] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:21,859] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:21,860] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:21,863] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:21,864] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:21,865] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:22,069] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:22,070] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:22,077] INFO - prefect.FlowRunner | Beginni

ionosphere


[2019-09-26 08:20:24,527] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:24,528] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:24,546] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:24,546] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:24,549] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:24,550] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:24,550] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:24,780] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:24,781] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:24,789] INFO - prefect.FlowRunner | Beginni

lymph


[2019-09-26 08:20:27,022] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:27,022] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:27,025] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:27,026] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:27,030] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:27,031] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:27,032] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:27,221] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:27,222] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:27,229] INFO - prefect.FlowRunner | Beginni

vehicle


[2019-09-26 08:20:29,499] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:29,500] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:29,503] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:29,504] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:29,506] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:29,507] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:29,508] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:29,713] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:29,716] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:29,730] INFO - prefect.FlowRunner | Beginni

iris


[2019-09-26 08:20:32,106] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:32,107] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:32,109] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:32,110] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:32,113] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:32,114] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:32,114] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:32,300] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:32,302] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:32,309] INFO - prefect.FlowRunner | Beginni

splice


[2019-09-26 08:20:33,488] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:33,489] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:33,492] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:33,493] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:33,495] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:33,496] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:33,496] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:33,775] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:33,776] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:33,783] INFO - prefect.FlowRunner | Beginni

sonar


[2019-09-26 08:20:36,854] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:36,855] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:36,858] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:36,859] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:36,862] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:36,863] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:36,864] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:37,103] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:37,104] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:37,110] INFO - prefect.FlowRunner | Beginni

vowel


[2019-09-26 08:20:39,433] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:39,434] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:39,436] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:39,437] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:39,439] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:39,440] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:39,441] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:39,653] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:39,654] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:39,660] INFO - prefect.FlowRunner | Beginni

segment


[2019-09-26 08:20:42,247] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:42,248] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:42,251] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:42,251] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:42,254] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:42,255] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:42,255] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:42,502] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:42,503] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:42,510] INFO - prefect.FlowRunner | Beginni

zoo


[2019-09-26 08:20:45,189] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:45,190] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:45,202] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:45,203] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:45,208] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:45,211] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:45,212] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:45,431] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:45,433] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:45,439] INFO - prefect.FlowRunner | Beginni

heart-statlog


[2019-09-26 08:20:47,687] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:47,687] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:47,690] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:47,690] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:47,693] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:47,694] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:47,694] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:47,881] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:47,882] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:47,889] INFO - prefect.FlowRunner | Beginni

waveform-5000


[2019-09-26 08:20:50,656] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:50,657] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:50,684] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:50,684] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:50,687] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:50,688] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:50,689] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:50,969] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:50,970] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:50,977] INFO - prefect.FlowRunner | Beginni

kr-vs-kp


[2019-09-26 08:20:54,409] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:54,409] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:54,412] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:54,413] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:54,415] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:54,416] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:54,417] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:54,680] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:54,681] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:54,691] INFO - prefect.FlowRunner | Beginni

diabetes


[2019-09-26 08:20:57,458] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:57,459] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:57,462] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:20:57,462] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:20:57,465] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:20:57,466] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:20:57,467] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:20:57,712] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:20:57,713] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:20:57,720] INFO - prefect.FlowRunner | Beginni

letter


[2019-09-26 08:21:01,194] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:21:01,195] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:21:01,197] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:21:01,198] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:21:01,201] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:21:01,202] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:21:01,203] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:21:01,574] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:21:01,575] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:21:01,590] INFO - prefect.FlowRunner | Beginni

balance-scale


[2019-09-26 08:21:15,050] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:21:15,051] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:21:15,054] INFO - prefect.FlowRunner | Beginning Flow run for 'predict'
[2019-09-26 08:21:15,055] INFO - prefect.FlowRunner | Starting flow run.
[2019-09-26 08:21:15,059] INFO - prefect.TaskRunner | Task 'Constant[str]': Starting task run...
[2019-09-26 08:21:15,060] INFO - prefect.TaskRunner | Task 'Constant[str]': finished task run for task with final state: 'Success'
[2019-09-26 08:21:15,061] INFO - prefect.TaskRunner | Task 'ShellTask': Starting task run...
[2019-09-26 08:21:15,282] INFO - prefect.TaskRunner | Task 'ShellTask': finished task run for task with final state: 'Success'
[2019-09-26 08:21:15,283] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded
[2019-09-26 08:21:15,292] INFO - prefect.FlowRunner | Beginni