# PxS on StarAI

Goal of this notebook: get BayesFusion to work with PxS on the StarAI datasets. It should work.

# Preliminaries

## Imports

In [17]:
import dill as pkl
import pandas as pd
import os
import numpy as np

import aaai20

from os.path import dirname
from aaai20.io import (
    filename_dataset,
    filename_query,
    filename_model,
    filename_results,
    build_code_string,
    experiment_suffix,
)
from aaai20.exp import collect_results, process_outcomes, save_outcome

from pxs.core.PxS import PxS

from sklearn.metrics import f1_score

RANDOM_STATE = 42
from mercs.utils.encoding import query_to_code, code_to_query, encode_attribute, get_att

# Functions

## Fit

In [2]:
# Default (empty) keyword arguments for model fitting.
fit_config = {}

In [3]:
def fit_pxs(dataset, model_keyword="default", **fit_config):
    """Fit a PxS model on the training CSV of `dataset` and return it.

    `model_keyword` is embedded in the suffix of the `.xdsl` model filename
    handed to `PxS.fit` as `model_fname`; any extra keyword arguments are
    forwarded to `PxS.fit` unchanged.
    """
    # Resolve both paths up front: the training data and the model file.
    train_path = filename_dataset(
        dataset, step=2, suffix="train-pxs", extension="csv"
    )
    model_path = filename_model(
        dataset, suffix="pxs-{}".format(model_keyword), extension="xdsl"
    )

    model = PxS()
    model.fit(train_path, model_fname=model_path, **fit_config)
    return model

In [4]:
def save_pxs(dataset, classifier, keyword="default"):
    """Serialize `classifier` to the model file for `dataset` and `keyword`.

    The target path comes from `filename_model` with suffix "pxs-<keyword>";
    the object is written with `pkl.dump` (`pkl` is `dill` in this notebook).
    Returns None.
    """
    target = filename_model(dataset, suffix="pxs-{}".format(keyword))

    with open(target, "wb") as handle:
        pkl.dump(classifier, handle)


def load_pxs(dataset, keyword="default"):
    """Load and return the classifier stored for `dataset` and `keyword`.

    Reads the file resolved by `filename_model` with suffix "pxs-<keyword>"
    and deserializes it with `pkl.load` (`pkl` is `dill` in this notebook).
    """
    source = filename_model(dataset, suffix="pxs-{}".format(keyword))

    with open(source, "rb") as handle:
        return pkl.load(handle)

### Tests

In [5]:
# Smoke test: fit a PxS model on the "nltcs" dataset with default settings.
# `ds` and `clf` are reused by later cells.
ds = "nltcs"
clf = fit_pxs(ds)

In [6]:
# Smoke test: write the fitted classifier to disk under the "default" keyword.
save_pxs(ds, clf, keyword="default")

In [7]:
# Smoke test: read the classifier back from disk, replacing the in-memory one.
clf = load_pxs(ds, keyword="default")

## Predict

In [30]:
# Notebook-level prediction settings, unpacked into the prediction flow below.
predict_config = {"prediction_algorithm": "mi"}


def predict_pxs(dataset, classifier, q_idx=None, **predict_config):
    """Score `classifier` on the stored queries of `dataset`.

    Parameters
    ----------
    dataset
        Dataset name, used to resolve the query file and the test CSV.
    classifier
        Fitted model; handed to `single_query_predict`, which calls its
        `predict` method.
    q_idx : None, int, or iterable of int, optional
        Which query indices to evaluate. `None` (default) evaluates every
        query; an integer (Python or NumPy) evaluates exactly that query;
        any other iterable (list, tuple, set, array) evaluates its members.
    **predict_config
        Forwarded to `single_query_predict`. `inference_algorithm` defaults
        to "EPIS_sampling" unless overridden here.

    Returns
    -------
    tuple
        `(q_idx_return, q_codes_return, results)`: the list of evaluated
        query indices, the matching query codes stacked with `np.vstack`,
        and a dict with per-query score lists under "f1_micro" and
        "f1_macro".

    Raises
    ------
    ValueError
        If `q_idx` selects none of the available queries.
    """
    # Caller-supplied settings win over the default inference algorithm.
    predict_config = {"inference_algorithm": "EPIS_sampling", **predict_config}

    # Load queries
    fn_qry = filename_query(dataset, suffix="default")
    q_codes = np.load(fn_qry)

    # Load data
    fn_test = filename_dataset(dataset, step=2, suffix="test-pxs", extension="csv")
    test = pd.read_csv(fn_test, index_col=None).values.astype(float)

    # Normalise the selection once. isinstance (rather than an exact-type
    # lookup) also accepts NumPy integers, tuples, sets and arrays, and the
    # frozenset gives O(1) membership tests inside the loop.
    if q_idx is None:
        selected = None
    elif isinstance(q_idx, (int, np.integer)):
        selected = frozenset([q_idx])
    else:
        selected = frozenset(q_idx)

    f1_micro = []
    f1_macro = []
    q_codes_return = []
    q_idx_return = []

    for query_idx, q_code in enumerate(q_codes):
        if selected is not None and query_idx not in selected:
            continue

        q_idx_return.append(query_idx)
        q_codes_return.append(q_code)

        q_f1_micro, q_f1_macro = single_query_predict(
            classifier, test, fn_test, query_idx, q_code, **predict_config
        )
        f1_micro.append(q_f1_micro)
        f1_macro.append(q_f1_macro)

    # np.vstack raises an opaque ValueError on an empty list; fail with the
    # same exception type but a message that names the actual problem.
    if not q_codes_return:
        raise ValueError(
            "No queries selected: q_idx={!r} matches none of the {} available "
            "queries.".format(q_idx, len(q_codes))
        )

    q_codes_return = np.vstack(q_codes_return)
    results = dict(f1_micro=f1_micro, f1_macro=f1_macro)

    return q_idx_return, q_codes_return, results


def single_query_predict(classifier, test, fn_test, q_idx, q_code, **predict_config):
    """Run one query through `classifier` and return `(f1_micro, f1_macro)`.

    The missing and target attribute indices are derived from `q_code` via
    `get_att`. Ground truth is the target columns of `test`; predictions come
    from `classifier.predict` called on the test file `fn_test` with those
    indices, `q_idx`, and any extra `predict_config` settings.
    """
    miss_idx = get_att(q_code, kind="miss").tolist()
    targ_idx = get_att(q_code, kind="targ").tolist()

    y_true = test[:, targ_idx]
    y_pred = classifier.predict(
        fn_test,
        miss_idx=miss_idx,
        targ_idx=targ_idx,
        q_idx=q_idx,
        **predict_config
    )

    # Micro first, then macro, matching the documented return order.
    micro, macro = (
        f1_score(y_true, y_pred, average=averaging)
        for averaging in ("micro", "macro")
    )
    return micro, macro

### Test

In [9]:
# Smoke test: evaluate only queries 3 and 4 with the "lauritzen" inference
# algorithm. The selection must be passed as `q_idx` (the parameter name of
# predict_pxs); any other keyword is swallowed by **predict_config and the
# filter is silently ignored.
dataset = "nltcs"
q_idx, q_codes, results = predict_pxs(
    dataset, clf, q_idx=[3, 4], inference_algorithm="lauritzen"
)

{'inference_algorithm': 'lauritzen'}
{'inference_algorithm': 'lauritzen'}


In [10]:
# Gather the query codes and scores into `df`, tagged with identifier "pxs"
# and experiment index 1 (see the displayed table below).
df = collect_results(ds, q_codes, results, q_idx=q_idx, identifier="pxs", exp_idx=1)

In [11]:
# Display the collected results.
df

Unnamed: 0,missing_percentage,difficulty,q_idx,f1_micro,f1_macro,identifier,dataset,exp_idx
0,0.25,3,3,0.894005,0.754598,pxs,nltcs,1
1,0.375,4,4,0.894005,0.754598,pxs,nltcs,1


In [12]:
# Resolve the results filename for identifier "test" in experiment 0.
fn_results = filename_results("test", exp_idx=0)

In [13]:
# Display the resolved results path.
fn_results

'../../out/exp-0000/results/test.csv'

# Flows

## Model-Building

In [14]:
def flow_build_pxs_model(dataset, model_keyword="default", **fit_config):
    """Fit a PxS model for `dataset` and save it under `model_keyword`.

    `model_keyword` is forwarded to both `fit_pxs` and `save_pxs`, so the
    model file named at fit time and the pickled classifier carry the same
    "pxs-<model_keyword>" suffix. Extra keyword arguments are passed on to
    `fit_pxs`. Returns None.
    """
    # Build model (the keyword must be forwarded here too; otherwise the
    # fit-time model file always falls back to the "default" suffix).
    model = fit_pxs(dataset, model_keyword=model_keyword, **fit_config)

    # Save model
    save_pxs(dataset, model, keyword=model_keyword)

### Test

In [15]:
# Smoke test: build and save the "nltcs" model under the "default" keyword.
flow_build_pxs_model("nltcs", model_keyword="default")

## Predictions

In [38]:
def flow_pxs_predict(
    dataset,
    exp_idx=0,
    qry_idx=None,
    exp_keyword="pxs",
    model_keyword="default",
    **predict_config
):
    """Load a saved PxS model, score it on `dataset`, and write the results CSV.

    Parameters
    ----------
    dataset
        Dataset name; used to load the model, run the predictions and label
        the collected results.
    exp_idx
        Experiment index, passed to `collect_results` and `filename_results`.
    qry_idx
        Query selection forwarded to `predict_pxs` as `q_idx` (`None` means
        all queries); also used to derive the results-file suffix.
    exp_keyword
        Identifier stored with the results and used in the results filename.
    model_keyword
        Keyword of the saved model to load.
    **predict_config
        Forwarded to `predict_pxs`.

    Returns
    -------
    None
    """
    # Load model
    clf = load_pxs(dataset, keyword=model_keyword)

    # Do predictions on the requested dataset (not a notebook-level global).
    q_idx_return, q_codes_return, results = predict_pxs(
        dataset, clf, q_idx=qry_idx, **predict_config
    )

    # Get and save results
    df = collect_results(
        dataset,
        q_codes_return,
        results,
        q_idx=q_idx_return,
        identifier=exp_keyword,
        exp_idx=exp_idx,
    )

    suffix = experiment_suffix(qry_idx)
    fn_res = filename_results(exp_keyword, suffix=suffix, exp_idx=exp_idx)

    df.to_csv(fn_res)

In [40]:
# Scratch mapping used to check that None works as a dictionary key.
options = {None: 'a', 3: 'b'}

In [43]:
# Scratch check: look up the entry stored under the None key.
options[None]

'a'

In [34]:
# Run the prediction flow for queries 2-4. The keyword names must match the
# signature of flow_pxs_predict (`qry_idx`, `exp_keyword`); other names would
# be absorbed by **predict_config and forwarded to the predictor instead.
flow_pxs_predict(
    "nltcs",
    exp_idx=0,
    qry_idx=[2, 3, 4],
    exp_keyword="pxs",
    model_keyword="default",
    **predict_config
)

{'inference_algorithm': 'EPIS_sampling', 'prediction_algorithm': 'mi'}
{'inference_algorithm': 'EPIS_sampling', 'prediction_algorithm': 'mi'}
{'inference_algorithm': 'EPIS_sampling', 'prediction_algorithm': 'mi'}


In [22]:
# Scratch dict for the truthiness check in the next cell.
d = {"a": 1}

In [23]:
# Scratch check: a non-empty dict is truthy, so this branch runs.
if d:
    print('yeah')

yeah
