In [None]:
# NOTE: To save the LF sets, make sure to select "Widgets -> Save Notebook Widget State"

# Example Wɪᴛᴀɴ Labelling Functions

The following Labelling Function (LF) sets were generated by Wɪᴛᴀɴ after 25 iterations. Coloured LFs are those that are selected as "useful" for identifying a particular class by the simulated user (which selects LFs having accuracy at least 20% above the random chance of $\frac{1}{\text{class count}}$).

Each LF matches instances containing *any* of the words listed for that LF (i.e. the `/` can be thought of as an `OR` operator). LFs indented beneath another LF are only applied to instances matching the parent LF (i.e. the indentation can be thought of as an `AND` operator).

The reported **coverage** is the proportion of instances in the training set matched by that LF, while the **accuracy** is the proportion of covered instances having the assigned majority class.

In [None]:
from witan_experiments.evaluation.taxonomy_browser import (witan_rule_browser,
                                                           witan_rule_latex,
                                                           BrowserArgs)
from witan_experiments import run_experiments
from witan_experiments.config import prepare_experiment_configs
from witan_experiments.rule_seeders import BlankRS
from witan_experiments.rule_generators import WitanRG
from witan_experiments.labellers import SnorkelLblr
from witan_experiments.models import AnnClf

In [None]:
# Datasets whose generated LF sets are shown in the sections further down.
dataset_names = ['imdb', 'bias_pa', 'twentynews']
ruleset_generator = WitanRG()

# Each option is supplied as a list; only `dataset_name` has more than one entry.
# NOTE(review): the introduction above mentions 25 iterations, whereas
# `interaction_count` is 10 here -- confirm which one is intended.
configs = prepare_experiment_configs(
    dataset_name=dataset_names, rule_seeder=[BlankRS()], rngseed=[1],
    ruleset_generator=[ruleset_generator], interaction_count=[10],
    labeller=[SnorkelLblr()], classifier=[AnnClf()],
)

In [None]:
# Run the prepared experiment configs; `results` is consumed by the next cell,
# which iterates over it with `.items()`.
results = run_experiments(configs, default_workers=2, rule_workers=1, continue_on_failure=False)

In [None]:
# One BrowserArgs per dataset, keyed by the dataset's name.
browser_args = {name: BrowserArgs(name) for name in dataset_names}
# Re-key the experiment results by dataset name so each section can look its result up directly.
dataset_results = {cfg.dataset_name: outcome for cfg, outcome in results.items()}

## IMDb Reviews

In [None]:
# Display the rule browser for the IMDb result (the call is the cell's output).
dataset = 'imdb'
witan_rule_browser(
    dataset_results[dataset],
    browser_args[dataset],
)

## Bias Bios: Painter vs Architect

In [None]:
# Display the rule browser for the Bias Bios (painter vs architect) result.
dataset = 'bias_pa'
witan_rule_browser(
    dataset_results[dataset],
    browser_args[dataset],
)

## 20Newsgroups Topics

In [None]:
# Display the rule browser for the 20Newsgroups result.
dataset = 'twentynews'
witan_rule_browser(
    dataset_results[dataset],
    browser_args[dataset],
)

## LaTeX Output

In [None]:
# Print each dataset's name followed by the LaTeX rendering of its rules,
# leaving one blank line after every dataset.
for dataset in dataset_names:
    print(dataset)
    print(witan_rule_latex(dataset_results[dataset], browser_args[dataset]), end='\n\n')