In [7]:
from active_learning.learner.standard import Learner, get_classifier
from active_learning.weak_supervision.SelfTraining import SelfTraining
import argparse
import numpy as np
import copy
import pandas as pd
import random
from sklearn.metrics import accuracy_score, f1_score
from timeit import default_timer as timer
from typing import List
from active_learning.config import get_active_config
from active_learning.dataStorage import DataStorage
from active_learning.datasets import load_synthetic
from active_learning.logger import init_logger
from active_learning.merge_weak_supervision_label_strategies.MajorityVoteLabelMergeStrategy import (
    MajorityVoteLabelMergeStrategy,
)
from collections import Counter

from active_learning.weak_supervision import SyntheticLabelingFunctions
from active_learning.weak_supervision.BaseWeakSupervision import BaseWeakSupervision

config = argparse.Namespace()
config.AMOUNT_OF_FEATURES = 1
config.RANDOM_SEED = 0
config.AMOUNT_OF_SYNTHETIC_LABELLING_FUNCTIONS = 20
config.OUTPUT_PATH = "tmp"
config.LOG_FILE = "tmp.log"
config.TEST_FRACTION = 0.5

# -2 means that a true random seed is used, all other numbers use the provided CLI argument random_seed
if config.RANDOM_SEED == -2:
    random_but_not_random = True
else:
    random_but_not_random = False


init_logger(config.LOG_FILE)

if random_but_not_random:
    config.RANDOM_SEED = random.randint(0, 2147483647)
    np.random.seed(config.RANDOM_SEED)
    random.seed(config.RANDOM_SEED)


In [38]:
def evaluate_and_print_prediction(Y_pred, Y_true, title):
    acc = accuracy_score(Y_true, Y_pred)
    f1 = f1_score(Y_true, Y_pred, average="weighted")
    c = Counter(Y_pred)

    return [
        title,
        acc,
        f1,
        c.most_common(1)[0][0],
        c.most_common(1)[0][1] / len(Y_pred),
    ]


def train_and_evaluate(title, original_data_storage, WEIGHTS=0, WS=True):
    data_storage = copy.deepcopy(original_data_storage)
    learner = get_classifier("RF", random_state=config.RANDOM_SEED)
    data_storage.generate_weak_labels(learner)

    if WEIGHTS != 0:
        weights = []
        for indice in data_storage.weakly_combined_mask:
            if indice in data_storage.labeled_mask:
                weights.append(WEIGHTS)
            else:
                weights.append(1)
    else:
        weights = None
    if WS:
        mask = data_storage.weakly_combined_mask
    else:
        mask = data_storage.labeled_mask

    learner.fit(
        data_storage.X[mask],
        data_storage.Y_merged_final[mask],
        sample_weight=weights,  # type: ignore
    )
    Y_pred = learner.predict(data_storage.X[data_storage.test_mask])

    Y_true = data_storage.exp_Y[data_storage.test_mask]

    return evaluate_and_print_prediction(Y_pred, Y_true, title)


def test_one_labeled_set(original_data_storage, label_strategy="random", param=5):
    data_storage = copy.deepcopy(original_data_storage)

    if label_strategy == "random":
        random_sample_ids = np.random.choice(
            data_storage.unlabeled_mask,
            size=param,
            replace=False,
        )

        data_storage.label_samples(
            random_sample_ids, data_storage.exp_Y[random_sample_ids], "AL"
        )

    return [
        train_and_evaluate("RF No WS", data_storage, WS=False)
        + [label_strategy, param],
        train_and_evaluate("RF No Weights", data_storage) + [label_strategy, param],
        train_and_evaluate("RF Weights 10", data_storage, WEIGHTS=10)
        + [label_strategy, param],
        train_and_evaluate("RF Weights 50", data_storage, WEIGHTS=50)
        + [label_strategy, param],
        train_and_evaluate("RF Weights 100", data_storage, WEIGHTS=100)
        + [label_strategy, param],
        train_and_evaluate("RF Weights 1000", data_storage, WEIGHTS=1000)
        + [label_strategy, param],
    ]


Problem: many WS labels conceal even the best AL labels (which are in contrast waaaaay less)

# LF When applied individually

In [39]:
df, synthetic_creation_args = load_synthetic(
    config.RANDOM_SEED,
)

data_storage: DataStorage = DataStorage(df=df, TEST_FRACTION=config.TEST_FRACTION)
learner = get_classifier("RF", random_state=config.RANDOM_SEED)

learner.fit(
    data_storage.X[data_storage.labeled_mask],
    data_storage.Y_merged_final[data_storage.labeled_mask],
)


ws_list: List[BaseWeakSupervision] = [
    SyntheticLabelingFunctions(X=data_storage.X, Y=data_storage.exp_Y)
    for _ in range(0, config.AMOUNT_OF_SYNTHETIC_LABELLING_FUNCTIONS)
]  # type: ignore


# tweak to do more than one iteration of self training!
""" ws_list.append(SelfTraining(0.99, 0.99))
ws_list.append(SelfTraining(0.9, 0.9))
ws_list.append(SelfTraining(0.8, 0.8))
ws_list.append(SelfTraining(0.7, 0.7)) """

# add label propagation

"""print(data_storage.test_mask)
print(data_storage.unlabeled_mask)
print(data_storage.labeled_mask)
print(len(data_storage.X))
"""

results = []
import warnings
warnings.filterwarnings("ignore")
for ws in ws_list:
    # calculate f1 and acc for ws on test AND train dataset
    # it actually only get's computed on the test mask, not the train mask itself
    Y_pred = ws.get_labels(data_storage.test_mask, data_storage, learner)

    results.append(
        evaluate_and_print_prediction(
            data_storage.exp_Y[data_storage.test_mask], Y_pred, ws.identifier
        )
    )

pd.DataFrame(results, columns=["title", "Acc", "F1", "MC", "MC%"])

Unnamed: 0,title,Acc,F1,MC,MC%
0,"L_lr #3: [3, 11, 7]",0.305944,0.344939,2,0.36014
1,L_lr #1: [4],0.358392,0.526503,2,0.36014
2,"L_dt #3: [8, 0, 14]",0.473776,0.491909,2,0.36014
3,"L_dt #7: [16, 9, 10, 6, 12, 18, 1]",0.604895,0.616009,2,0.36014
4,"L_lr #7: [14, 1, 6, 5, 7, 16, 18]",0.40035,0.42317,2,0.36014
5,"L_knn #3: [9, 7, 13]",0.442308,0.447483,2,0.36014
6,"L_knn #6: [5, 6, 13, 2, 16, 20]",0.365385,0.33511,2,0.36014
7,"L_lr #4: [18, 16, 17, 14]",0.300699,0.336522,2,0.36014
8,"L_dt #4: [3, 1, 4, 5]",0.482517,0.474695,2,0.36014
9,"L_dt #4: [13, 2, 19, 9]",0.482517,0.474695,2,0.36014


# Combine all LFs with Majority Vote, no classifier

In [40]:
data_storage.set_weak_supervisions(ws_list, MajorityVoteLabelMergeStrategy())
data_storage.generate_weak_labels(learner, mask=data_storage.test_mask)

# Only Majority Vote, no classifier
results = [
    evaluate_and_print_prediction(
        data_storage.Y_merged_final[data_storage.test_mask],
        data_storage.exp_Y[data_storage.test_mask],
        "Majority Vote",
    )
]
pd.DataFrame(results, columns=["title", "Acc", "F1", "MC", "MC%"])

Unnamed: 0,title,Acc,F1,MC,MC%
0,Majority Vote,0.527972,0.501197,2.0,0.473776


# LFs with majority Vote + some random samples (potentially from AL)

In [43]:
'''
exit(-1)
test_one_labeled_set(data_storage, label_strategy="random", param=5)
exit(-1)
test_one_labeled_set(data_storage, label_strategy="random", param=10)
test_one_labeled_set(data_storage, label_strategy="random", param=25)
test_one_labeled_set(data_storage, label_strategy="random", param=50)
test_one_labeled_set(data_storage, label_strategy="random", param=100)
test_one_labeled_set(data_storage, label_strategy="random", param=200)
'''

result:List = test_one_labeled_set(data_storage, label_strategy="start_set", param=0)
result += test_one_labeled_set(data_storage, label_strategy="random", param=5)
test_one_labeled_set(data_storage, label_strategy="random", param=10)
test_one_labeled_set(data_storage, label_strategy="random", param=25)
test_one_labeled_set(data_storage, label_strategy="random", param=50)
test_one_labeled_set(data_storage, label_strategy="random", param=100)
test_one_labeled_set(data_storage, label_strategy="random", param=200)
random_df = pd.DataFrame(result, columns=["title", "Acc", "F1", "MC", "MC%", "label_strategy", "#randomly labeled"])
random_df

In [None]:
# -> how to combine AL and WS labels in a way, that the experiment actually benefits from the labels

# compute the 50/100/200/500 worst wrongly classified samples -> classify them correctly (aka. fake active learning) -> is there really room for improvement after falsely applyed WS??

# use potentially good samples instead of random ones


# wrong_mask = np.logical_not(np.array_equal(Y_pred, Y_true))

# print(data_storage.Y_merged_final[wrong_mask])
# print(data_storage.exp_Y[wrong_mask])

# calculate acc/f1 now and before ONLY on those without abstain!, but add "coverage" to the WS LF
# a) get those samples, who are least covered by the LF
# b) get those samples, where the classification is wrong by the merged LFs
# c) get those samples, with the greatest disagreement among the LFs
