In [1]:
from libs import *
from modelings.modelings_bert import *
from modelings.modelings_roberta import *
from modelings.modelings_gpt2 import *
from modelings.modelings_lstm import *
"""
For evaluation, we use a single random seed, since
the models have already been trained with 5
different seeds.
"""
# Seed the global RNGs of `random`, NumPy and PyTorch with the same fixed
# value. Each return value is bound to `_` and otherwise discarded.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _ = _seed_rng(123)

In [35]:
# Accumulator for every evaluated setting. Re-running this cell empties it;
# re-running the cells below only adds (or overwrites) entries.
results = dict()

In [39]:
"""
The following cells run the CEBaB benchmark on
every combination of the conditions listed below.
"""
# Each key maps to the list of values to sweep. The evaluation cell runs once
# per element of the Cartesian product of these lists. The trailing comment on
# each entry keeps the author's note of other values for that key.
grid = {
    "eval_split": ["test"],  # dev,test
    "control": ["ks"],  # ks,approximate
    "seed": [42, 66, 77],  # 42, 66, 77
    "h_dim": [64],  # 1,16,64,128,192 | 1,16,64,75
    "interchange_layer": [1],  # 0,1; 2,4,6,8,10,12
    "class_num": [5],
    "k": [19684],  # 0;10,100,500,1000,3000,6000,9848,19684
    "alpha": [1.0],  # 0.0,1.0
    "beta": [1.0],  # 0.0,1.0
    # The "gemma" spelling must stay as is: the key name and value are
    # interpolated into the checkpoint directory name by the evaluation loop.
    "gemma": [3.0],  # 0.0,3.0
    "model_arch": ["lstm"],  # lstm, bert-base-uncased, roberta-base, gpt2
    # Kept as a string because it is inserted verbatim into the checkpoint
    # directory name (e.g. "8e-05" vs "0.001").
    "lr": ["0.001"],  # 8e-05; 0.001
    "counterfactual_type": ["true"],  # approximate,true
    "self_explain": [True],  # True, False
}

# Expand the grid into one dict per combination of condition values.
keys, values = zip(*grid.items())
permutations_dicts = [dict(zip(keys, v)) for v in itertools.product(*values)]

device = 'cuda:8'
batch_size = 32

# Sanity check: the "hdims" and "layers" controls must be evaluated on the dev
# split, every other control on the test split. Every generated combination
# is checked (not just the first value of each list), so a multi-valued grid
# cannot slip a mismatched pair through.
mismatched_settings = [
    (combo["control"], combo["eval_split"])
    for combo in permutations_dicts
    if combo["eval_split"] != (
        "dev" if combo["control"] in ("hdims", "layers") else "test"
    )
]
assert not mismatched_settings, (
    "control/eval_split mismatch (hdims and layers require 'dev', "
    f"everything else requires 'test'): {mismatched_settings}"
)

In [40]:
# Names of the grid conditions, in the order they are recorded in each key of
# `results`.
condition_names = (
    "eval_split", "control", "seed", "h_dim", "interchange_layer",
    "class_num", "k", "alpha", "beta", "gemma", "model_arch", "lr",
    "counterfactual_type", "self_explain",
)

# Evaluate one (model, explainer) pair per grid combination and store the
# pipeline outputs in `results`, keyed by the tuple of (name, value) pairs.
for setting in permutations_dicts:

    eval_split = setting["eval_split"]
    seed = setting["seed"]
    class_num = setting["class_num"]
    alpha = setting["alpha"]
    beta = setting["beta"]
    gemma = setting["gemma"]
    h_dim = setting["h_dim"]
    dataset_type = f'{class_num}-way'
    control = setting["control"]
    model_arch = setting["model_arch"]
    k = setting["k"]
    interchange_layer = setting["interchange_layer"]
    lr = setting["lr"]
    counterfactual_type = setting["counterfactual_type"]
    self_explain = setting["self_explain"]

    # Pick the result-directory prefix and the model / explainer classes for
    # this architecture.
    if model_arch == "bert-base-uncased":
        model_path = "BERT"
        model_module = BERTForCEBaB
        explainer_module = CausalProxyModelForBERT
    elif model_arch == "roberta-base":
        model_path = "RoBERTa"
        model_module = RoBERTaForCEBaB
        explainer_module = CausalProxyModelForRoBERTa
    elif model_arch == "gpt2":
        model_path = "gpt2"
        model_module = GPT2ForCEBaB
        explainer_module = CausalProxyModelForGPT2
    elif model_arch == "lstm":
        model_path = "lstm"
        model_module = LSTMForCEBaB
        explainer_module = CausalProxyModelForLSTM
    else:
        # Fail loudly: falling through would hit a NameError on the first
        # iteration, or silently reuse the previous iteration's classes and
        # path on later ones.
        raise ValueError(f"Unsupported model_arch: {model_arch!r}")
    model_path += f"-{control}"

    grid_conditions = tuple((name, setting[name]) for name in condition_names)
    print("Running for this setting: ", grid_conditions)

    blackbox_model_path = (
        f'CEBaB/{model_arch}.CEBaB.sa.'
        f'{class_num}-class.exclusive.seed_{seed}'
    )
    cpm_model_path = (
        f'../proxy_training_results/{model_path}/'
        f'cebab.alpha.{alpha}.beta.{beta}.gemma.{gemma}.'
        f'lr.{lr}.dim.{h_dim}.hightype.{model_arch}.'
        f'CEBaB.cls.dropout.0.1.enc.dropout.0.1.counter.type.'
        f'{counterfactual_type}.k.{k}.int.layer.{interchange_layer}.'
        f'seed_{seed}/'
    )
    if self_explain:
        # we throw out the black-box model in this case: the proxy-model
        # checkpoint is loaded in its place.
        blackbox_model_path = cpm_model_path

    # load data from HF
    # NOTE(review): this is repeated on every iteration although only
    # `dataset_type` varies; it could be hoisted if the preprocessing step
    # does not modify `cebab` -- confirm before changing.
    cebab = datasets.load_dataset(
        'CEBaB/CEBaB', use_auth_token=True,
        cache_dir="../train_cache/"
    )

    train, dev, test = preprocess_hf_dataset_inclusive(
        cebab, verbose=1, dataset_type=dataset_type
    )

    eval_dataset = dev if eval_split == 'dev' else test

    tf_model = model_module(
        blackbox_model_path,
        device=device,
        batch_size=batch_size
    )
    explainer = explainer_module(
        blackbox_model_path,
        cpm_model_path,
        device=device,
        batch_size=batch_size,
        intervention_h_dim=h_dim,
        self_explain=self_explain,
    )

    (
        result_per_example, ATE, CEBaB_metrics,
        CEBaB_metrics_per_aspect_direction, CEBaB_metrics_per_aspect,
        CaCE_per_aspect_direction, ACaCE_per_aspect, performance_report,
    ) = cebab_pipeline(
        tf_model, explainer,
        train, eval_dataset,
        seed, k, dataset_type=dataset_type,
        shorten_model_name=False,
        train_setting="inclusive",
        # Any counterfactual type other than "true" is treated as approximate.
        approximate=(counterfactual_type != "true"),
    )

    # Stored positionally: index 2 is CEBaB_metrics, index -1 is
    # performance_report.
    results[grid_conditions] = (
        result_per_example, ATE, CEBaB_metrics,
        CEBaB_metrics_per_aspect_direction, CEBaB_metrics_per_aspect,
        CaCE_per_aspect_direction, ACaCE_per_aspect, performance_report,
    )

Running for this setting:  (('eval_split', 'test'), ('control', 'ks'), ('seed', 42), ('h_dim', 64), ('interchange_layer', 1), ('class_num', 5), ('k', 19684), ('alpha', 1.0), ('beta', 1.0), ('gemma', 3.0), ('model_arch', 'lstm'), ('lr', '0.001'), ('counterfactual_type', 'true'), ('self_explain', True))


Using custom data configuration CEBaB--CEBaB-ccd674d249652bd4
Reusing dataset parquet (../train_cache/CEBaB___parquet/CEBaB--CEBaB-ccd674d249652bd4/0.0.0/7328ef7ee03eaf3f86ae40594d46a1cec86161704e02dd19f232d81eee72ade8)


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at ../proxy_training_results/lstm-ks/cebab.alpha.1.0.beta.1.0.gemma.3.0.lr.0.001.dim.64.hightype.lstm.CEBaB.cls.dropout.0.1.enc.dropout.0.1.counter.type.true.k.19684.int.layer.1.seed_42/ were not used when initializing LSTMForNonLinearSequenceClassification: ['multitask_classifier.out_proj.weight', 'multitask_classifier.out_proj.bias', 'multitask_classifier.dense.weight', 'multitask_classifier.dense.bias']
- This IS expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Dropping no majority reviews: 16.6382% of train_exclusive dataset.
Dropping no majority reviews: 16.03% of train_inclusive dataset.


Some weights of the model checkpoint at ../proxy_training_results/lstm-ks/cebab.alpha.1.0.beta.1.0.gemma.3.0.lr.0.001.dim.64.hightype.lstm.CEBaB.cls.dropout.0.1.enc.dropout.0.1.counter.type.true.k.19684.int.layer.1.seed_42/ were not used when initializing LSTMForNonLinearSequenceClassification: ['multitask_classifier.out_proj.weight', 'multitask_classifier.out_proj.bias', 'multitask_classifier.dense.weight', 'multitask_classifier.dense.bias']
- This IS expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


intervention_h_dim=64


100%|██████████| 124/124 [00:01<00:00, 69.47it/s]


Running for this setting:  (('eval_split', 'test'), ('control', 'ks'), ('seed', 66), ('h_dim', 64), ('interchange_layer', 1), ('class_num', 5), ('k', 19684), ('alpha', 1.0), ('beta', 1.0), ('gemma', 3.0), ('model_arch', 'lstm'), ('lr', '0.001'), ('counterfactual_type', 'true'), ('self_explain', True))


Using custom data configuration CEBaB--CEBaB-ccd674d249652bd4
Reusing dataset parquet (../train_cache/CEBaB___parquet/CEBaB--CEBaB-ccd674d249652bd4/0.0.0/7328ef7ee03eaf3f86ae40594d46a1cec86161704e02dd19f232d81eee72ade8)


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at ../proxy_training_results/lstm-ks/cebab.alpha.1.0.beta.1.0.gemma.3.0.lr.0.001.dim.64.hightype.lstm.CEBaB.cls.dropout.0.1.enc.dropout.0.1.counter.type.true.k.19684.int.layer.1.seed_66/ were not used when initializing LSTMForNonLinearSequenceClassification: ['multitask_classifier.out_proj.weight', 'multitask_classifier.out_proj.bias', 'multitask_classifier.dense.weight', 'multitask_classifier.dense.bias']
- This IS expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Dropping no majority reviews: 16.6382% of train_exclusive dataset.
Dropping no majority reviews: 16.03% of train_inclusive dataset.


Some weights of the model checkpoint at ../proxy_training_results/lstm-ks/cebab.alpha.1.0.beta.1.0.gemma.3.0.lr.0.001.dim.64.hightype.lstm.CEBaB.cls.dropout.0.1.enc.dropout.0.1.counter.type.true.k.19684.int.layer.1.seed_66/ were not used when initializing LSTMForNonLinearSequenceClassification: ['multitask_classifier.out_proj.weight', 'multitask_classifier.out_proj.bias', 'multitask_classifier.dense.weight', 'multitask_classifier.dense.bias']
- This IS expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


intervention_h_dim=64


100%|██████████| 124/124 [00:01<00:00, 68.10it/s]


Running for this setting:  (('eval_split', 'test'), ('control', 'ks'), ('seed', 77), ('h_dim', 64), ('interchange_layer', 1), ('class_num', 5), ('k', 19684), ('alpha', 1.0), ('beta', 1.0), ('gemma', 3.0), ('model_arch', 'lstm'), ('lr', '0.001'), ('counterfactual_type', 'true'), ('self_explain', True))


Using custom data configuration CEBaB--CEBaB-ccd674d249652bd4
Reusing dataset parquet (../train_cache/CEBaB___parquet/CEBaB--CEBaB-ccd674d249652bd4/0.0.0/7328ef7ee03eaf3f86ae40594d46a1cec86161704e02dd19f232d81eee72ade8)


  0%|          | 0/5 [00:00<?, ?it/s]

Some weights of the model checkpoint at ../proxy_training_results/lstm-ks/cebab.alpha.1.0.beta.1.0.gemma.3.0.lr.0.001.dim.64.hightype.lstm.CEBaB.cls.dropout.0.1.enc.dropout.0.1.counter.type.true.k.19684.int.layer.1.seed_77/ were not used when initializing LSTMForNonLinearSequenceClassification: ['multitask_classifier.out_proj.weight', 'multitask_classifier.out_proj.bias', 'multitask_classifier.dense.weight', 'multitask_classifier.dense.bias']
- This IS expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Dropping no majority reviews: 16.6382% of train_exclusive dataset.
Dropping no majority reviews: 16.03% of train_inclusive dataset.


Some weights of the model checkpoint at ../proxy_training_results/lstm-ks/cebab.alpha.1.0.beta.1.0.gemma.3.0.lr.0.001.dim.64.hightype.lstm.CEBaB.cls.dropout.0.1.enc.dropout.0.1.counter.type.true.k.19684.int.layer.1.seed_77/ were not used when initializing LSTMForNonLinearSequenceClassification: ['multitask_classifier.out_proj.weight', 'multitask_classifier.out_proj.bias', 'multitask_classifier.dense.weight', 'multitask_classifier.dense.bias']
- This IS expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LSTMForNonLinearSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


intervention_h_dim=64


100%|██████████| 124/124 [00:01<00:00, 68.20it/s]


In [44]:
# Build one summary row per evaluated setting: the selected grid conditions
# followed by the headline metrics.
important_keys = [
    "eval_split",
    "control", "seed",
    "h_dim", "interchange_layer",
    "class_num", "k",
    "beta", "gemma",
    "model_arch", "lr", "counterfactual_type",
]
metric_columns = ["ICaCE-L2", "ICaCE-cosine", "ICaCE-normdiff"]

values = []
# The loop variables are deliberately not named `k` / `v`, so the global `k`
# set by the evaluation cell is not overwritten with a conditions tuple.
for conditions, outputs in results.items():
    setting = dict(conditions)  # keys of `results` are tuples of (name, value) pairs
    cebab_metrics = outputs[2]  # third element stored per setting: CEBaB_metrics
    performance_report = outputs[-1]  # last element stored per setting

    row = [setting[name] for name in important_keys]
    row.extend(cebab_metrics[metric].iloc[0] for metric in metric_columns)
    # NOTE(review): assumes the first entry of the first row of the
    # performance report is the macro-F1 score -- confirm against
    # cebab_pipeline. Indexing is left exactly as in the original.
    row.append(performance_report.iloc[0][0])
    values.append(row)

important_keys.extend(metric_columns + ["macro-f1"])
df = pd.DataFrame(values, columns=important_keys)
df.sort_values(by=['seed', 'interchange_layer', 'h_dim'], ascending=True)

Unnamed: 0,eval_split,control,seed,h_dim,interchange_layer,class_num,k,beta,gemma,model_arch,lr,counterfactual_type,ICaCE-L2,ICaCE-cosine,ICaCE-normdiff,macro-f1
0,test,ks,42,64,1,5,19684,1.0,3.0,lstm,0.001,True,0.428,0.3958,0.2988,0.592428
1,test,ks,66,64,1,5,19684,1.0,3.0,lstm,0.001,True,0.4207,0.4066,0.2841,0.584527
2,test,ks,77,64,1,5,19684,1.0,3.0,lstm,0.001,True,0.3994,0.393,0.2559,0.596698


In [45]:
groupby_key = "k"

model_arch = grid["model_arch"][0]
control = grid["control"][0]

# Average the numeric columns within each group. The numeric columns (plus the
# grouping key) are selected explicitly: recent pandas raises a TypeError when
# asked to average string columns such as "control" or "lr", where older
# versions dropped them silently.
numeric_columns = [
    column for column in df.columns
    if column == groupby_key or pd.api.types.is_numeric_dtype(df[column])
]
df[numeric_columns].groupby([groupby_key], as_index=False).mean()

Unnamed: 0,k,seed,h_dim,interchange_layer,class_num,beta,gemma,ICaCE-L2,ICaCE-cosine,ICaCE-normdiff,macro-f1
0,19684,61.666667,64.0,1.0,5.0,1.0,3.0,0.416033,0.398467,0.2796,0.591218


In [46]:
# Standard deviation of the numeric columns within each group. The numeric
# columns (plus the grouping key) are selected explicitly: recent pandas
# raises on string columns instead of dropping them, and the `numeric_only`
# keyword of GroupBy.std is not available in older releases.
numeric_columns = [
    column for column in df.columns
    if column == groupby_key or pd.api.types.is_numeric_dtype(df[column])
]
df[numeric_columns].groupby([groupby_key], as_index=False).std()

Unnamed: 0,k,seed,h_dim,interchange_layer,class_num,beta,gemma,ICaCE-L2,ICaCE-cosine,ICaCE-normdiff,macro-f1
0,19684,17.897858,0.0,0.0,0.0,0.0,0.0,0.01486,0.007181,0.021801,0.006175
