## Causal Inference

In [1]:
import os
import json
from typing import Dict, List, Union

import pandas as pd
import numpy as np
from scipy.special import softmax
from scipy.special import expit
import torch

from cma import report_CMA
import fuse, causal_utils
from kl_general import DEFAULT_CONFIG as ESTIMATE_C_DEFAULT_CONFIG
from kl_general import TE_CONFIG as ESTIMATE_C_TE_CONFIG


import sys
sys.path.append("../")
from my_package.models.traditional.classifier import Classifier
from my_package.utils.handcrafted_features.counter import count_negations
from my_package.utils.handcrafted_features.overlap import get_lexical_overlap, get_entities_overlap

2022-02-02 23:52:17.083020: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


## Configs

In [1]:
DATASET = "qqp" # fever or qqp
IS_CORRECTION = False

IS_GENERATE_NEW_BIAS_PRED = False

_MODEL_PATH = {
#     "fever": "../results/fever/baseline_lr2e5/outputs_fever_bert_base_1", 
#     "fever": "../results/fever/baseline_lr2e5/outputs_fever_weighted_bert_1", 
#     "fever": "../results/fever/baseline_lr2e5/outputs_fever_poe_bert_1", 
#     "fever": "../results/fever/baseline_lr2e5/outputs_fever_self_distill_bert_1", 
    
#     "qqp": "/raid/can/nli_models/qqp_bias/baseline2e-5/qqp_bert_base_bias"
    "qqp": "/raid/can/nli_models/qqp_bias/reweight2e-5/qqp_bert_base_reweight_bias"    
#     "qqp": "/raid/can/nli_models/qqp_bias/poe2e-5/qqp_bert_base_poe_bias"
#     "qqp": "/raid/can/nli_models/qqp_bias/self_distill2e-5/qqp_bert_base_self_distill_bias"
}

# RESULT_PATH = "../results/fever/baseline_lr2e5/"
# RESULT_PATH = "../results/fever/reweight_lr2e5/"
# RESULT_PATH = "../results/fever/poe_lr2e5/"
# RESULT_PATH = "../results/fever/self_distill_lr2e5/"


# RESULT_PATH = "/raid/can/nli_models/qqp_bias/baseline2e-5/"
RESULT_PATH = "/raid/can/nli_models/qqp_bias/reweight2e-5/"  
# RESULT_PATH = "/raid/can/nli_models/qqp_bias/poe2e-5/" 
# RESULT_PATH = "/raid/can/nli_models/qqp_bias/self_distill2e-5/" 

In [3]:
_BIAS_CLASS = {
    "fever": "REFUTES",
    "qqp": "1"
}
_POSSIBLE_LABELS = {
    "fever": ("SUPPORTS", "NOT ENOUGH INFO", "REFUTES"),
    "qqp": ("0", "1")
}


_DATA_PATH = {
    "fever": "../data/fact_verification", # "../data/fact_verification" "/raid/can/debias_nlu/data/fact_verification"
    "qqp": "/raid/can/debias_nlu/data/paraphrase_identification" # "../data/paraphrase_identification"
}


_SENT1_KEYS = {
    "fever": ("claim", "claim", "claim"),
    "qqp": ("sentence1", "sentence1")
}
_SENT2_KEYS = {
    "fever": ("evidence", "evidence_sentence", "evidence"),
    "qqp": ("sentence2", "sentence2")
}
_LABEL_KEYS = {
    "fever": ("gold_label", "label", "label"),
    "qqp": ("is_duplicate", "is_duplicate")
}
_TEST_FILES = {
    "fever": (
        "fever.dev.jsonl",
        "fever_symmetric_v0.1.test.jsonl",
        "fever_symmetric_v0.2.test.jsonl",
    ),
    "qqp": (
        "qqp.dev.jsonl",
        "paws.dev_and_test.jsonl"
    )
}
_TEST_SETS = {
    "fever": (
        "fever_dev",
        "fever_sym1",
        "fever_sym2",
    ),
    "qqp": (
        "qqp_dev",
        "qqp_paws"
    )
}

_BIAS_MODEL_PATH = {
    "fever": "../results/fever/bias_model",
    "qqp": "../results/qqp/bias_model"
}

_BIAS_VAL_PRED_FILE = {
    "fever": "weighted_fever.val.jsonl",
    "qqp": "qqp_val_overlap_only_bias_weighted.jsonl" # "weighted_qqp.val.jsonl",
}
_MODEL_VAL_PRED_FILE = {
    "fever": "raw_fever.val.jsonl",
    "qqp": "raw_qqp.val.jsonl",
}

MODEL_PROB_KEY = "probs"
BIAS_PROB_KEY = "bias_prob"

# Fusion method
FUSION = fuse.sum_fuse

In [4]:
BIAS_CLASS = _BIAS_CLASS[DATASET]
POSSIBLE_LABELS = _POSSIBLE_LABELS[DATASET]

ESTIMATE_C_DEFAULT_CONFIG["N_LABELS"] = len(POSSIBLE_LABELS)
ESTIMATE_C_TE_CONFIG["N_LABELS"] = len(POSSIBLE_LABELS)
# ESTIMATE_C_DEFAULT_CONFIG["LEARNING_RATE"] = 5e-4
# ESTIMATE_C_TE_CONFIG["LEARNING_RATE"] = 5e-4

ROOT_DATA_PATH = "../data"
DATA_PATH = _DATA_PATH[DATASET]
TEST_FILES = _TEST_FILES[DATASET]
TEST_SETS = _TEST_SETS[DATASET]
SENT1_KEYS = _SENT1_KEYS[DATASET]
SENT2_KEYS = _SENT2_KEYS[DATASET]
LABEL_KEYS = _LABEL_KEYS[DATASET]
WEIGHT_KEY = "sample_weight"

BIAS_MODEL_PATH = _BIAS_MODEL_PATH[DATASET] 
MODEL_PATH = _MODEL_PATH[DATASET]


BIAS_VAL_PRED_FILE = _BIAS_VAL_PRED_FILE[DATASET]
MODEL_VAL_PRED_FILE = _MODEL_VAL_PRED_FILE[DATASET]

## Compute average input for bias model

In [7]:
def _read_jsonl(file_path: str) -> List[Dict[str, Union[str, int]]]:
    output = []
    f = open(file_path, 'r')
    line = f.readline()
    while line:
        doc = json.loads(line)
        output.append(doc)
        line = f.readline()
    f.close()
    return output

In [8]:
val_data = _read_jsonl(os.path.join(DATA_PATH, "%s.val.jsonl"%DATASET))
val_data = [
    (x[SENT1_KEYS[0]], x[SENT2_KEYS[0]])
    for x in val_data
]
print(len(val_data))
val_data[:2]

5000


[('How can I be a political leader?', 'How do I become a leader?'),
 ('How do I check if I have knock knees or not?',
  'How can I check knock knees?')]

## Causal Mediation Analysis

In [15]:
seed_paths = os.listdir(RESULT_PATH)
seed_paths = list(map(lambda x: os.path.join(RESULT_PATH, x), seed_paths))

print(seed_paths)

['/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed43370', '/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed23370', '/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed53370', '/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed13370', '/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed33370']


In [16]:
for test_set, label_key in zip(TEST_SETS, LABEL_KEYS):
    print("========= TEST_SET: %s ========="%test_set)
    report_CMA(
        model_path = "",
        task = DATASET,
        seed_path = seed_paths,

        data_path = DATA_PATH,
        test_set = test_set, ################
        fusion = fuse.sum_fuse,
#         input_a0 = input_a0,
        estimate_c_config = ESTIMATE_C_DEFAULT_CONFIG,
        estimate_c_te_config = ESTIMATE_C_TE_CONFIG,
        correction = IS_CORRECTION,
        ground_truth_key = label_key,
#         model_pred_method = model_pred,

        bias_val_pred_file = BIAS_VAL_PRED_FILE,
        model_val_pred_file = MODEL_VAL_PRED_FILE,

        entropy_threshold = -9999,
    )
    print("========= END ======== \n\n\n")

/raid/can/debias_nlu/data/paraphrase_identification/qqp_dev_overlap_only_bias_weighted.jsonl
/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed43370/raw_qqp.val.jsonl
/raid/can/debias_nlu/data/paraphrase_identification/qqp/qqp_val_overlap_only_bias_weighted.jsonl


  self.target_probs = torch.tensor(target_probs)


c:  [0.40776461 0.40776461]
softmax(c):  [0.5 0.5]
/raid/can/debias_nlu/data/paraphrase_identification/qqp/qqp_val_overlap_only_bias_weighted.jsonl
c:  [0.49881184 0.49881184]
softmax(c):  [0.5 0.5]
unique_labels:  [1, 0]
/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed23370/raw_qqp.val.jsonl
/raid/can/debias_nlu/data/paraphrase_identification/qqp/qqp_val_overlap_only_bias_weighted.jsonl
c:  [0.40762413 0.40762413]
softmax(c):  [0.5 0.5]
/raid/can/debias_nlu/data/paraphrase_identification/qqp/qqp_val_overlap_only_bias_weighted.jsonl
c:  [0.49767417 0.49767417]
softmax(c):  [0.5 0.5]
unique_labels:  [1, 0]
/raid/can/nli_models/qqp_bias/self_distill/qqp_bert_base_self_distill_bias_seed53370/raw_qqp.val.jsonl
/raid/can/debias_nlu/data/paraphrase_identification/qqp/qqp_val_overlap_only_bias_weighted.jsonl
c:  [0.40762514 0.40762514]
softmax(c):  [0.5 0.5]
/raid/can/debias_nlu/data/paraphrase_identification/qqp/qqp_val_overlap_only_bias_weighted.jsonl
c:  [0.5

The minimal epsilon for which Algorithm A is almost stochastically greater than algorithm B is  0.0
since epsilon = 0, algorithm A is stochatically dominant over B
None



