In [1]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import yaml
import torch
from tqdm import tqdm
from src.utils import linearise_input, convert_to_features, nums_to_names


MODEL_TO_CONFIG_DICT = {
    "text-exps-t5-20": "text_3_bigtest",
    "text-exps-t5-20-aug": "text_17",
    "text-exps-t5-10": "text_25",
    "text-exps-t5-10-aug": "text_29",
    "text-exps-t5-large-20": "text_21",
    "text-exps-t5-large-20-aug": "text_22",
    "text-exps-t5-large-10": "text_34",
    "text-exps-t5-large-10-aug": "text_33",
    "text-exps-bart-20": "text_4_bigtest",
    "text-exps-bart-20-aug": "text_18",
    "text-exps-bart-10": "text_26",
    "text-exps-bart-10-aug": "text_30",
    "text-exps-bart-large-20": "text_23",
    "text-exps-bart-large-20-aug": "text_24",
    "text-exps-bart-large-10": "text_31",
    "text-exps-bart-large-10-aug": "text_32",
}

QA_MODEL_TO_CONFIG_DICT = {
    "text-exps-qa-t5": "text_3",
    "text-exps-qa-bart": "text_4",
}

dataset = load_dataset("james-burton/textual-explanations-702010")

model_name = "text-exps-bart-20"
config_type = MODEL_TO_CONFIG_DICT[model_name]

model = AutoModelForSeq2SeqLM.from_pretrained("james-burton/" + model_name)
tokenizer = AutoTokenizer.from_pretrained("james-burton/" + model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


  from .autonotebook import tqdm as notebook_tqdm
2023-04-18 09:48:55.390624: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-18 09:48:55.475906: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda/lib64:/home/james/Downloads/TensorRT-8.5.1.7/lib
2023-04-18 09:48:55.475919: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-18 09:48:55.921676: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0): BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), eps=1e-05,

Load the config file

In [2]:
# Import yaml file
with open("../configs/train_default.yaml") as f:
    args = yaml.safe_load(f)

# Update default args with chosen config
with open("../configs/train_configs.yaml") as f:
    yaml_configs = yaml.safe_load_all(f)
    yaml_args = next(
        conf for conf in yaml_configs if conf["config"] == config_type
    )
args.update(yaml_args)
print(f"Updating with:\n{yaml_args}\n")

Updating with:
{'config': 'text_4_bigtest', 'fast_dev_run': False, 'tags': ['bart', 'big-test'], 'batch_size': 8, 'max_features': 20, 'linearisation': 'text', 'model_base': 'facebook/bart-base', 'output_root': 'models/bart-base/', 'num_beams': 20, 'predict_batch_size': 2, 'big_test_set': True}



In [3]:
# Form the linearised or stepwise (and linearised) input
dataset = dataset.map(
    lambda x: linearise_input(x, args["linearisation"], args["max_features"]),
    load_from_cache_file=False,
)

# Convert to tokens
dataset = dataset.map(
    lambda x: convert_to_features(x, tokenizer, args["max_input_len"]),
    batched=True,
    load_from_cache_file=False,
)

100%|██████████| 94/94 [00:00<00:00, 2248.19ex/s]
100%|██████████| 328/328 [00:00<00:00, 2746.33ex/s]
100%|██████████| 47/47 [00:00<00:00, 2673.38ex/s]
100%|██████████| 1/1 [00:00<00:00, 37.12ba/s]
100%|██████████| 1/1 [00:00<00:00, 12.97ba/s]
100%|██████████| 1/1 [00:00<00:00, 61.32ba/s]


In [4]:
print("***** Running Prediction *****")
input_ids = torch.tensor(dataset["test"]["input_ids"]).to(model.device)
attention_mask = torch.tensor(dataset["test"]["attention_mask"]).to(
    model.device
)
all_preds = []
for i in tqdm(range(0, input_ids.shape[0], args["predict_batch_size"])):
    sample_outputs = model.generate(
        input_ids=input_ids[i : i + args["predict_batch_size"]],
        attention_mask=attention_mask[i : i + args["predict_batch_size"]],
        num_beams=args["num_beams"],
        repetition_penalty=args["repetition_penalty"],
        length_penalty=args["length_penalty"],
        max_length=args["max_output_len"],
        no_repeat_ngram_size=2,
        num_return_sequences=1,
        do_sample=True,
        early_stopping=True,
        use_cache=True,
    )
    preds = tokenizer.batch_decode(sample_outputs, skip_special_tokens=True)
    all_preds.extend(preds)

all_preds_w_names = [
    nums_to_names(pred, eval(c2s), eval(f2s))
    for pred, c2s, f2s in zip(
        all_preds, dataset["test"]["class2name"], dataset["test"]["ft_num2name"]
    )
]
narrs_w_names = [
    nums_to_names(narr, eval(c2s), eval(f2s))
    for narr, c2s, f2s in zip(
        dataset["test"]["narration"],
        dataset["test"]["class2name"],
        dataset["test"]["ft_num2name"],
    )
]
input_w_names = [
    nums_to_names(inp, eval(c2s), eval(f2s))
    for inp, c2s, f2s in zip(
        dataset["test"]["input"],
        dataset["test"]["class2name"],
        dataset["test"]["ft_num2name"],
    )
]

***** Running Prediction *****


100%|██████████| 47/47 [02:58<00:00,  3.79s/it]


In [7]:
narrs_w_names = [
    nums_to_names(narr, eval(c2s), eval(f2s))
    for narr, c2s, f2s in zip(
        dataset["test"]["narration"],
        dataset["test"]["class2name"],
        dataset["test"]["ft_num2name"],
    )
]
input_w_names = [
    nums_to_names(inp, eval(c2s), eval(f2s))
    for inp, c2s, f2s in zip(
        dataset["test"]["input"],
        dataset["test"]["class2name"],
        dataset["test"]["ft_num2name"],
    )
]

In [9]:
input_w_names

['Predicted class is "Return", value of 100.00%. Other classes and values are "Go Away" 0.00%. Top features are [Perference(P2), Delay of delivery person picking up food, Ease and convenient, Marital Status, Unaffordable, Occupation, High Quality of package, Influence of time, More restaurant choices, Time saving, Perference(P1), More Offers and Discount, Freshness , Monthly Income, Age, Order Time, Politeness, Influence of rating, Easy Payment option, and Health Concern], with values [-0.07, -0.05, 0.05, -0.05, 0.05, -0.04, -0.04, 0.04, 0.03, 0.03, 0.03, 0.03, -0.03, -0.03, -0.03, 0.03, -0.02, 0.02, 0.02, and 0.02]. Postive features are [Ease and convenient, Unaffordable, Influence of time, More restaurant choices, Time saving, Perference(P1), More Offers and Discount, Order Time, Influence of rating, Easy Payment option, and Health Concern]. Negative features are [Perference(P2), Delay of delivery person picking up food, Marital Status, Occupation, High Quality of package, Freshness 