Inference Routine from a trained model

In [3]:
import json
import os
import torch
from argparse import ArgumentParser

from pytorch_lightning import seed_everything

from transformers import EarlyStoppingCallback

from src.datasetComposer import DatasetBuilder, composed_train_path, composed_test_path, compactComposer, test_path, train_path, test_path,setupTokenizer
from src.inference_routine import InferenceGenerator
from src.datasetHandlers import SmartCollator
from src.model_utils import get_basic_model
from src.trainerArgs import CustomTrainer, getTrainingArguments
# Environment flags: W&B reporting off, tokenizer parallelism off.
os.environ.update({"WANDB_DISABLED": "true", "TOKENIZERS_PARALLELISM": "false"})


# Notebook-level switches.
iterative_gen = True
composed_already = True

# Model variant used in the folder name: 'iterative' when iterative
# generation is enabled, otherwise 'normal'.
if iterative_gen:
    modeltype = 'iterative'
else:
    modeltype = 'normal'

# Base checkpoint; one of t5-small, t5-base, t5-large,
# facebook/bart-base or facebook/bart-large.
modelbase = 'facebook/bart-base'

# Folder-friendly checkpoint name: for BART ids the part after the first '/'
# is used, every other id is taken verbatim.
if 'bart' in modelbase:
    pre_trained_model_name = modelbase.split('/')[1]
else:
    pre_trained_model_name = modelbase

# Folder of the trained model (handed to NarratorUtils further down).
output_path = f'TrainModels/{modeltype}/{pre_trained_model_name}/'


# Seed every RNG handled by `seed_everything` and pick the compute device.
rand_seed = 453
seed_everything(rand_seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Trainer settings; `arguments` and `train_arguments` are two names for the
# same dict object.
train_arguments = {
    'output_dir': output_path,
    'warmup_ratio': 0.2,
    'per_device_train_batch_size': 8,
    'num_train_epochs': 4,
    'lr_scheduler_type': 'cosine',
    'learning_rate': 5e-5,
    'evaluation_strategy': 'steps',
    'logging_steps': 500,
    'seed': rand_seed,
}
arguments = train_arguments

Global seed set to 453


In [4]:
#load the modules from the inference routine
from types import SimpleNamespace
from src.inference_routine import NarratorUtils,ExplanationRecord,LocalLevelExplanationNarration

In [5]:

# Helper built from the base checkpoint name and the trained-model folder.
narrator_utils = NarratorUtils(modelbase, output_path)

# Must run before any inference; the returned object is passed to
# LocalLevelExplanationNarration later in the notebook.
classification_explanator = narrator_utils.initialise_Model()




 Dont forget to call initialise_Model() before running any inference


In [6]:
import copy

import random
# Example input: one prediction of a classifier that the narrator should explain.

ml_task_name = 'Car Insurance Risk'

# Predicted probability per class label.
prediction_probabilities = {'Low': 0.76, 'High': 0.24}

# The features used to make the prediction. Every feature named in
# `attributions` below has to be listed here; 'Income' was missing although it
# appears under both 'explanation_order' and 'ignore'.
feature_names = ['Height', 'Mar_status', 'cur_loc', 'nb_friends', 'last_trip', 'Income']

# Shuffled copy of the feature list.
# NOTE(review): `bcc` is not referenced by any cell visible here; confirm it is
# unused before removing it (the shuffle also advances the global `random` state).
bcc = feature_names.copy()
random.shuffle(bcc)

# Order and direction of influence, taken from the output of the XAI technique.
# The narrator expects the keys ['explanation_order', 'positives', 'negatives', 'ignore']:
#   'explanation_order' - the features in the order they should be explained
#   'positives'         - features with a positive influence on the prediction decision
#   'negatives'         - features with a negative influence on the prediction decision
#   'ignore'            - features identified as having very limited contribution
attributions = {
    'explanation_order': ['Height', 'last_trip', 'cur_loc', 'nb_friends', 'Mar_status', 'Income'],
    'positives': ['Height', 'last_trip', 'Mar_status'],
    'negatives': ['cur_loc', 'nb_friends'],
    'ignore': ['Income'],
}

# Fail fast if the attributions mention a feature the record does not declare.
assert all(
    name in feature_names
    for group in attributions.values()
    for name in group
), "attributions reference features missing from feature_names"


In [7]:
# For iterative generation the narrator is told which features to discuss at
# each step. The mapping built below assigns:
#   step 0: an empty string
#   step 1: every feature, in attribution order
#   step 2: the first two features with a positive contribution
#   step 3: the first feature with a negative contribution
#   step 4: the remaining negative features followed by the remaining positive ones
#   step 5: the features flagged as having limited influence ('ignore')
#   step 6: '-'
# NOTE(review): presumably step 0 covers the prediction output and step 6 the
# conclusion - confirm against ExplanationRecord.setup_generation_steps.
# The keys are inserted in the order 0, 1, 5, 2, 3, 4, 6.
iterative_generation_steps = dict([
    ('step 0', ''),
    ('step 1', attributions['explanation_order']),
    ('step 5', attributions['ignore']),
    ('step 2', attributions['positives'][:2]),
    ('step 3', attributions['negatives'][:1]),
    ('step 4', attributions['negatives'][1:] + attributions['positives'][2:]),
    ('step 6', '-'),
])


# Single-pass alternative: only step 0 and the full feature order.
full_text_generation_steps = dict([
    ('step 0', ''),
    ('step 1', attributions['explanation_order']),
])


# Choose the instruction set that matches the notebook-level switch.
if iterative_gen:
    generation_instruction = iterative_generation_steps
else:
    generation_instruction = full_text_generation_steps


The ML model predicted the label : Low


In [12]:
# Process the explanation output and the text generation instruction.
# `iterative_mode` follows the notebook-level `iterative_gen` switch instead of
# a hard-coded True, so it always agrees with the `generation_instruction` and
# model folder selected from the same switch.
exp_record = ExplanationRecord(
    ml_task_name,
    feature_names,
    prediction_probabilities,
    attributions,
    iterative_mode=iterative_gen,
)
processed = exp_record.setup_generation_steps(generation_instruction)

# The final bit: the narrator that turns the processed record into text.
iterativeGen = LocalLevelExplanationNarration(
    classification_explanator,
    narrator_utils,
    device,
    iterative_mode=iterative_gen,
)

The ML model predicted the label : Low


In [11]:
# Run the narrator on the processed record; the trailing expression displays
# the returned value as the cell output.
narrations = iterativeGen.generateTexts(processed)
narrations

Global seed set to 456


["According to the model, the most probable label for the given case is Low with a probability of 76.0%, which is higher than that of  High. It is important to note that the classifier is very uncertain about the correctness of the assigned label and this is mainly because the likelihoods across the classes are not equal to zero. Among the input features, only Height, last_trip, and cur_loc are shown to have a positive influence on the model's prediction decision, while nb_friends has a negative influence, shifting the verdict away from Low. Income, on the other hand, has a negative influence, shifting the verdict away from Low towards  High."]