# Approach

In this notebook, we present the approach for our experiment, in which a machine explains a topic by planning and then realizing explanation moves.

In [None]:
# Install all the required packages
!pip install bitsandbytes
!pip install -q datasets loralib sentencepiece
!pip install -q git+https://github.com/zphang/transformers@c3dc391
!pip uninstall peft -y
!pip install -q git+https://github.com/huggingface/peft.git@0769587a3cd80ad2ae508cc06efbf54ddca821b3
!pip install sentence_transformers

In [None]:
import tqdm
from sentence_transformers import SentenceTransformer, util

In [None]:
# Initialize the CRF model.
# The model is based on the work of Wachsmuth and Alshomary ("A Dialogue Corpus for Learning to Construct Explanations").
import sys
import os
sys.path.insert(0, './lib/src')

import torch
from multi_turn_bert import MultiTurnBert
from transformers import BertTokenizer, BertConfig

from torch.utils.data import DataLoader
from custom_dataset import CustomDataset
import numpy as np
import pandas as pd
import argparse

# `parser` is not used to parse anything; all settings are placed directly on `args`.
parser = argparse.ArgumentParser()
args = argparse.Namespace(turn_type='multi', pooling='cls', sp1_token='[EXPLAINER]', sp2_token='[EXPLAINEE]', bert_type='bert-base-uncased',
                          max_len=256, max_turns=56, dropout=0.1, device='cuda', learning_rate=2e-5, warmup_ratio=0.01,
                          batch_size=1, num_workers=2, num_epochs=5, num_classes=-1, ckpt_dir='./cross-val-models', planning=False, start_token='[START]')

# Device setting
if torch.cuda.is_available():
    args.device = torch.device('cuda')
else:
    print("CUDA is unavailable. Starting with CPU.")
    args.device = torch.device('cpu')


# Sizes derived from the pretrained BERT configuration (computed once).
bert_config = BertConfig.from_pretrained(args.bert_type)
args.hidden_size = bert_config.hidden_size
args.p_dim = args.hidden_size
args.max_len = min(args.max_len, bert_config.max_position_embeddings)


# Tokenizer extended with the speaker tokens and the start token.
tokenizer_crf = BertTokenizer.from_pretrained(args.bert_type)

num_new_tokens = tokenizer_crf.add_special_tokens(
    {
        'additional_special_tokens': [args.sp1_token, args.sp2_token, args.start_token]
    }
)

vocab = tokenizer_crf.get_vocab()
args.vocab_size = len(vocab)

args.cls_token = tokenizer_crf.cls_token
args.sep_token = tokenizer_crf.sep_token
args.pad_token = tokenizer_crf.pad_token

args.cls_id = vocab[args.cls_token]
args.sep_id = vocab[args.sep_token]
args.pad_id = vocab[args.pad_token]
args.sp1_id = vocab[args.sp1_token]
args.sp2_id = vocab[args.sp2_token]
args.start_id = vocab[args.start_token]
args.o_id   = -1


def _load_classifier(ckpt_path, num_classes, planning):
    """Build a MultiTurnBert for the given setting and load its checkpoint.

    `args.num_classes` and `args.planning` are set before construction because
    the model is built from `args`. The weights are mapped onto `args.device`,
    so checkpoints saved on a GPU can also be loaded on a CPU-only machine.

    NOTE: torch.load unpickles the file; only load checkpoints you trust.
    """
    args.num_classes = num_classes
    args.planning = planning
    classifier = MultiTurnBert(args).to(args.device)
    classifier.load_state_dict(torch.load(ckpt_path, map_location=args.device))
    classifier.eval()
    return classifier


# planning = False -> with last text
# planning = True  -> without last text
# Explanation-move classifiers (10 classes).
model_exp_with_last_text = _load_classifier(
    './exp_act_models/ckpt_epoch=1_train_f1=0.8253_valid_f1=0.414_False_with_text', num_classes=10, planning=False)
model_exp_without_last_text = _load_classifier(
    './exp_act_models/ckpt_epoch=3_train_f1=0.9256_valid_f1=0.424_True_without_text', num_classes=10, planning=True)

# Topic classifiers (4 classes).
model_topic_with_last_text = _load_classifier(
    './topic_rel_models/ckpt_epoch=1_train_f1=0.8155_valid_f1=0.5814_False_with_text', num_classes=4, planning=False)
model_topic_without_last_text = _load_classifier(
    './topic_rel_models/ckpt_epoch=4_train_f1=0.9125_valid_f1=0.5385_True_without_text', num_classes=4, planning=True)

# planning = False -> with last text
# planning = True  -> without last text
def predict_label(dig, args, is_exp, planning):
    """Predict the label of the last turn of a dialogue with one of the four classifiers.

    Args:
        dig: DataFrame holding the dialogue; must contain the columns
            'exp_act_label' and 'topic_func_label'. A 'label' column is
            (over)written on it with the labels of the selected task.
        args: settings namespace; `planning` and `num_classes` are updated on it,
            and `batch_size`, `num_workers`, `device` and `pad_id` are read.
        is_exp: truthy -> explanation-move classifier (10 classes),
            falsy -> topic classifier (4 classes).
        planning: truthy -> model trained without the last text,
            falsy -> model trained with the last text.

    Returns:
        The last element of the first sequence in the output of the final batch.

    Raises:
        ValueError: if the dataset built from `dig` yields no batch.
    """
    args.planning = planning
    args.num_classes = 10 if is_exp else 4
    label_clm = 'exp_act_label' if is_exp else 'topic_func_label'
    dig['label'] = dig[label_clm]

    # Named `classifier` so it is not confused with the notebook-level LLaMA `model`.
    if is_exp:
        classifier = model_exp_without_last_text if planning else model_exp_with_last_text
    else:
        classifier = model_topic_without_last_text if planning else model_topic_with_last_text

    ds = CustomDataset(dig, tokenizer_crf, args, vocab)
    ds_loader = DataLoader(ds, batch_size=args.batch_size, num_workers=args.num_workers, pin_memory=True)

    outputs = None
    with torch.no_grad():
        for batch in ds_loader:
            batch_x, batch_turns, batch_labels, _ = batch
            batch_x, batch_turns, batch_labels = \
                batch_x.to(args.device), batch_turns.to(args.device), batch_labels.to(args.device)
            _, outputs = classifier(batch_x, args.pad_id, batch_turns, turns_labels=batch_labels)

    if outputs is None:
        # Without this guard an empty loader surfaced as an unbound-variable error.
        raise ValueError("predict_label: the dialogue produced no batches, nothing to predict")
    return outputs[0][-1]

In [None]:
#The following code loads the Alpaca LoRA pretrained model.
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig

# Hub identifiers of the base LLaMA checkpoint and of the LoRA adapter applied on top of it.
BASE_MODEL_ID = "linhvu/decapoda-research-llama-7b-hf"
LORA_ADAPTER_ID = "tloen/alpaca-lora-7b"

tokenizer = LLaMATokenizer.from_pretrained(BASE_MODEL_ID)

# The base model is loaded in 8-bit with automatic device placement, then wrapped with the adapter.
model = PeftModel.from_pretrained(
    LLaMAForCausalLM.from_pretrained(BASE_MODEL_ID, load_in_8bit=True, device_map="auto"),
    LORA_ADAPTER_ID,
)

In [None]:
#Setup of Alpaca
def generate_prompt(instruction, input=None):
    """Build the Alpaca-style prompt for an instruction.

    When `input` is truthy the prompt contains an "### Input:" section;
    otherwise (None, empty string, ...) the instruction-only template is used.
    The prompt always ends with "### Response:".
    """
    instruction_section = f"### Instruction:\n{instruction}"
    if not input:
        sections = [
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.",
            instruction_section,
        ]
    else:
        sections = [
            "Below is an instruction that describes a task, paired with an input that provides further context. "
            "Write a response that appropriately completes the request.",
            instruction_section,
            f"### Input:\n{input}",
        ]
    sections.append("### Response:")
    # Sections are separated by one blank line.
    return "\n\n".join(sections)

In [None]:
#Evaluate function that we will use to call Alpaca LoRA.
# Decoding options passed to every `model.generate` call made by `evaluate`.
_DECODING_OPTIONS = {
    "temperature": 0.1,
    "top_p": 0.75,
    "num_beams": 4,
    "repetition_penalty": 1.03,
}
generation_config = GenerationConfig(**_DECODING_OPTIONS)

def evaluate(instruction, input=None):
    """Run Alpaca LoRA on an instruction (plus optional input) and return its answers.

    Args:
        instruction: task description placed in the prompt.
        input: optional context placed in the prompt's input section.

    Returns:
        A list with one stripped answer per generated sequence. The answer is the
        text that follows the first "### Response:" marker (up to a second marker,
        if the model emits one). A sequence without the marker yields "" instead
        of raising IndexError.
    """
    response_marker = "### Response:"
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]
    # Move to the GPU only when one exists, so the call does not crash on CPU-only machines.
    if torch.cuda.is_available():
        input_ids = input_ids.cuda()
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256
    )
    response = []
    for s in generation_output.sequences:
        segments = tokenizer.decode(s).split(response_marker)
        # segments[0] is the prompt; segments[1] is the generated answer.
        answer = segments[1] if len(segments) > 1 else ""
        response.append(answer.strip())
    return response

In [None]:
# read and prepare the data for the CRF model
# NOTE: read_pickle unpickles the file; only use it on the project's own data file.
# The raw per-turn frame is kept under its own name so this cell can be re-run safely.
turns_raw = pd.read_pickle('./data_turns.pkl')


def _prefix_topic(row):
    """Return the turn dict with '<topic> [SEP] ' prepended to its text (underscores in the topic become spaces)."""
    turn = row['turn_text']
    return {
        'author': turn['author'],
        'text': row['topic'].replace('_', ' ') + ' [SEP] ' + turn['text'],
    }


# One row per dialogue (task_id): turns and labels are collected into lists, the topic is taken from the first turn.
df = (
    turns_raw
    .assign(turn_text=turns_raw.apply(_prefix_topic, axis=1))
    .groupby('task_id')
    .agg({'turn_text': list,
          'topic': lambda rows: list(rows)[0],
          'topic_func_label': list,
          'dlg_act_label': list,
          'exp_act_label': list})
    .reset_index()
)
df['num_turns_per_topic'] = df.topic_func_label.apply(len)

In [None]:
#Split the data in training set 60%, validation set 20%, test set 20%
# Shuffle once with a fixed seed, then cut at 60% and 80%.
# Positional slicing yields the same three parts as np.split, without passing a
# DataFrame through NumPy (which triggers a deprecation warning in recent pandas).
shuffled = df.sample(frac=1, random_state=42)
train_end = int(.6 * len(df))
valid_end = int(.8 * len(df))

train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]

valid_df = valid.copy()

In [None]:
# Template questions for explanation move e1 ("test understanding").
# They contain no placeholder and are used verbatim.
test_understanding_questions = [
    "Did that explanation make sense to you?",
    "Was my last point clear enough?",
    "Do you feel confident about what we just discussed?",
    "Do you think you can summarize the main idea?",
    "Can you recall the key points we covered?",
    "Was the information I provided relevant to your interests?",
    "Do you agree with the information presented?",
    "Do you have any different views on this topic?",
    "Is there anything you would challenge or question about what we discussed?",
    "Are you finding this topic interesting?",
    "Do you want to learn more about this subject?",
    "Has this discussion sparked any new ideas or thoughts for you?",
    "Is there anything you'd like me to explain differently?",
    "Do you think the examples I used were helpful?",
    "Would you like to go over any part of the topic again?"
]

In [None]:
# pick a random question to the label e1 Testunderstanding
import random

def get_random_question(question_list, rng=None):
    """Return one question picked uniformly at random from `question_list`.

    Args:
        question_list: non-empty sequence of questions.
        rng: optional `random.Random` instance; pass a seeded one to make the
            pick reproducible. Defaults to the module-level `random` generator.

    Raises:
        IndexError: if `question_list` is empty.
    """
    chooser = random if rng is None else rng
    return chooser.choice(question_list)

# Example usage: draw one "test understanding" question and print it.
# No seed is set here, so the printed question can differ between runs.
random_test_understanding = get_random_question(test_understanding_questions)
print(random_test_understanding)

In [None]:
# Template questions for explanation move e2 ("test prior knowledge"), keyed by topic label (1-4).
# "[]" is a placeholder that the caller replaces with a topic name; the questions under key 4 have no placeholder.

test_prior_knowledge_questions = {

    1 : [ # main topic
      "So do you know what [] is?",
      "Do you know what [] is?",
      "Have you ever heard of [] before?",
      "Do you know what [] means? Have you heard that before?",
      "So have you ever heard of something called []?",
      "Have you heard about [] before?",
      "Are you familiar with the basics of []?",
      "Have you encountered [] before?",
    ],
    2 : [ # related topic (same questions as key 3)
      "So do you have a basic sense of what [] is?",
      "Have you read or learned anything specific about []?",
      "How does [] relate to what you already know?",
      "Do you know any applications of [] in real-world scenarios?",
      "What do you already know about []?",
      "In what context have you encountered [] previously?",
      "What are your thoughts or opinions about []?",
      "Does [] interest you in any particular way?"
    ],

    3 : [ # sub-topic (same questions as key 2)
      "So do you have a basic sense of what [] is?",
      "Have you read or learned anything specific about []?",
      "How does [] relate to what you already know?",
      "Do you know any applications of [] in real-world scenarios?",
      "What do you already know about []?",
      "In what context have you encountered [] previously?",
      "What are your thoughts or opinions about []?",
      "Does [] interest you in any particular way?"
    ],

    4 : [ # other topic
      "And you know what we're going to be talking about?",
      "So what's your major?",
      "Do you think of yourself as curious about science?",
      "Are you interested in science?",
      "Do you like science? Is that something you think about?"
    ],
}

In [None]:
# Generate the needed Prompts based on the main topic for realizing explanation moves as responses from Alpaca
def generate_prompts(main_topic):
    """Build the instruction prompts used to realize explanation moves with Alpaca.

    Args:
        main_topic: topic name interpolated into the E2-E5 prompts
            (the baseline prompt does not mention the topic).

    Returns:
        A 5-tuple of prompt strings, in this order: related-topic prompt (E2),
        explanation prompt (E3), feedback prompt (E4), other-move prompt (E5),
        and the baseline prompt.
    """
    
    # E2: asks for three basic concepts related to the main topic, as an enumerated list.
    E2_RELATED_TOPIC_PROMPT = (
        f'''
        Identify three basic, foundational concepts related to "{main_topic}" suitable for placeholders in questions.
        Present them in an enumerated format.
        Each concept should be expressed as a single word or a very brief phrase, ensuring simplicity and direct relevance to "{main_topic}".
        These concepts should be basic enough to be understood by beginners in the subject.
        '''
    )
    # E3: asks for an explanation turn that continues the dialogue history.
    E3_EXPLANATION_PROMPT = (
        f'''
        Based on the following dialogue history between an explainer an explainee, play the role of the explainer,
        generate a response that provides a clear, concise, and informative explanation about the main topic "{main_topic}" if needed or the topics discussed in the previous turns.
        The response should logically continue the conversation and make sense in the context of the dialogue.
        Focus on delivering an explanation that enhances understanding of the main topic or related concepts.
        The response should be in plain text format, without any dialogue format or author labels.
        '''
    )
    
    # E4: asks for a feedback turn on the explainee's current understanding.
    E4_FEEDBACK_PROMPT = (
        f'''
        After analyzing the entire dialogue about the main topic "{main_topic}",
        play the role of the explainer in the next turn and provide a the next turn as a feedback response that addresses the explainee’s current understanding.
        The response should reflect on the progression of the explainee’s comprehension throughout the dialogue and focus on their most recent response.
        The output must be a standalone response, only the turn text content of the explainer, without repeating previous dialogue or turns from it.
        '''
    )

    # E5: asks for a turn that is neither strictly explanatory nor feedback-oriented.
    E5_OTHER_MOVE_PROMPT = (
        f'''
        Based on the full dialogue history provided below about the main topic "{main_topic}",
        generate a relevant and coherent response as the explainer.
        This response should appropriately continue the conversation,
        addressing any new points or questions raised in the entire dialogue,
        and adding value to the discussion in a way that is not strictly explanatory or feedback-oriented.
        Ensure the response is engaging, contextually aware, and maintains the flow of the dialogue.
        '''
    )
    # Baseline: generic "generate the next explainer turn" prompt with no explanation-move guidance.
    BASELINE_PRPOMT = (
        f'''
        Given the dialogue provided between an explainer and an explainee, generate the next turn for the explainer.
        The response should be contextually aware, directly addressing the previous turn by the explainee.
        It should continue the theme of the conversation in an engaging and informative manner, ensuring that the dialogue flows naturally.
        The explainer's response must be relevant to the topic discussed and should aim to further the conversation, providing insights, clarifications, or thoughtful questions as appropriate.
        Ensure the tone is friendly and conducive to a positive and constructive exchange of ideas.
        '''
    )
    return E2_RELATED_TOPIC_PROMPT, E3_EXPLANATION_PROMPT, E4_FEEDBACK_PROMPT, E5_OTHER_MOVE_PROMPT, BASELINE_PRPOMT

In [None]:
# Function to extract relevant text from a response string, clearing away unnecessary content.
def extract_turn_text(response):
    """Strip dialogue scaffolding from a generated response.

    Looks for the marker "turn text:" first and "Explainer:" second. For the
    first marker found, returns the rest of that line (stripped). If neither
    marker occurs, the response is returned unchanged.
    """
    for marker in ("turn text:", "Explainer:"):
        position = response.find(marker)
        if position < 0:
            continue
        remainder = response[position + len(marker):]
        # Keep only what is on the same line as the marker.
        first_line, _, _ = remainder.partition('\n')
        return first_line.strip()
    return response

In [None]:
#generate a response using Alpaca's prompts
def generate_response_e3_e4_e5(PROMPT,dig,i):
    """Ask Alpaca for the next explainer turn given the first `i` turns of the dialogue.

    Args:
        PROMPT: instruction text passed to `evaluate`.
        dig: DataFrame whose first row holds the dialogue; `dig.turn_text.iloc[0]`
            is a list of dicts with the keys 'author' and 'text'.
        i: number of leading turns to include in the history. Values larger than
            the history length include every available turn.

    Returns:
        The cleaned text of the first generated answer, or '' if nothing was generated.
    """
    turns = dig.turn_text.iloc[0]
    history_lines = []
    for turn_no, turn in enumerate(turns[:max(i, 0)], start=1):
        # The stored text has the form "<topic> [SEP] <utterance>"; keep only the utterance.
        _, separator, utterance = turn['text'].partition('[SEP]')
        text_after_sep = utterance.strip() if separator else turn['text']
        # e.g. "turn 1, author: Explainee, turn text: sleep_scientist."
        history_lines.append(f"turn {turn_no}, author: {turn['author']}, turn text: {text_after_sep}.\n")
    INPUT = ''.join(history_lines)

    print(INPUT)
    response = evaluate(PROMPT,INPUT)
    return extract_turn_text(response[0]) if response else ''

In [None]:
# Predict the labels for and realize the explanation move for one turn in a dialogue 
def realization_response(dig, main_topic, E2_RELATED_TOPIC_PROMPT, E3_EXPLANATION_PROMPT, E4_FEEDBACK_PROMPT, E5_OTHER_MOVE_PROMPT):
    """Plan and realize the next explainer turn for the dialogue in `dig`.

    For the opening of a dialogue (fewer than 3 turns so far) a fixed strategy is
    used, depending on the task id: ids below 207 get a "test prior knowledge"
    question about the main topic, ids above 207 get a generated explanation.
    Otherwise the explanation move and the topic are predicted with the
    classifiers (planning=True, i.e. without the last text) and the move is
    realized with a template question or an Alpaca prompt.

    The response is printed and returned. Responses of the opening branches are
    returned as produced; all others are passed through `extract_turn_text`.

    NOTE(review): a task id of exactly 207 matches neither opening branch and
    falls through to the classifier path - confirm this is intended.
    NOTE(review): `CRF_output_exp_act_label_Dic`, `CRF_output_topic_func_label_Dic`
    and `generate_related_topic` are not defined in the visible part of this
    notebook; they must exist before this function is called.
    """
    curr_turn = len(dig.turn_text[0])
    task_id = dig.task_id.iloc[0]

    if curr_turn < 3:
        if task_id < 207:
            # First explainer turn in the "5 levels" dialogues.
            opening = get_random_question(test_prior_knowledge_questions[1]).replace("[]", main_topic)
            print(opening)
            return opening
        if task_id > 207:
            # First explainer turn in the reddit dialogues.
            opening = generate_response_e3_e4_e5(E3_EXPLANATION_PROMPT, dig, curr_turn)
            print(opening)
            return opening

    # From turn 4 onwards: plan the move with the classifiers.
    # predict_label(dig, args, is_exp, planning); is_exp=False selects the topic model.
    # planning = False -> with last text, planning = True -> without last text
    crf_exp_act_result = predict_label(dig, args, True, True)
    crf_topic_result = predict_label(dig, args, False, True)
    exp_act_label = CRF_output_exp_act_label_Dic[crf_exp_act_result]
    topic_label = CRF_output_topic_func_label_Dic[crf_topic_result]
    print ('topic label:', topic_label, 'exp_label: ', exp_act_label)

    if exp_act_label == 1:
        # e1: test understanding
        response = get_random_question(test_understanding_questions)
    elif exp_act_label == 2:
        # e2: test prior knowledge, about the main topic or a generated related topic
        if topic_label == 1:
            placeholder_value = main_topic
        else:
            placeholder_value = generate_related_topic(E2_RELATED_TOPIC_PROMPT)
        response = get_random_question(test_prior_knowledge_questions[topic_label]).replace("[]", placeholder_value)
    else:
        # e3: provide explanation, e4: provide feedback, anything else: e5 other
        if exp_act_label == 3:
            move_prompt = E3_EXPLANATION_PROMPT
        elif exp_act_label == 4:
            move_prompt = E4_FEEDBACK_PROMPT
        else:
            move_prompt = E5_OTHER_MOVE_PROMPT
        response = generate_response_e3_e4_e5(move_prompt, dig, curr_turn)

    response = extract_turn_text(response)
    print(response)
    return response

In [None]:
# Run the experiment on one dialogue and save the results
def realization_dialog(original_dig):
    """Replay one dialogue and compare generated explainer turns with the originals.

    The dialogue history starts with the first original turn. For every later
    turn authored by 'Explainer', a response is produced by the approach
    (`realization_response`) and by the baseline prompt, and both are compared
    with the original turn via cosine similarity of their sentence embeddings.
    The original turn (not the generated one) is then appended to the history.

    Args:
        original_dig: one row of the dialogue frame, providing `task_id`, `topic`,
            `turn_text`, `topic_func_label`, `exp_act_label` and
            `num_turns_per_topic`. Label strings carry a two-digit, 1-based
            index at positions 2-3.

    Returns:
        (task_id, average baseline similarity, average approach similarity, results)
        where `results` holds one
        [turn index, ground truth, response, baseline response, approach similarity, baseline similarity]
        entry per evaluated turn. If the dialogue has no explainer turn after the
        first one, both averages are NaN (previously this raised ZeroDivisionError).

    NOTE(review): `model_similarity` is not defined in the visible part of this
    notebook; a sentence-embedding model must be bound to that name beforehand.
    """
    def _label_index(raw_label):
        # Two-digit label number at positions 2-3, shifted to a 0-based index.
        return int(raw_label[2:4]) - 1

    def _mean(values):
        return sum(values) / len(values) if values else float('nan')

    results = []
    main_topic = original_dig.topic
    E2_RELATED_TOPIC_PROMPT, E3_EXPLANATION_PROMPT, E4_FEEDBACK_PROMPT, E5_OTHER_MOVE_PROMPT, BASELINE_PRPOMT = generate_prompts(main_topic)
    cos_list_approach = []
    cos_list_basline = []

    # Single-row frame whose list-valued cells grow as the dialogue is replayed.
    dig = pd.DataFrame([{
            'task_id': original_dig.task_id,
            'turn_text': [original_dig.turn_text[0]],
            'topic': main_topic,
            'topic_func_label': [_label_index(original_dig.topic_func_label[0])],
            'exp_act_label': [_label_index(original_dig.exp_act_label[0])],
        }])

    for i in range(1, original_dig.num_turns_per_topic):
        original_turn = original_dig.turn_text[i]
        if original_turn['author'] == 'Explainer':
            response = realization_response(dig, main_topic, E2_RELATED_TOPIC_PROMPT, E3_EXPLANATION_PROMPT, E4_FEEDBACK_PROMPT, E5_OTHER_MOVE_PROMPT)
            # Ground truth is the utterance without the "<topic> [SEP] " prefix.
            gt = original_turn['text'].split(' [SEP] ')[-1]
            response_basline = generate_response_e3_e4_e5(BASELINE_PRPOMT, dig, i)
            response_basline_encoded = model_similarity.encode(response_basline)
            response_encoded = model_similarity.encode(response)
            original_encoded = model_similarity.encode(gt)

            # Results aren't great with the all-distilroberta-v1 model
            cos_sim_approach = util.cos_sim(response_encoded, original_encoded)
            cos_sim_baseline = util.cos_sim(response_basline_encoded, original_encoded)
            cos_list_approach.append(cos_sim_approach)
            cos_list_basline.append(cos_sim_baseline)
            print('#' * 20)
            print('Reponse:', response)
            print('Response Basline:', response_basline)
            print('Ground truth:', gt)
            print("Similarity approach:", cos_sim_approach)
            print("Similarity baseline:", cos_sim_baseline)
            print('#' * 20)

            results.append([i, gt, response, response_basline, cos_sim_approach, cos_sim_baseline])

        # Extend the history with the original turn and its labels (in-place list mutation).
        dig.turn_text.iloc[0].append(original_turn)
        dig.topic_func_label.iloc[0].append(_label_index(original_dig.topic_func_label[i]))
        dig.exp_act_label.iloc[0].append(_label_index(original_dig.exp_act_label[i]))
    return original_dig.task_id, _mean(cos_list_basline), _mean(cos_list_approach), results

In [None]:
# Run the experiment on the validation set and save the results in a data frame
# Positions (in the shuffled validation split) of the dialogues processed by this cell.
# NOTE(review): only positions 86-92 are run, and the pickle written below contains
# just those dialogues - confirm whether the full validation split was intended.
VALID_FIRST, VALID_STOP = 86, 93
_banner = '*' * 20

valid_results = []
for i in range(VALID_FIRST, VALID_STOP):
    print(_banner, i, _banner)
    dialogue_row = valid.iloc[i]
    valid_results.append(realization_dialog(dialogue_row))

_result_columns = ['task_id', 'avg_cos_sim_baseline', 'avg_cos_sim_approach', 'results']
df_valid_results = pd.DataFrame(valid_results, columns=_result_columns)
df_valid_results.to_pickle('./valid_results.pkl')

In [None]:
# Run the experiment on the test set and save the results in a data frame
# Every dialogue of the test split is processed; the count is taken from the
# split itself so the cell keeps working when the dataset size changes.
test_results = []
for i in range(len(test)):
    print('*' * 20, i, '*' * 20)
    test_results.append(realization_dialog(test.iloc[i]))
df_test_results = pd.DataFrame(test_results, columns=['task_id', 'avg_cos_sim_baseline', 'avg_cos_sim_approach', 'results'])
df_test_results.to_pickle('./test_results.pkl')