In [3]:
import yaml
import torch
import pandas as pd
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, Trainer, TrainingArguments
from transformers import StoppingCriteria, StoppingCriteriaList

In [70]:
path_to_config = "../configs/gpt2-small-echo.yaml"

In [71]:
# open yaml config as a strema and load into config_dict
with open(path_to_config, "r") as stream:
    try:
        config_dict = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print("Configuration load failed!")
        print(exc)

In [72]:
device = torch.device("cuda")

In [73]:
# # Load a trained model and vocabulary that you have fine-tuned
model = GPT2LMHeadModel.from_pretrained(config_dict["output_model_dir"])
tokenizer = GPT2Tokenizer.from_pretrained(config_dict["output_tokenizer_dir"])
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dro

In [9]:
df = pd.read_csv("../data/instruct-reflections/holdout-set/holdout-instruct-question-answer.csv")

In [11]:
df

Unnamed: 0.1,Unnamed: 0,PromptAndResponse,instruction,question,answer
0,713,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what do you mean?
1,358,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what change?
2,538,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what change?
3,573,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,"well, i think , i´ll leave the addiction progr..."
4,1293,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,weird but healthier
...,...,...,...,...,...
1325,724,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...","At the beginning, I try to get off the dose of..."
1326,1249,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",Actually just to do it
1327,1299,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",Absolutely no steps
1328,439,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",1. Don't buy cigarettes. 2. smoke less


In [74]:
for idx, row in df.iterrows():
    alpha = "Instruction: " + row["instruction"] + "\n"  "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    beta = "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    charlie = "Instruction: " + row["instruction"] + "\n\n###\n\n"  "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    delta = "instruction: " + row["instruction"] + "\n\n" + "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n\n###\n\n\n"
    echo = "### Instruction:\n" + row["instruction"] + "\n\n" + "### Conversation:\n" + "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    
    df.loc[idx, 'alphaInput'] = alpha
    df.loc[idx, 'betaInput'] = beta
    df.loc[idx, 'charlieInput'] = charlie
    df.loc[idx, 'deltaInput'] = delta
    df.loc[idx, 'echoInput'] = echo

In [54]:
text = "Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\nTherapist: It's great to hear you want to reduce your smoking. What would it look like when you have reduced your smoking addiction?\nClient: Better health condition, less money spent but more stress.\n"

In [55]:
text = "Therapist: Now, what is one thing about your smoking habit that you would like to change?\nClient: I'd like to quit completely. Maybe not quit entirely, but go down to 1 a day or only smoke during social gatherings.\n"

In [56]:
text = "Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n###\n\nTherapist: What else do you dislike about smoking?\nClient: The money i spend on cigarettes\n"

In [57]:
text = "Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\nTherapist: What else do you dislike about smoking?\nClient: The money i spend on cigarettes\n\n###\n\n"

In [58]:
text = "### Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n Conversation:\nTherapist: What else do you dislike about smoking?\nClient: The money i spend on cigarettes\n"

In [17]:
'''
stop_criteria.py contains the class definition for StopTokenCriteria. 
'''

# The model.generate() function accepts as an argument a child of the abstract base class StoppingCriteria
# that defines the stopping criteria of the autoregressive loop embedded in model.generate(). The criteria defined
# is evaluated after each token is generated.
# StopTokenCriteria defines a callable object StoppingCriteria that stops (returns true) when a \n character is the 
# latest character generated and the string "Reflection:" has been previously generated.
class StopTokenCriteria(StoppingCriteria):
    
    def __init__(self, stop_token, tokenizer):
        self.stop_token = stop_token
        self.tokenizer = tokenizer
    
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        
        # if input_ids[-1][-1] (the latest token generated) is equal to the stop token
        stopGenerate = self.tokenizer.decode(input_ids[-1][-1]) == self.stop_token
        
        # if stopGenerate AND if the string "Reflection:" has been generated since the beginning of the
        # autoregressive loop
        stopGenerate = stopGenerate and "Therapist:" in self.tokenizer.decode(input_ids[-1])
        
        return stopGenerate

In [28]:
def inference_model(model, tokenizer, config_dict, text, stopping_token="\n"):
    model.eval()
    generation_config = config_dict['refgen']
    config = config_dict
    use_stopping_criteria = True


    # encode the input text into tokens using the tokenizer
    tokenized_text = tokenizer.encode(
        text, return_tensors="pt", padding=True, truncation=True
    )
    encodings_dict = tokenizer(text, truncation=True, max_length=256, padding="max_length")

    input_ids = torch.tensor(encodings_dict['input_ids'])
    input_ids = tokenized_text.to(device)

    # sample model with generate() using no tokens, just let it generate
    with torch.no_grad():
        sample_outputs = model.generate(input_ids,
                                        bos_token_id=tokenizer.bos_token_id,
                                        temperature = 0.8,
                                        # flag to use a sampling technique or greedy
                                        do_sample=generation_config['do_sample'],
                                        # penalize model for duplicating words
                                        repetition_penalty = 1.1,
                                        pad_token_id=tokenizer.pad_token_id,
                                        # of proposed words, only select from top k of them
                                        top_k=generation_config['top_k'],
                                        # max amount of tokens to generate
                                        max_length=256,
                                        stopping_criteria=StoppingCriteriaList([StopTokenCriteria(stopping_token, tokenizer)] if use_stopping_criteria else []),
                                        # of propsed words, select from the words that add up to top_p value
                                        # e.g. top_p=0.26 x(0.15),y(0.1),z(0.05)
                                        # only select from x and y (0.15+0.1+0.05=0.3 which is too high)
                                        top_p=generation_config['top_p'],
                                        # num of independently computed returned sequences for each element in the batch.
                                        num_return_sequences=1
                                       )
        output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
    return output

In [29]:
df.head(1)

Unnamed: 0.1,Unnamed: 0,PromptAndResponse,instruction,question,answer,alphaInput,betaInput,charlieInput,deltaInput,echoInput
0,713,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what do you mean?,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...


In [62]:
path_to_config

'../configs/gpt2-small-alpha.yaml'

In [20]:
import tqdm

In [82]:
text = df['echoInput'][3]
print(repr(text))

"### Instruction:\nThe following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n### Conversation:\nTherapist: What will it look like when you have made this change in your smoking habit?\nClient: well, i think , i´ll leave the addiction progressively\n"


In [84]:
out = inference_model(model, tokenizer, config_dict, text)
print(repr(out))
out = out.replace(text, '')
print(repr(out))

out = out.replace("Therapist: ", '')
print(repr(out))

out = out.replace("\n", '')
print(repr(out))


"### Instruction:\nThe following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n### Conversation:\nTherapist: What will it look like when you have made this change in your smoking habit?\nClient: well, i think, i´ll leave the addiction progressively\nTherapist: It sounds like you believe that quitting smoking will lead to a more intense sense of action. You plan on gradually reducing your smoking habit until you're able to quit completely.\n"
"### Instruction:\nThe following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n### Conversation:\nTherapist: What will it look like when you have made this change i

In [88]:
for idx, row in df.iterrows():
    print(f"{idx}: {len(df)}")
    text = row['echoInput']

    out = inference_model(model, tokenizer, config_dict, text)
    out = out.replace(text, '')
    out = out.replace("Therapist: ", '')
    out = out.replace("\n", '')
    df.loc[idx, 'echoReflection'] = out

0: 1330
1: 1330
2: 1330
3: 1330
4: 1330
5: 1330
6: 1330
7: 1330
8: 1330
9: 1330
10: 1330
11: 1330
12: 1330
13: 1330
14: 1330
15: 1330
16: 1330
17: 1330
18: 1330
19: 1330
20: 1330
21: 1330
22: 1330
23: 1330
24: 1330
25: 1330
26: 1330
27: 1330
28: 1330
29: 1330
30: 1330
31: 1330
32: 1330
33: 1330
34: 1330
35: 1330
36: 1330
37: 1330
38: 1330
39: 1330
40: 1330
41: 1330
42: 1330
43: 1330
44: 1330
45: 1330
46: 1330
47: 1330
48: 1330
49: 1330
50: 1330
51: 1330
52: 1330
53: 1330
54: 1330
55: 1330
56: 1330
57: 1330
58: 1330
59: 1330
60: 1330
61: 1330
62: 1330
63: 1330
64: 1330
65: 1330
66: 1330
67: 1330
68: 1330
69: 1330
70: 1330
71: 1330
72: 1330
73: 1330
74: 1330
75: 1330
76: 1330
77: 1330
78: 1330
79: 1330
80: 1330
81: 1330
82: 1330
83: 1330
84: 1330
85: 1330
86: 1330
87: 1330
88: 1330
89: 1330
90: 1330
91: 1330
92: 1330
93: 1330
94: 1330
95: 1330
96: 1330
97: 1330
98: 1330
99: 1330
100: 1330
101: 1330
102: 1330
103: 1330
104: 1330
105: 1330
106: 1330
107: 1330
108: 1330
109: 1330
110: 1330


In [91]:
df.to_csv("delta-echo-reflections.csv")

In [89]:
df

Unnamed: 0.1,Unnamed: 0,PromptAndResponse,instruction,question,answer,alphaInput,betaInput,charlieInput,deltaInput,echoInput,deltaReflection,echoReflection
0,713,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what do you mean?,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,"When you manage to change your smoking habit, ...","When you manage to change your smoking habit, ..."
1,358,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what change?,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,It sounds like you envision a future where you...,My life will definitely change
2,538,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what change?,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,It sounds like you envision a future where you...,I understand that you'll have made a change in...
3,573,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,"well, i think , i´ll leave the addiction progr...",Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,instruction: The following is an interaction b...,### Instruction:The following is an interactio...
4,1293,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,weird but healthier,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,It sounds like you believe that making a chang...,It seems that you believe making changes to yo...
...,...,...,...,...,...,...,...,...,...,...,...,...
1325,724,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...","At the beginning, I try to get off the dose of...",Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You're considering gradually reducing the numb...,You're considering relocating cigarettes from ...
1326,1249,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",Actually just to do it,Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You're ready to take action and make the chang...,It sounds like you're ready to take action and...
1327,1299,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",Absolutely no steps,Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You're not sure about the steps you need to ta...,It sounds like you're not certain about the ne...
1328,439,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",1. Don't buy cigarettes. 2. smoke less,Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You've decided to stop purchasing cigarettes a...,You've decided to stop purchasing cigarettes a...
