In [2]:
import yaml
import torch
import pandas as pd
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AutoTokenizer, AutoModelForCausalLM, DataCollatorForLanguageModeling, Trainer, TrainingArguments
from transformers import StoppingCriteria, StoppingCriteriaList

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
path_to_config = "../configs/gpt2-xl-echo.yaml"

In [4]:
# open yaml config as a strema and load into config_dict
with open(path_to_config, "r") as stream:
    try:
        config_dict = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print("Configuration load failed!")
        print(exc)

In [5]:
device = torch.device("cuda")

In [None]:
# # Load a trained model and vocabulary that you have fine-tuned
model = GPT2LMHeadModel.from_pretrained(config_dict["output_model_dir"])
tokenizer = GPT2Tokenizer.from_pretrained(config_dict["output_tokenizer_dir"])
model.to(device)

In [7]:
df = pd.read_csv("../data/instruct-reflections/holdout-set/holdout-instruct-question-answer.csv")

In [6]:
df = pd.read_csv("holdout-instruct-question-answer-with-echo-format.csv")

In [74]:
for idx, row in df.iterrows():
    alpha = "Instruction: " + row["instruction"] + "\n"  "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    beta = "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    charlie = "Instruction: " + row["instruction"] + "\n\n###\n\n"  "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    delta = "instruction: " + row["instruction"] + "\n\n" + "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n\n###\n\n\n"
    echo = "### Instruction:\n" + row["instruction"] + "\n\n" + "### Conversation:\n" + "Therapist: " + row["question"] + "\n" + "Client: " + row["answer"] + "\n"
    
    df.loc[idx, 'alphaInput'] = alpha
    df.loc[idx, 'betaInput'] = beta
    df.loc[idx, 'charlieInput'] = charlie
    df.loc[idx, 'deltaInput'] = delta
    df.loc[idx, 'echoInput'] = echo

In [54]:
text = "Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\nTherapist: It's great to hear you want to reduce your smoking. What would it look like when you have reduced your smoking addiction?\nClient: Better health condition, less money spent but more stress.\n"

In [55]:
text = "Therapist: Now, what is one thing about your smoking habit that you would like to change?\nClient: I'd like to quit completely. Maybe not quit entirely, but go down to 1 a day or only smoke during social gatherings.\n"

In [56]:
text = "Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n###\n\nTherapist: What else do you dislike about smoking?\nClient: The money i spend on cigarettes\n"

In [57]:
text = "Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\nTherapist: What else do you dislike about smoking?\nClient: The money i spend on cigarettes\n\n###\n\n"

In [58]:
text = "### Instruction: The following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n Conversation:\nTherapist: What else do you dislike about smoking?\nClient: The money i spend on cigarettes\n"

In [8]:
'''
stop_criteria.py contains the class definition for StopTokenCriteria. 
'''

# The model.generate() function accepts as an argument a child of the abstract base class StoppingCriteria
# that defines the stopping criteria of the autoregressive loop embedded in model.generate(). The criteria defined
# is evaluated after each token is generated.
# StopTokenCriteria defines a callable object StoppingCriteria that stops (returns true) when a \n character is the 
# latest character generated and the string "Reflection:" has been previously generated.
class StopTokenCriteria(StoppingCriteria):
    
    def __init__(self, stop_token, tokenizer):
        self.stop_token = stop_token
        self.tokenizer = tokenizer
    
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        
        # if input_ids[-1][-1] (the latest token generated) is equal to the stop token
        stopGenerate = self.tokenizer.decode(input_ids[-1][-1]) == self.stop_token
        
        # if stopGenerate AND if the string "Reflection:" has been generated since the beginning of the
        # autoregressive loop
        stopGenerate = stopGenerate in self.tokenizer.decode(input_ids[-1])
        
        return stopGenerate

In [9]:
def inference_model(model, tokenizer, config_dict, text, stopping_token="."):
    model.eval()
    generation_config = config_dict['refgen']
    config = config_dict
    use_stopping_criteria = True


    # encode the input text into tokens using the tokenizer
    tokenized_text = tokenizer.encode(
        text, return_tensors="pt", padding=True, truncation=True
    )
    encodings_dict = tokenizer(text, truncation=True, max_length=256, padding="max_length")

    input_ids = torch.tensor(encodings_dict['input_ids'])
    input_ids = tokenized_text.to(device)

    # sample model with generate() using no tokens, just let it generate
    with torch.no_grad():
        sample_outputs = model.generate(input_ids,
                                        bos_token_id=tokenizer.bos_token_id,
                                        pad_token_id=tokenizer.eos_token_id,
                                        temperature = 0.6,
                                        top_k=100,
                                        top_p=1.0,
                                        # flag to use a sampling technique or greedy
                                        do_sample=generation_config['do_sample'],
                                        # penalize model for duplicating words
                                        repetition_penalty = 1.1,
                                        # of proposed words, only select from top k of them
                                        #top_k=generation_config['top_k'],
                                        # max amount of tokens to generate
                                        max_length=input_ids.shape[1] + 50,
                                        #num_beams=5, 
                                        stopping_criteria=StoppingCriteriaList([StopTokenCriteria(stopping_token, tokenizer)] if use_stopping_criteria else []),
                                        # of propsed words, select from the words that add up to top_p value
                                        # e.g. top_p=0.26 x(0.15),y(0.1),z(0.05)
                                        # only select from x and y (0.15+0.1+0.05=0.3 which is too high)
                                        # num of independently computed returned sequences for each element in the batch.
                                        num_return_sequences=1
                                       )
        output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
    return output

In [72]:
df.head(1)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,PromptAndResponse,instruction,question,answer,echoInput,gpt2-xl-reflection1,gpt2-xl-reflection2,gpt2-xl-reflection3
0,0,0,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,"well, i think , i´ll leave the addiction progr...",### Instruction:\nThe following is an interact...,"It sounds like when you've made this change, y...",It sounds like you're planning to begin by cha...,It sounds like you believe that smoking is a c...


In [9]:
path_to_config

'../configs/gpt2-xl-echo.yaml'

In [20]:
import tqdm

In [65]:
df['echoInput'][0] = "### Instruction:\nThe following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n### Conversation:\nTherapist: What will it look like when you have made this change in your smoking habit?\nClient: well, i think, i´ll leave the addiction progressively\n"
text = df['echoInput'][0] 
print(repr(text))

"### Instruction:\nThe following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n### Conversation:\nTherapist: What will it look like when you have made this change in your smoking habit?\nClient: well, i think, i´ll leave the addiction progressively\n"


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [61]:
out = inference_model(model, tokenizer, config_dict, text)
print(repr(out))
out = out.replace(text, '')
print(repr(out))

out = out.replace("Therapist: ", '')
print(repr(out))

out = out.replace("\n", '')
print(repr(out))


"### Instruction:\nThe following is an interaction between a therapist and a client. Act as the therapist and give a reflection to the client's response. The reflection must be a statement and not a question. The reflection must be a rephrasing of the client's response.\n\n### Conversation:\nTherapist: What will it look like when you have made this change in your smoking habit?\nClient: weird but healthier\nTherapist: It seems like you believe making a change in your smoking habit will lead to a positive enhancement."
'Therapist: It seems like you believe making a change in your smoking habit will lead to a positive enhancement.'
'It seems like you believe making a change in your smoking habit will lead to a positive enhancement.'
'It seems like you believe making a change in your smoking habit will lead to a positive enhancement.'


In [10]:
for idx, row in df.iterrows():
    print(f"{idx}: {len(df)}")
    text = row['echoInput']

    out = inference_model(model, tokenizer, config_dict, text)
    #print(out)
    out = out.replace(text, '')
    out = out.replace("Therapist: ", '')
    out = out.replace("\n", '')
    if idx == 1: break
    #df.loc[idx, 'gpt2-xl-reflection3'] = out

0: 1321


KeyError: 'echoInput'

In [73]:
df.to_csv("gpt2-xl-reflections.csv")

In [89]:
df

Unnamed: 0.1,Unnamed: 0,PromptAndResponse,instruction,question,answer,alphaInput,betaInput,charlieInput,deltaInput,echoInput,deltaReflection,echoReflection
0,713,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what do you mean?,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,"When you manage to change your smoking habit, ...","When you manage to change your smoking habit, ..."
1,358,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what change?,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,It sounds like you envision a future where you...,My life will definitely change
2,538,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,what change?,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,It sounds like you envision a future where you...,I understand that you'll have made a change in...
3,573,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,"well, i think , i´ll leave the addiction progr...",Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,instruction: The following is an interaction b...,### Instruction:The following is an interactio...
4,1293,BOT: What will it look like when you have made...,The following is an interaction between a ther...,What will it look like when you have made this...,weird but healthier,Instruction: The following is an interaction b...,Therapist: What will it look like when you hav...,Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,It sounds like you believe that making a chang...,It seems that you believe making changes to yo...
...,...,...,...,...,...,...,...,...,...,...,...,...
1325,724,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...","At the beginning, I try to get off the dose of...",Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You're considering gradually reducing the numb...,You're considering relocating cigarettes from ...
1326,1249,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",Actually just to do it,Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You're ready to take action and make the chang...,It sounds like you're ready to take action and...
1327,1299,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",Absolutely no steps,Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You're not sure about the steps you need to ta...,It sounds like you're not certain about the ne...
1328,439,"BOT: Finally, what are the steps you need to t...",The following is an interaction between a ther...,"Finally, what are the steps you need to take t...",1. Don't buy cigarettes. 2. smoke less,Instruction: The following is an interaction b...,"Therapist: Finally, what are the steps you nee...",Instruction: The following is an interaction b...,instruction: The following is an interaction b...,### Instruction:\nThe following is an interact...,You've decided to stop purchasing cigarettes a...,You've decided to stop purchasing cigarettes a...


In [None]:
inputs = tokenizer("input sentence", return_tensors = "pt")
loss = model(input_ids = inputs["input_ids"], labels = inputs["input_ids"]).loss
ppl = torch.exp(loss)
print(ppl)