In [1]:
import os
import numpy as np
import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer, TrainingArguments, set_seed
from utils.helper import load_model_recursive
from ppcm_models.pytorch_pretrained_bert.modeling_adapter import GPT2LMHeadModel, GPT2Config

In [2]:
import warnings
# Install a blanket "ignore" filter: no Python warning is shown after this point.
# NOTE(review): this also hides deprecation and numerical warnings raised by the
# libraries used below -- consider narrowing the filter to a specific category.
warnings.filterwarnings("ignore")

In [3]:
class DataArguments():
    """Dataset-related settings for this notebook.

    Every setting is a keyword argument whose default equals the value that
    used to be hard-coded, so ``DataArguments()`` behaves exactly as before
    while individual values (for example the machine-specific dataset path)
    can be overridden without editing the class.

    Args:
        dataset_path: Location of the chunked dataset file.
        bookcorpusopen_story_column_name: Name of the dataset column to read
            (presumably the text column -- confirm against the data loader).
        preprocessing_num_workers: Worker count for preprocessing.
        genre: Genre label to select.
        adapter_id: Identifier of the adapter associated with the genre.
        match_up_to_n_genres: Genre-matching limit (semantics are defined by
            the consumer of this object -- TODO confirm).
        sample_row: Optional row-sampling setting; ``None`` by default.
    """

    def __init__(self,
                 dataset_path='/home/bryan/datasets/bookcorpusopen/bookcorpusopen_chunked.arrow',
                 bookcorpusopen_story_column_name='chunk',
                 preprocessing_num_workers=8,
                 genre='Romance',
                 adapter_id=1,
                 match_up_to_n_genres=3,
                 sample_row=None):
        self.dataset_path = dataset_path
        self.bookcorpusopen_story_column_name = bookcorpusopen_story_column_name
        self.preprocessing_num_workers = preprocessing_num_workers
        self.genre = genre
        self.adapter_id = adapter_id
        self.match_up_to_n_genres = match_up_to_n_genres
        self.sample_row = sample_row
        
class ModelArguments():
    """Model-related settings for this notebook.

    Every setting is a keyword argument whose default equals the value that
    used to be hard-coded, so ``ModelArguments()`` behaves exactly as before.

    Args:
        model_size: Size tag of the base model (``'small'`` by default).
        load_checkpoint_adapter: Adapter checkpoint setting; empty string by
            default (presumably "no adapter checkpoint" -- confirm against
            ``load_model_recursive``).
        max_seq_len: Maximum sequence length.
    """

    def __init__(self, model_size='small', load_checkpoint_adapter="", max_seq_len=512):
        self.model_size = model_size
        self.load_checkpoint_adapter = load_checkpoint_adapter
        self.max_seq_len = max_seq_len

class TrainingArguments(TrainingArguments):
    """Minimal training-arguments object used by this notebook.

    Only ``output_dir`` and ``eval_accumulation_steps`` are set; the parent
    initializer is not invoked by this constructor.

    NOTE(review): the class re-uses the name of its imported base class, so
    after this definition the name refers to this subclass.
    """

    def __init__(self):
        # Paired assignment; attributes are still created in the same order.
        self.output_dir, self.eval_accumulation_steps = "./save", None
        
# Build the three settings objects shared by the cells below.
data_args = DataArguments()
model_args = ModelArguments()
training_args = TrainingArguments()

In [4]:
# Derive the base-checkpoint directory from the configured model size instead of
# repeating the size as a literal (the f-string previously had no placeholder).
# With model_size == 'small' this yields the same path as before.
model_args.model_path = f'ppcm_models/dialoGPT/{model_args.model_size}/'
config = GPT2Config.from_json_file(os.path.join(model_args.model_path, 'config.json'))

# The tokenizer is loaded from the same directory as the model configuration.
tokenizer = GPT2Tokenizer.from_pretrained(model_args.model_path)

In [5]:
%%time
model_run_names = ['GPT2small_adapterid0_genreThriller_matched3_sampleNone_maxseqlen512_bs8_lr0.001_10.0epoch_wd0.0_ws0'
                  ,'GPT2small_adapterid0_genreMystery_matched3_sampleNone_maxseqlen512_bs8_lr0.001_10.0epoch_wd0.0_ws0'
                  ,'GPT2small_adapterid0_genreHistorical_matched3_sampleNone_maxseqlen512_bs8_lr0.001_10.0epoch_wd0.0_ws0']

for i, model_run_name in enumerate(model_run_names):

    path = './save/'+model_run_name+'/pytorch_model.bin'
    model = load_model_recursive(GPT2LMHeadModel(config), path, model_args, verbose=True)

    # # use this to generate outputs
    # inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    # outputs = model(inputs['input_ids'], task_id=[0])
    # # or
    # outputs = model.transformer(inputs['input_ids'], task_id=0)

    length = 100
    text = "Hello, my dog is cute"
    generated = tokenizer.encode(text)
    context = torch.tensor([generated])
    
    # Following https://github.com/huggingface/transformers/blob/main/src/transformers/generation_utils.py#L1096-L1100
    torch.manual_seed(14045)
    generation = model.generate(inputs=context,
                               num_beams=3, 
                               length_penalty=3, 
                               early_stopping=1, 
                               num_beam_groups=3, 
                               do_sample=False, 
                               num_return_sequences=2, 
                               bos_token_id=50256,
                               eos_token_id=50256,
                               pad_token_id=50256,
                               output_scores=True,
                               output_attentions=True,
                               output_hidden_states=True,
                               return_dict_in_generate=True,
                               repetition_penalty=1.1,
                               min_length = 0,
                               max_length = length,
                               no_repeat_ngram_size=2,
                               encoder_no_repeat_ngram_size=False,
                               bad_words_ids=[[100]], # tokenizer.decode(100)
                               diversity_penalty=0.2,
                               forced_bos_token_id=50256,
                               forced_eos_token_id=50256,
                               remove_invalid_values=True,
                               exponential_decay_length_penalty=[1.0, 1.2])

    print(tokenizer.decode(generation[0][0]), '\n')

Loading finetuned model from ./save/GPT2small_adapterid0_genreThriller_matched3_sampleNone_maxseqlen512_bs8_lr0.001_10.0epoch_wd0.0_ws0/pytorch_model.bin
Hello, my dog is cute. I'm not sure if he's a good dog, but he is. He's very cute and I think he likes me.""I'm sorry, I don't know. But I can't help it. It's just that I've been so busy. You know, the last time I was here, you were at the hospital. And I didn't want to be there. So I went to the doctor."The man in the black suit was<|endoftext|> 

Loading finetuned model from ./save/GPT2small_adapterid0_genreMystery_matched3_sampleNone_maxseqlen512_bs8_lr0.001_10.0epoch_wd0.0_ws0/pytorch_model.bin
Hello, my dog is cute. I'm not sure if he's a good dog, but he is a very good one. He's very smart and very strong. And he loves me."I'm sorry, I don't know. It's just that I was thinking about it. But I didn't want to get into that. You know, it's not like I want you to know anything. Just that you know what I mean.""You're not going to be