In [1]:
import torch
import datetime
import json
from gptj_utils import GPTJ_PrefixTune

In [2]:
# Restore the tuned model from the step-3250 checkpoint directory.
# The extra options are forwarded as keywords to `from_pretrained`; their exact
# semantics live in gptj_utils (presumably: where the base GPT-J weights are
# read from, and which DeepSpeed config to initialise with -- TODO confirm).
_load_options = {
    'main_checkpoint_override': "/export/data/gptj/j6b_ckpt/",
    'deepspeed_config': 'ds_config_stage2_gptj_gen.json',
}
gptj = GPTJ_PrefixTune.from_pretrained('./model_webnlg/checkpoint-3250/', **_load_options)

Load soft prompt tuning model...
Start freezing params...
transformer.wte.learned_embedding
exclude transformer.wte.learned_embedding from freezing!
transformer.wte.wte.weight
transformer.h.0.ln_1.weight
transformer.h.0.ln_1.bias
transformer.h.0.attn.attention.k_proj.weight
transformer.h.0.attn.attention.v_proj.weight
transformer.h.0.attn.attention.q_proj.weight
transformer.h.0.attn.attention.out_proj.weight
transformer.h.0.mlp.c_fc.weight
transformer.h.0.mlp.c_fc.bias
transformer.h.0.mlp.c_proj.weight
transformer.h.0.mlp.c_proj.bias
transformer.h.1.ln_1.weight
transformer.h.1.ln_1.bias
transformer.h.1.attn.attention.k_proj.weight
transformer.h.1.attn.attention.v_proj.weight
transformer.h.1.attn.attention.q_proj.weight
transformer.h.1.attn.attention.out_proj.weight
transformer.h.1.mlp.c_fc.weight
transformer.h.1.mlp.c_fc.bias
transformer.h.1.mlp.c_proj.weight
transformer.h.1.mlp.c_proj.bias
transformer.h.2.ln_1.weight
transformer.h.2.ln_1.bias
transformer.h.2.attn.attention.k_proj.weig

[2021-09-28 21:43:49,738] [INFO] [logging.py:60:log_dist] [Rank 0] DeepSpeed info: version=0.4.1, git-hash=unknown, git-branch=unknown
[2021-09-28 21:44:02,848] [INFO] [utils.py:11:_initialize_parameter_parallel_groups] data_parallel_size: 1, parameter_parallel_size: 1
[2021-09-28 21:44:02,925] [INFO] [engine.py:172:__init__] DeepSpeed Flops Profiler Enabled: False
Using /home/fellow/.cache/torch_extensions as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/fellow/.cache/torch_extensions/cpu_adam/build.ninja...
Building extension module cpu_adam...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module cpu_adam...
Time to load cpu_adam op: 1.6930701732635498 seconds
[2021-09-28 21:44:06,588] [INFO] [engine.py:701:_configure_optimizer] Using DeepSpeed Optimizer param name adam as basic optimizer
[2021-09-28 21:44:06,589] [INFO] [engine.py:706:_configure_op

[2021-09-28 21:44:56,725] [INFO] [config.py:904:print]   tensorboard_job_name ......... DeepSpeedJobName
[2021-09-28 21:44:56,725] [INFO] [config.py:904:print]   tensorboard_output_path ...... 
[2021-09-28 21:44:56,725] [INFO] [config.py:904:print]   train_batch_size ............. 16
[2021-09-28 21:44:56,726] [INFO] [config.py:904:print]   train_micro_batch_size_per_gpu  16
[2021-09-28 21:44:56,726] [INFO] [config.py:904:print]   use_quantizer_kernel ......... False
[2021-09-28 21:44:56,726] [INFO] [config.py:904:print]   wall_clock_breakdown ......... False
[2021-09-28 21:44:56,727] [INFO] [config.py:904:print]   world_size ................... 1
[2021-09-28 21:44:56,727] [INFO] [config.py:904:print]   zero_allow_untested_optimizer  False
[2021-09-28 21:44:56,728] [INFO] [config.py:904:print]   zero_config .................. {
    "stage": 2, 
    "contiguous_gradients": true, 
    "reduce_scatter": true, 
    "reduce_bucket_size": 2.000000e+08, 
    "allgather_partitions": true, 
    

In [3]:
# Path to the WebNLG 2017 challenge validation split (JSON) used for prompts.
val_dataset_file = (
    "/export/WebNLG/webnlg-dataset/webnlg_challenge_2017/"
    "val.json"
)


In [4]:

def get_inputs(dataset_file, model):
    """Build generation prompts from a WebNLG-style JSON file.

    Every entry's triples are linearised as `` | subject : property : object``
    (one segment per triple, concatenated in file order).  One prompt is
    emitted per lexicalisation whose ``comment`` equals ``'good'``, so an entry
    with several good references contributes the same prompt several times and
    an entry with none contributes nothing.

    Args:
        dataset_file: Path to a JSON file with a top-level ``'entries'`` list.
            Each element is a dict wrapping a single record that carries the
            ``'modifiedtripleset'`` and ``'lexicalisations'`` fields.
        model: Object exposing ``tokenizer.bos_token``; that token is appended
            to every prompt.

    Returns:
        list[str]: Prompts of the form ``' {triples} {bos_token} '``.

    Raises:
        KeyError: If an entry has neither the positional key nor exactly one
            record, or if a record/triple/lexicalisation lacks a required field.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding,
    # since the dataset contains non-ASCII entity names.
    with open(dataset_file, encoding='utf-8') as f:
        entries = json.load(f)['entries']

    sources = []
    for position, example in enumerate(entries, start=1):
        key = str(position)
        if key in example:
            record = example[key]
        elif len(example) == 1:
            # Filtered or re-numbered files no longer have key == position,
            # but the single wrapped record is still unambiguous.
            record = next(iter(example.values()))
        else:
            raise KeyError(key)

        # The leading ' | ' before the first triple is intentional: it is part
        # of the prompt layout this function has always produced.
        linearised = ''.join(
            ' | {} : {} : {}'.format(t['subject'], t['property'], t['object'])
            for t in record['modifiedtripleset']
        )

        good_count = sum(
            1 for lex in record['lexicalisations'] if lex["comment"] == 'good'
        )
        sources.extend([linearised] * good_count)

    return [
        ' {} {} '.format(src, model.tokenizer.bos_token)
        for src in sources
    ]

In [5]:
# Turn the validation split into one prompt string per "good" reference.
sents = get_inputs(dataset_file=val_dataset_file, model=gptj)

In [6]:
# Display the first prompt to sanity-check the linearised-triple layout.
sents[0]

'  | Andrews_County_Airport : owner : Andrews_County,_Texas <|endoftext|> '

In [7]:
# Encode the end-of-text marker and keep the first id of the result; it is
# later passed to generation as the stop token.
_eos_encoding = gptj.tokenizer.encode("<|endoftext|>")
eos_token_id = _eos_encoding[0]
eos_token_id

50256

In [8]:
# Index into `sents` selecting which prompt the cells below operate on.
idx = 0

In [13]:
# Sampling options for the demo generation, kept in one place so they can be
# tweaked without touching the call itself.
# NOTE(review): in Hugging Face `generate`, `early_stopping` only affects beam
# search; with sampling and no `num_beams` it is presumably a no-op here --
# confirm how GPTJ_PrefixTune.generate forwards it before relying on it.
generation_kwargs = dict(
    max_length=256,
    do_sample=True,
    temperature=0.1,
    top_k=5,
    top_p=0.95,
    no_repeat_ngram_size=2,
    early_stopping=True,
    num_return_sequences=3,
    use_cache=False,
    eos_token_id=eos_token_id,
    return_only_generated=True,
)

text = sents[idx]

# Only the forward passes need gradients disabled; time just the generate call.
# The leading positional `1` is defined by GPTJ_PrefixTune.generate (its meaning
# is not visible in this notebook).
start = datetime.datetime.now()
with torch.no_grad():
    out = gptj.generate(1, text=text, **generation_kwargs)
duration = datetime.datetime.now() - start

# Print each returned sequence under a separator, then the wall-clock duration.
for o in out:
    print(f"# =================== #\n{o}\n\n\n")
print(f"\n\nDuration = {duration.total_seconds()}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Andrews County Airport is owned by Andrews County, Texas.  

<|endoftext|>




Andrews County Airport is owned by Andrews County, Texas.  

<|endoftext|>




Andrews County Airport is owned by Andrews County, Texas.  

<|endoftext|>





Duration = 1.606154


In [10]:
# Display the prompt currently selected by `idx` (the one fed to generation).
sents[idx]


'  | Andrews_County_Airport : owner : Andrews_County,_Texas <|endoftext|> '

In [11]:
# Inspect `n_prompts` on the model's `s_wte` attribute.
# NOTE(review): presumably the number of learned soft-prompt slots -- confirm
# against gptj_utils.
gptj.s_wte.n_prompts

2