In [20]:
from model import WSConfig, WSModel
from transformers import PreTrainedTokenizerFast, GenerationConfig

hf_model_dir = "tinystories/runs/ts-1-512seqlen/hfmodel"

test_config = WSConfig.from_pretrained(hf_model_dir)
test_model = WSModel.from_pretrained(hf_model_dir, config=test_config)
test_tokenizer = PreTrainedTokenizerFast.from_pretrained(hf_model_dir)

prompt = "A Chinese dragon lived in a cave."
prompt_ids = test_tokenizer.encode(prompt, return_tensors="pt")
print("Prompt IDs:", prompt_ids)


output_ids = test_model.generate(
    prompt_ids, eos_token_id=test_tokenizer.eos_token_id, max_new_tokens=300
)
print("Output IDs:", output_ids)

output_text = test_tokenizer.decode(output_ids[0])
print("Output Text:", output_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt IDs: tensor([[ 155,  497,  694,  270, 1696,  515,  210,  155, 1883,   15]])
Output IDs: tensor([[ 155,  497,  694,  270, 1696,  515,  210,  155, 1883,   15,  157, 1883,
          179,  275,  240,  163,  257,  531,  510,  581,   15,  311,  227,   13,
          155,  285,  371,  294,  297,  440,  162,  157, 1883,   15,  202,  179,
          275,  614,  162,  429,  157, 1883,   15,  118,  549,  223,   13,  215,
         1017,   13, 1173,   15,  655,    2,  166,  262,  162,  429,  157, 1883,
          315,  306,  157,  655,  223,   13,  215, 1360,   13,  297,    2,  166,
          492,  592,  234,  157,  577,  794,  162,  429,  157, 1883,  289,  118,
          549,  163,  157,  655,  319,  162,  157, 1883,   15,  189,  278,  157,
         1883,  179,  275,  240,  163,  275,  627,   15,  297,  223,   13,  215,
         1082,   13, 1173,   15,  655,    2,  530,  323,  157,  577, 1883,  800,
          306,  118,  549,  163,  157,  655,  576,  447,  304,   15,  189,  368,
          348,

In [8]:
from transformers import PreTrainedTokenizerFast, GPTNeoXForCausalLM, GPTNeoXConfig

hf_model_dir = "gpt-neox/huggingface_model/"

test_config = GPTNeoXConfig.from_pretrained(hf_model_dir)
test_model = GPTNeoXForCausalLM.from_pretrained(hf_model_dir, config=test_config)
test_tokenizer = PreTrainedTokenizerFast.from_pretrained(hf_model_dir)

prompt = "To break up with your girlfriend, you must"
prompt_ids = test_tokenizer.encode(prompt, return_tensors="pt")
print("Prompt IDs:", prompt_ids)


output_ids = test_model.generate(prompt_ids, max_new_tokens=200)
print("Output IDs:", output_ids)

output_text = test_tokenizer.decode(output_ids[0])
print("Output Text:", output_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Prompt IDs: tensor([[ 251, 1782,  560,  346,  313, 5014, 5218,   12,  258, 1513]])
Output IDs: tensor([[ 251, 1782,  560,  346,  313, 5014, 5218,   12,  258, 1513,  318,  208,
         1181, 3435,  254,  991,  258,  391, 2626,   14,  258,  404,  612,  813,
          251,  540,  208, 1181, 3435,  254,  991,  258,  391, 2626,   14,  352,
          154,  269,  258,  391,  208, 1181, 3435,  254,  991,  258,  391, 2626,
           12,  258,  404,  680,  208,  686,  254,  313, 2521,   14,  154,  269,
          258,  391,  208, 1440,   12,  258,  404,  680,  208,  686,  254,  313,
         2521,   14,  154,  269,  258,  391,  208, 1440,   12,  258,  404, 1605,
          284,  208, 1440,  326, 1699, 2396,   14,  154,  269,  258,  391,  208,
         1440,   12,  258,  404, 1605,  284,  208, 1440,  326, 1699, 2396,   14,
          154,  269,  258,  391,  208, 1440,   12,  258,  404, 1605,  284,  208,
         1440,  326, 1699, 2396,   14,  154,  269,  258,  391,  208, 1440,   12,
          258,