In [11]:
from dataclasses import dataclass, field
from typing import Optional
import huggingface_hub

import torch
from accelerate import Accelerator
from datasets import load_dataset
from peft import LoraConfig
from tqdm import tqdm
from transformers import Adafactor, AutoTokenizer, HfArgumentParser, pipeline, AutoConfig, GPTNeoXForCausalLM
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer, set_seed
from trl.core import LengthSampler
from transformers import pipeline, TextGenerationPipeline

In [17]:
model = GPTNeoXForCausalLM.from_pretrained("/scratch1/jhoff/checkpoints/generator/runs/final_merged")
tokenizer = AutoTokenizer.from_pretrained("/scratch1/jhoff/checkpoints/generator/runs/final_merged")
model

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 768)
    (layers): ModuleList(
      (0-11): 12 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attention): GPTNeoXAttention(
          (rotary_emb): RotaryEmbedding()
          (query_key_value): Linear(in_features=768, out_features=2304, bias=True)
          (dense): Linear(in_features=768, out_features=768, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=768, out_features=3072, bias=True)
          (dense_4h_to_h): Linear(in_features=3072, out_features=768, bias=True)
          (act): GELUActivation()
        )
      )
    )
    (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (embed_out): Linear(in_features=768, out_features=50304, bias=False)
)

In [19]:
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
pipeline

<transformers.pipelines.text_generation.TextGenerationPipeline at 0x7f9e9fddfc40>

In [40]:
for question in ["What is 1+1?", "What is 2+2?", "What is 3+3?"]:
    result = pipeline(f"""Question: {question}
    Answer:""")
    result = result[0]['generated_text']
    print('-' * 8)
    print(f'{result}')
    print('-')
    print(f"Output length: {len(result)}")
    print(f"Output tokens: {tokenizer(result)['input_ids'].__len__()}")
    print('-' * 8)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


--
Question: What is 1+1?
    Answer: 1+1

A:

--
Output length: 43
Output tokens: 20


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


--
Question: What is 2+2?
    Answer: 2+2

A:

--
Output length: 43
Output tokens: 20
--
Question: What is 3+3?
    Answer: 3+3

A:

--
Output length: 43
Output tokens: 20


'Question: What is 1+1?\nAnswer: 1+1\n\nA:\n\n'

In [23]:
model.config

GPTNeoXConfig {
  "_name_or_path": "/scratch1/jhoff/checkpoints/generator/runs/final_merged",
  "architectures": [
    "GPTNeoXForCausalLM"
  ],
  "bos_token_id": 0,
  "classifier_dropout": 0.1,
  "eos_token_id": 0,
  "hidden_act": "gelu",
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 2048,
  "model_type": "gpt_neox",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "rotary_emb_base": 10000,
  "rotary_pct": 0.25,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.30.0.dev0",
  "use_cache": true,
  "use_parallel_residual": true,
  "vocab_size": 50304
}