In [1]:
!pip install transformers

[0m

## Load model and tokenizer

First, we load the model from the hub. We select the "float16" revision, which means that all parameters are stored using 16 bits rather than the default 32 bits of float32 (which would require twice as much memory). We also set `low_cpu_mem_usage` to `True` (introduced in [this PR](https://github.com/huggingface/transformers/pull/13466)) so that the model is loaded into CPU memory only once.

Next, we move the model to the GPU and load the corresponding tokenizer, which we'll use to prepare text for the model.

In [2]:
!pip install accelerate


[0m

In [3]:
import torch
from transformers import GPTJForCausalLM, AutoTokenizer

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the half-precision ("float16" revision) GPT-J checkpoint.
# low_cpu_mem_usage=True avoids materialising the weights twice in CPU memory
# while loading.
model = GPTJForCausalLM.from_pretrained(
    "EleutherAI/gpt-j-6B",
    revision="float16",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
# Move the model itself to the selected device (the previous code called
# `.to(device)` on an unrelated name, so the model never left the CPU).
model.to(device)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# Switch the model to evaluation mode before running inference. The call is the
# cell's last expression, so the notebook displays its return value (the module
# structure printed below).
model.eval()

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): Embedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (out_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): GPTJMLP(
          (fc_in): Linear(in_features=4096, out_features=16384, bias=True)
          (fc_out): Linear(in_features=16384, out_features=4096, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f)

In [5]:
import random
def generate_math_prompt(num1, num2,n_max=100):
    """Build a one-shot addition prompt for the question ``num1 + num2``.

    A solved example ``a + b`` is placed before the question, where ``a`` and
    ``b`` are drawn independently with ``random.randint(0, n_max)`` (both
    bounds inclusive).

    Args:
        num1: First operand of the question left for the model to answer.
        num2: Second operand of the question left for the model to answer.
        n_max: Inclusive upper bound for the random example operands.

    Returns:
        A tuple ``(prompt, a, b)``: the prompt text followed by the two
        operands that were used in the solved example.
    """
    # Draw `a` first, then `b`, so the RNG is consumed in a fixed order.
    a, b = (random.randint(0, n_max) for _ in range(2))
    solved_example = f"Q:{a}+{b}=?\nA:{a+b}"
    open_question = f"Q:{num1}+{num2}=?\nA:"
    return "\n".join((solved_example, open_question)), a, b

# Quick look at one generated prompt and the random operands of its solved example.
prompt, a, b = generate_math_prompt(23, 32)
print(prompt, f"{a} {b}", sep="\n")

Q:4+22=?
A:26
Q:23+32=?
A:
4 22


In [None]:
from tqdm import tqdm
n_max = 100
# Every ordered pair (n1, n2) with 0 <= n1, n2 < n_max, each repeated 100 times
# back-to-back, so each pair is queried with 100 independently drawn examples.
confs = [
    (n1, n2)
    for n1 in range(n_max)
    for n2 in range(n_max)
    for _ in range(100)
]

# Greedy-decode an answer for every (n1, n2) trial and append one CSV row per
# trial to gen_results.txt: n1,n2,parsed_answer,a,b  (a and b are the operands
# of the random solved example in the prompt; parsed_answer is -1 when the
# model's output is not an integer).
#
# The results file is opened once instead of once per row; line buffering
# (buffering=1) still pushes every finished row out immediately, so an
# interrupted run keeps everything produced so far.
with torch.no_grad(), open("gen_results.txt", "a", buffering=1) as f:
    for n1, n2 in tqdm(confs):
        prompt, a, b = generate_math_prompt(n1, n2)
        # Keep the whole tokenizer output so generate() also receives the
        # attention mask, not just the input ids.
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        # do_sample=False means greedy decoding; temperature is not used in
        # that mode, so it is not passed.
        generated_ids = model.generate(
            **inputs,
            do_sample=False,
            max_new_tokens=3,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Decode only the newly generated tokens rather than slicing the decoded
        # text by len(prompt), which relies on decode() reproducing the prompt
        # character-for-character.
        prompt_len = inputs["input_ids"].shape[1]
        completion = tokenizer.decode(generated_ids[0, prompt_len:])
        try:
            ans = int(completion.split("\n")[0])
        except ValueError:
            # Non-numeric (or empty) first line: record a sentinel value.
            # Only ValueError is caught so Ctrl-C and real errors still surface.
            ans = -1
        f.write(f"{n1},{n2},{ans},{a},{b}\n")


 47%|████▋     | 469977/1000000 [15:22:21<17:18:09,  8.51it/s]