In [1]:
from IPython.display import display, HTML, Image
# Inject CSS so that elements with the `.container` class render at 85% width.
display(HTML("<style>.container { width:85% !important; }</style>"))
# Disable Jedi in the IPython completer.
%config IPCompleter.use_jedi=False

In [34]:
import torch
from tqdm.notebook import tqdm
from transformers import AutoTokenizer, AutoModel
from datasets import load_dataset
import pandas as pd
import re
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, StoppingCriteria, StoppingCriteriaList
from transformers import pipeline

In [6]:
# One checkpoint name shared by the tokenizer and the causal-LM weights.
checkpoint = "codeparrot/codeparrot-small"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# HumanEval test split, restricted to problems whose canonical solution contains
# fewer than two newline characters (i.e. splits into fewer than three pieces).
human_eval = load_dataset("openai_humaneval", split='test')
human_eval = human_eval.filter(lambda x: x['canonical_solution'].count('\n') < 2)

Reusing dataset openai_humaneval (/tf/data/cache/HF/datasets/openai_humaneval/openai_humaneval/1.0.0/2955cebd73602e828fa8c0a424c594e5fab4ec863b316ca98f3d8fdb6a626e75)


  0%|          | 0/1 [00:00<?, ?ba/s]

In [10]:
# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [11]:
# Move the model onto `device` and rebind the name to the returned module.
model = model.to(device)

In [130]:
# Markers used to truncate a generated continuation: generate_run keeps only
# the text that precedes the first occurrence of any of these strings.
EOF_STRINGS = ["\nclass", "\ndef", "\n@", "\nprint", "\nif", "\n>"]
# Each marker is escaped so it is always matched literally, even if a marker
# containing regex metacharacters (e.g. "\n(" or "\n$") is added later.
# The capturing group is kept, so re.split still returns the matched marker
# between the surrounding pieces.
EOF_SPLIT_REGEX = "(" + "|".join(re.escape(marker) for marker in EOF_STRINGS) + ")"

In [217]:
def generate_run(tokenizer,
                 model,
                 prompt,
                 temperature=0.6,
                 top_k=50,
                 top_p=1.0,
                 typical_p=1.0,
                 do_sample=False,
                 num_beams=1,
                 num_beam_groups=1,
                 num_return_sequences=1,
                 num_tokens_to_generate=128):
    """Generate completions for a single prompt and record the settings used.

    Args:
        tokenizer: Tokenizer used to encode ``prompt`` and decode the outputs.
        model: Model whose ``generate`` method produces the completions.
        prompt: Text to complete.
        temperature, top_k, top_p, typical_p, do_sample, num_beams,
        num_beam_groups, num_return_sequences: Forwarded unchanged to
            ``model.generate``.
        num_tokens_to_generate: Number of tokens allowed beyond the prompt;
            ``max_length`` is set to the prompt's token count plus this value.

    Returns:
        dict with:
            ``cleaned_generation``: one string per returned sequence, made of
                ``prompt`` followed by the generated continuation truncated at
                the first match of ``EOF_SPLIT_REGEX``.
            ``full_generation``: the decoded sequences (prompt included),
                untruncated.
            plus every generation setting under its parameter name.
    """
    encoded_prompt = tokenizer(prompt, return_tensors='pt', padding=True)
    # Follow the model's own device rather than a notebook-level global, so the
    # function works regardless of which cells have been executed.
    target_device = model.device
    input_ids = encoded_prompt.input_ids.to(target_device)
    attention_mask = encoded_prompt.attention_mask.to(target_device)
    # Length of what is actually fed to the model; avoids a second tokenization
    # pass that could disagree with it (e.g. if special tokens are added).
    prompt_length = input_ids.shape[1]

    with torch.no_grad():
        output_ids = model.generate(inputs=input_ids,
                                    attention_mask=attention_mask,
                                    temperature=temperature,
                                    top_k=top_k,
                                    top_p=top_p,
                                    typical_p=typical_p,
                                    do_sample=do_sample,
                                    num_beams=num_beams,
                                    num_beam_groups=num_beam_groups,
                                    max_length=prompt_length + num_tokens_to_generate,
                                    num_return_sequences=num_return_sequences,
                                    pad_token_id=tokenizer.eos_token_id)

    decode_kwargs = dict(skip_special_tokens=True, clean_up_tokenization_spaces=True)
    full_generation = tokenizer.batch_decode(output_ids, **decode_kwargs)
    # Decode only the newly generated tokens. Slicing the full decoded text by
    # len(prompt) is unreliable because tokenization clean-up can change the
    # length of the prompt portion, shifting the cut point.
    # Assumes the output sequences start with the prompt tokens (decoder-only model).
    continuations = tokenizer.batch_decode(output_ids[:, prompt_length:], **decode_kwargs)
    cleaned_generation = [prompt + re.split(EOF_SPLIT_REGEX, continuation)[0]
                          for continuation in continuations]

    return {'cleaned_generation': cleaned_generation,
            'full_generation': full_generation,
            'temperature': temperature,
            'top_k': top_k,
            'top_p': top_p,
            'typical_p': typical_p,
            'do_sample': do_sample,
            'num_beams': num_beams,
            'num_beam_groups': num_beam_groups,
            'num_return_sequences': num_return_sequences,
            'num_tokens_to_generate': num_tokens_to_generate,
           }

In [295]:
# Sweep grids in steps of 0.1: temperatures 0.1..5.0, top_p and typical_p 0.1..0.9.
temps = [tenths / 10 for tenths in range(1, 51)]
top_p = [tenths / 10 for tenths in range(1, 10)]
typical_p = [tenths / 10 for tenths in range(1, 10)]

In [307]:
%%time
# Sampling settings held constant for the whole sweep; only typical_p varies.
fixed_sampling_args = dict(temperature=1.4,
                           top_k=50,
                           top_p=1.0,
                           do_sample=True,
                           num_beams=1,
                           num_beam_groups=1,
                           num_return_sequences=200,
                           num_tokens_to_generate=128)

series_list = []
for p in typical_p:
    for row in tqdm(human_eval):
        generation = generate_run(tokenizer, model, row['prompt'],
                                  typical_p=p, **fixed_sampling_args)
        # One record per (typical_p, problem): dataset fields merged with the
        # generation dict (generation keys win on any name clash).
        series_list.append(pd.Series({**row, **generation}))

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

  0%|          | 0/37 [00:00<?, ?it/s]

CPU times: user 18min, sys: 7min 29s, total: 25min 29s
Wall time: 25min 28s


In [308]:
# Assemble the collected records into a frame and write them as JSON Lines
# (one record per line).
results_path = '../runs/codeparrot-small/typical_p_sweep_temp_1.4.jsonl'
results = pd.DataFrame(series_list)
results.to_json(results_path, orient='records', lines=True)

In [215]:
%%time
# Single-prompt run without sampling: 100 beams split into 10 groups, with all
# 100 sequences returned.
a = generate_run(
    tokenizer=tokenizer,
    model=model,
    prompt="\ndef add(a,b):",
    num_beams=100,
    num_beam_groups=10,
    num_return_sequences=100,
)

CPU times: user 566 ms, sys: 64 ms, total: 630 ms
Wall time: 627 ms


In [216]:
# Display every decoded sequence (prompt included) from the run stored in `a`.
a['full_generation']

['\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b): return a+b\ndef sub(a,b): return a-b\ndef mul(a,b): return a*b\ndef',
 '\ndef add(a,b):\n    return a+b\n\ndef mul(a,b):\n    return a*b\n\ndef div(a,b):\n    return',
 '\ndef add(a,b):\n   