In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
os.environ['CUDA_VISIBLE_DEVICES']='0,1'
import torch
from tenacity import retry, stop_after_attempt, wait_random_exponential

In [2]:
device = 'cuda'
checkpoint = "/home/wyk/hf_cache/WizardCoder"
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
    use_fast=True,
    trust_remote_code=True,
    token=None,
    cache_dir=None
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.float16,
    # load_in_4bit=True,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map='auto',
    token=None,
    cache_dir=None
)
# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
# def generate_text(prompt, temperature, max_new_tokens):
#     inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=False).to(device)
#     outputs = model.generate(
#         inputs['input_ids'],
#         max_new_tokens=max_new_tokens,
#         temperature=temperature,
#         do_sample=False,
#         top_k=50,
#         top_p=0.95,
#         # num_beams=3,
#         num_return_sequences=3,
#         pad_token_id=tokenizer.eos_token_id
#     ).to('cpu')
#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)

#     return response.split('### Response:')[-1].strip()

In [8]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def beam_search_generate_text(prompt, num_beams, max_new_tokens):
    inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=False).to(device)
    outputs = model.generate(
        inputs['input_ids'],
        max_new_tokens=max_new_tokens,
        # temperature=temperature,
        # do_sample=False,
        # top_k=50,
        # top_p=0.95,
        num_beams=num_beams,
        num_return_sequences=num_beams,
        early_stopping=True,
        pad_token_id=tokenizer.eos_token_id
    ).to('cpu')
    responses = [tokenizer.decode(output, skip_special_tokens=True)
                 .split('### Response:')[-1].strip()
                  for output in outputs]

    return responses

In [10]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def sample_generate_text(prompt, temperature, max_new_tokens):
    inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=False).to(device)
    outputs = model.generate(
        inputs['input_ids'],
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        top_k=0,
        top_p=0.92,
        # num_beams=15,
        # num_return_sequences=10,
        # early_stopping=True,
        pad_token_id=tokenizer.eos_token_id
    ).to('cpu')
    responses = [tokenizer.decode(output, skip_special_tokens=True)
                 .split('### Response:')[-1].strip()
                  for output in outputs]

    return responses

In [11]:
generate_func = sample_generate_text
user_message = "create a story"
prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{user_message.strip()}

### Response:"""
temperature = 0.7
max_new_tokens = 1024
responses = []
for i in range(10):
    response = generate_func(
        prompt=prompt,
        temperature=temperature,
        max_new_tokens=max_new_tokens
    )
    responses.append(response)
    print(response)

["Once upon a time, there was a man named John. He lived in a small town called Dallas. He had a job as a farmer, but he had a secret crush on a girl named Sarah. One day, John went to the girl's house to tell her his love. He asked her if she wanted to be his girlfriend, but she said no. John thought about it and decided to stay home with his girlfriend and his secret crush. \r\n\r\nOne day, John's girlfriend, Sarah, went to the grocery store and asked John for some groceries. John was surprised to see that he didn't have any money with him. He asked Sarah to borrow some money from her parents, but she said no. John thought about it and decided to borrow money from his secret crush, Mike. Mike was a college student and John was afraid that Mike might steal his money. \r\n\r\nSarah returned home and asked John if he could borrow some money from her parents. John asked her to pay back the loan, but she said no. John thought about it and decided to borrow money from his secret crush, Tom

In [12]:
generate_func = beam_search_generate_text
user_message = "create a story"
prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{user_message.strip()}

### Response:"""
num_beams = 10
max_new_tokens = 1024
responses = generate_func(
    prompt=prompt,
    num_beams=num_beams,
    max_new_tokens=max_new_tokens
)
responses

['Once upon a time, there was a beautiful princess who lived in a castle with her husband and two daughters. One day, the princess received a letter from her husband. The letter said that her husband was going to marry another princess. The princess was so excited that she rushed to the castle to meet her husband. As she approached the door of the castle, she saw that the door was locked. The princess asked her husband to unlock the door for her. However, her husband refused to unlock the door. Instead, he said that he was going to marry another princess. The princess was so upset that she ran away from the castle. \n\nThe next day, the princess received a letter from her husband. The letter said that her husband was going to marry another princess. The princess was so excited that she rushed to the castle to meet her husband. As she approached the door of the castle, she saw that the door was locked. The princess asked her husband to unlock the door for her. However, her husband refus

In [22]:
def add_mem_optimization(example):
    src_uid = example['src_uid']
    task_description = example['task_description']
    baseline_code_uid = example['mem_baseline_code_uid']
    baseline_code = example['mem_baseline_code']
    baseline_perf = example['mem_baseline_perf']
    testcases = example['testcases']
    lang = example['lang']
    example_input = testcases[0]['input']
    example_output = testcases[0]['output'][0]
    user_message = f"""As an expert software developer with years of experience, please meticulously inspect the following low performance code sample and give a optimized version of the code, making it solve the same exact problem but achieve smaller memory usage.
To pass the testcases, the generated optimized code should strictly follow the same input output format as the original version of code.
The detailed information are as follows:
1. Description of the problem which the sample code solves: {task_description}
2. Programming language: {lang}
3. Original version code: 
```
{baseline_code}
```
4. Example testcase input: {example_input}
5. Example testcase output: {example_output}

Respond only with a string in the following JSON format:
{{“optimized_version_of_the_code”: code string}}"""
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{user_message.strip()}

### Response:"""
    for i in range(k):
        try:
            response = generate_func(
                prompt=prompt,
                temperature=temperature,
                max_new_tokens=max_new_tokens
            )
            # print('response: ' + str(response))

            if response is not None:
                optimization = response
            else:
                optimization = ''
        except Exception as e:
            print('Failed to generate text: ' + e.__str__())
            optimization = ''
        example[f'optimization_{i}'] = optimization
        print(f'optimization_{i}: {str(response)}')

    return example
def add_time_optimization(example):
    src_uid = example['src_uid']
    task_description = example['task_description']
    baseline_code_uid = example['time_baseline_code_uid']
    baseline_code = example['time_baseline_code']
    baseline_perf = example['time_baseline_perf']
    testcases = example['testcases']
    lang = example['lang']
    example_input = testcases[0]['input']
    example_output = testcases[0]['output'][0]
    user_message = f"""As an expert software developer with years of experience, please meticulously inspect the following unoptimized inefficient code and give an optimized version of the code, making it solve the same exact problem while achieving faster execution time.
To pass the testcases, the generated optimized code should strictly follow the same input/output format as the original unoptimized code.
The detailed information are as follows:
1. Description of the problem: {task_description}
2. Programming language: {lang}
3. Unoptimized code: 
```
{baseline_code}
```
4. Example testcase input: {example_input}
5. Example testcase output: {example_output}

Respond only with a string in the following JSON format:
{{"optimized_version_of_the_code": ```code string```}}"""
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{user_message.strip()}

### Response:"""
    for i in range(k):
        try:
            response = generate_func(
                prompt=prompt,
                temperature=temperature,
                max_new_tokens=max_new_tokens
            )
            # print('response: ' + str(response))

            if response is not None:
                optimization = response
            else:
                optimization = ''
        except Exception as e:
            print('Failed to generate text: ' + e.__str__())
            optimization = ''
        example[f'optimization_{i}'] = optimization
        print(f'optimization_{i}: {str(response)}')

    return example

generate_func = sample_generate_text
temperature = 0.7
max_input_tokens = tokenizer.model_max_length  # 8192
max_new_tokens = 2048 # The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
k = 10 # 对每一个unoptimizaed code生成10个optimization结果
load_path = '/home/wyk/CodeLLMBenchmark/code_opt/code-opt-inference/data/code_opt_dataset.jsonl'
time_opt_output_path = '/home/wyk/CodeLLMBenchmark/code_opt/code-opt-inference/test_time_opt_wizardcoder.jsonl'
mem_opt_output_path = '/home/wyk/CodeLLMBenchmark/code_opt/code-opt-inference/test_mem_opt_wizardcoder.jsonl'
dataset = load_dataset('json', split='train', data_files=str(load_path))
dataset.cleanup_cache_files()  # for multiple evaluation
dataset = dataset.select(range(10))
mem_dataset = dataset.map(add_mem_optimization)
mem_dataset.to_json(mem_opt_output_path)
time_dataset = dataset.map(add_mem_optimization)
time_dataset.to_json(time_opt_output_path)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

optimization_0: ['{"optimized_version_of_the_code": "n=int(input())\\nM=[[0 for i in range(1001)] for i in range(1001)]\\nans=n-1\\nT=[]\\nfor i in range(n) :\\n    a,b=map(int,input().split())\\n    M[a][b]=1\\n    T.append([a,b])\\nfor i in range(n) :\\n    r=T[i]\\n    if M[r[0]][r[1]]!=-1  :\\n        M[r[0]][r[1]]=-1\\n        l=[[r[0],r[1]]]\\n        while len(l)>0 :\\n            g=l[0]\\n            del(l[0])\\n            for j in range(n) :\\n                if T[j][0]==g[0] and M[T[j][0]][T[j][1]]!=-1  or  T[j][1]==g[1] and M[T[j][0]][T[j][1]]!=-1 :\\n                    l.append([T[j][0],T[j][1]])\\n                    M[T[j][0]][T[j][1]]=-1\\n                    ans=ans-1\\nprint(ans)"}']
optimization_1: ["The original code creates a 1001x1001 matrix to store whether a snow drift has been created or not. It then reads in the input and marks the snow drift at the given coordinates in the matrix. After that, it iterates through all the snow drifts and checks if there is any

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

  StockPickler.save(self, obj, save_persistent_id)
  StockPickler.save(self, obj, save_persistent_id)


Map:   0%|          | 0/10 [00:00<?, ? examples/s]

optimization_0: ["Here's an optimized version of the code that achieves smaller memory usage:\r\n\r\n```python\r\nn = int(input())\r\nM = [[0] * 1001 for _ in range(1001)]\r\nans = n - 1\r\nT = []\r\nfor i in range(n):\r\n    a, b = map(int, input().split())\r\n    M[a][b] = 1\r\n    T.append((a, b))\r\n\r\nfor i in range(n):\r\n    r = T[i]\r\n    if M[r[0]][r[1]]!= -1:\r\n        M[r[0]][r[1]] = -1\r\n        l = [(r[0], r[1])]\r\n        while l:\r\n            g = l.pop(0)\r\n            for j in range(n):\r\n                if T[j][0] == g[0] and M[T[j][0]][T[j][1]]!= -1 or T[j][1] == g[1] and M[T[j][0]][T[j][1]]!= -1:\r\n                    M[T[j][0]][T[j][1]] = -1\r\n                    l.append((T[j][0], T[j][1]))\r\n                    ans -= 1\r\n\r\nprint(ans)\r\n```\r\n\r\nThis optimized version uses a generator expression to create the list of tuples `T` instead of a list comprehension, which reduces memory usage. It also uses a list instead of a set to store the snowdrift

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

284092

In [18]:
dataset[5]

{'src_uid': 'd90da1e932a6aa546bec4e1bd4b1fbec',
 'lang': 'Python 3',
 'mem_baseline_code_uid': '90bfdbc689a0260441a7142672731f11',
 'mem_baseline_code': 'I=lambda:map(int,input().split())\n\nn,m=I()\n\nN=list(I())\n\np=set()\n\nfor i in range(m):\n\n\tx,y=I();x-=1;y-=1\n\n\tp.add((x,y)if x<y else(y,x))\n\nr=1e9\n\nfor i in range(n):\n\n\tfor j in range(i):\n\n\t\tfor k in range(j):\n\n\t\t\tif(j,i)in p and(k,i)in p and(k,j)in p:\n\n\t\t\t\tr = min(r,N[i]+N[j]+N[k])\n\nprint(-1 if r>1e8 else r)',
 'mem_baseline_perf': 94.3448275862,
 'time_baseline_code_uid': 'ba59a8b2eafbfcac803e158020efcca4',
 'time_baseline_code': 'import itertools\nimport math\n\nimport time\ndef timer(f):\n    def tmp(*args, **kwargs):\n        t = time.time()\n        res = f(*args, **kwargs)\n        print("Время выполнения функции: %f" % (time.time()-t))\n        return res\n\n    return tmp\n\n#n = int(input())\n\nn, m = map(int, input().split(\' \'))\narray = list(map(int, input().split(\' \')))\nmatrix = [[0 