In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os 

WORK_AREA = '/Users/asaf/Workspace/biu/complex-utterance-to-code'
os.chdir(WORK_AREA)

paths = ['./src/', './src/api/v6', './notebooks/src']
for path in paths:
    path = os.path.normcase(path)
    if not any(os.path.normcase(sp) == path for sp in sys.path):
        sys.path.append(path)

In [3]:
from typing import Union, List
import requests
import pandas as pd
import numpy as np
import tqdm
from transformers import GPT2TokenizerFast
import math
import tokenize
from nltk.translate import bleu_score
from datetime import datetime
import time
from dotenv import load_dotenv
import json
import re
import pickle
from llm.prompts import build_prompt, build_examples_prompt, build_example_prompt, build_spec_prompt

In [4]:
load_dotenv()

True

In [5]:
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

## Evaluation data

In [6]:
FILE_NAME = 'eval_complex_utterance_to_code_with_intermediate_152_20231112.csv.gz'
BASE_PATH = '/Users/asaf/Workspace/biu/complex-utterance-to-code/build'

def load_eval_data(file_name: str = FILE_NAME, base_path: str = BASE_PATH, limit: int = 0) -> pd.DataFrame:
    eval_df = pd.read_csv(os.path.join(base_path, file_name))
    if limit:
        eval_df = eval_df[:limit]
    eval_df = eval_df.reset_index()  # make sure indexes pair with number of rows

    return eval_df

## Train data

In [7]:
file_path = 'build/train_complex_utterance_to_code_with_intermediate_40k.csv.gz'
examples_df = pd.read_csv(file_path)
examples_df = examples_df.reset_index()  # make sure indexes pair with number of rows
examples_df.head(3)

Unnamed: 0,index,text,code,lang_rep,code_rep
0,0,see if find my first reminders that I have a m...,"person_reminded = Contact.resolve_from_text(""m...",[ root\n\t[ S\n\t\t[ Command\n\t\t\t[ Action\n...,\t[ Module\n\t\t[ person_reminded = Contact.re...
1,1,create a reminder at mindnight to close the wi...,"date_time = DateTime.resolve_from_text(""mindni...",[ root\n\t[ S\n\t\t[ Command\n\t\t\t[ Action\n...,\t[ Module\n\t\t[ date_time = DateTime.resolve...
2,2,show route to my office from Northern Mariana ...,"origin = Location.resolve_from_text(""from Nort...",[ root\n\t[ S\n\t\t[ Command\n\t\t\t[ Conditio...,\t[ Module\n\t\t[ origin = Location.resolve_fr...


## Building a prompt

In [8]:
prompt = build_example_prompt(input_value=examples_df['text'][0], input_label="Text", output_value=examples_df['code'][0], output_label="Code")
print(json.dumps(prompt, indent=2))


[
  {
    "role": "user",
    "content": "Text:\nsee if find my first reminders that I have a meeting at 3pm and there are and see if I got a reminder at mindnight in 2 days to bring the keys"
  },
  {
    "role": "assistant",
    "content": "Code:\nperson_reminded = Contact.resolve_from_text(\"my\")\ncontent = Content.resolve_from_text(\"I have a meeting at 3pm\")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, content=content)\nreminders = first(reminders)\nResponder.respond(response=reminders)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)\n\nperson_reminded = Contact.resolve_from_text(\"I\")\ndate_time = DateTime.resolve_from_text(\"mindnight in 2 days\")\ncontent = Content.resolve_from_text(\"bring the keys\")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, date_time=date_time, content=content)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)"
  }
]


In [11]:
input_data = examples_df.iloc[0]
prompt = build_prompt(strategy="rep2rep", prompt_type='examples', input_data=input_data, examples_df=examples_df, examples_limit=2, seed=42)
prompt_str = "".join([p['content'] for p in prompt if p['role'] == 'user'])

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
base_prompt_tokens_len = len(tokenizer(prompt_str, max_length=51200, truncation=True)["input_ids"])
print(f"Base prompt tokens length: {base_prompt_tokens_len}")

Base prompt tokens length: 759
Base prompt tokens length: 755


In [12]:
examples_df.iloc[0]['lang_rep']

'[ root\n\t[ S\n\t\t[ Command\n\t\t\t[ Action\n\t\t\t\t[ hd\n\t\t\t\t\t[ see ]\n\t\t\t\t]\n\t\t\t\t[ advcl\n\t\t\t\t\t[ mark\n\t\t\t\t\t\t[ if ]\n\t\t\t\t\t]\n\t\t\t\t\t[ Command\n\t\t\t\t\t\t[ Action\n\t\t\t\t\t\t\t[ hd\n\t\t\t\t\t\t\t\t[ find ]\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t[ Arg\n\t\t\t\t\t\t\t\t[ obj\n\t\t\t\t\t\t\t\t\t[ nmod:poss\n\t\t\t\t\t\t\t\t\t\t[ my ]\n\t\t\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t\t\t[ amod\n\t\t\t\t\t\t\t\t\t\t[ first ]\n\t\t\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t\t\t[ hd\n\t\t\t\t\t\t\t\t\t\t[ reminders ]\n\t\t\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t[ mark\n\t\t\t\t\t\t\t\t[ that ]\n\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t[ ccomp\n\t\t\t\t\t\t\t\t[ Command\n\t\t\t\t\t\t\t\t\t[ Action\n\t\t\t\t\t\t\t\t\t\t[ Arg\n\t\t\t\t\t\t\t\t\t\t\t[ nsubj\n\t\t\t\t\t\t\t\t\t\t\t\t[ I ]\n\t\t\t\t\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t\t\t\t[ hd\n\t\t\t\t\t\t\t\t\t\t\t[ have ]\n\t\t\t\t\t\t\t\t\t\t]\n\t\t\t\t\t\t\t\t\t\t[ Arg\n\t\t\t\t\t\t\t\t\t\t\t[ obj\n

In [13]:
re.sub("\s+", " ", examples_df.iloc[0]['lang_rep'])

'[ root [ S [ Command [ Action [ hd [ see ] ] [ advcl [ mark [ if ] ] [ Command [ Action [ hd [ find ] ] [ Arg [ obj [ nmod:poss [ my ] ] [ amod [ first ] ] [ hd [ reminders ] ] ] ] [ mark [ that ] ] [ ccomp [ Command [ Action [ Arg [ nsubj [ I ] ] ] [ hd [ have ] ] [ Arg [ obj [ det [ a ] ] [ hd [ meeting ] ] [ nmod [ case [ at ] ] [ hd [ 3 ] ] [ nmod:tmod [ pm ] ] ] ] ] ] ] [ cc [ and ] ] [ Command_conj [ Condition [ If [ Body [ Command [ Action [ expl [ there ] ] [ hd [ are ] ] ] ] [ cc [ and ] ] [ Command_conj [ Action [ hd [ see ] ] ] ] ] [ mark [ if ] ] [ Test [ Command [ Action [ Arg [ nsubj [ I ] ] ] [ hd [ got ] ] [ Arg [ obj [ det [ a ] ] [ hd [ reminder ] ] [ nmod [ case [ at ] ] [ hd [ mindnight ] ] ] ] ] [ Arg [ obl [ case [ in ] ] [ nummod [ 2 ] ] [ hd [ days ] ] ] ] [ mark [ to ] ] [ advcl [ Command [ Action [ hd [ bring ] ] [ Arg [ obj [ det [ the ] ] [ hd [ keys ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] '

In [14]:
input_data = examples_df.iloc[0]
prompt = build_prompt(
    strategy="text2code", 
    prompt_type='apispec', 
    input_data=input_data, 
    examples_df=examples_df, 
    examples_limit=2
)
prompt_str = "".join([p['content'] for p in prompt if p['role'] == 'user'])

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
base_prompt_tokens_len = len(tokenizer(prompt_str, max_length=51200, truncation=True)["input_ids"])
print(f"Base prompt tokens length: {base_prompt_tokens_len}")
print(json.dumps(prompt, indent=2))

Base prompt tokens length: 15513
Base prompt tokens length: 15511
[
  {
    "role": "system",
    "content": "You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. You must use the provided API specifications and examples to guide your implementation."
  },
  {
    "role": "user",
    "content": "Below are API specifications for implementing the description in code.\n\n# MAP:\n\nclass Map(Action):\n    \"\"\"\n    The Map class contains all the methods of a virtual assistant agent in the map domain.\n    \"\"\"\n\n    @classmethod\n    def find_on_map(cls, location: Location) -> List[MapEntity]:\n        \"\"\"\n        This class method finds places on the map.\n\n        Parameters\n        ----------\n        location : Location\n            The location to search for\n\n        Returns\n        -------\n        List[MapEntity]\n            A list of places in the form of map entities\n        \"\"\"\n        pass

In [15]:
messages = build_prompt(
    prompt_type='apispec', 
    strategy='text2code', 
    input_data=examples_df.iloc[100],
    examples_df=examples_df,
    examples_limit=11
)
messages

Base prompt tokens length: 15752


[{'role': 'system',
  'content': 'You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. You must use the provided API specifications and examples to guide your implementation.'},
 {'role': 'user',
  'content': 'Below are API specifications for implementing the description in code.\n\n# MAP:\n\nclass Map(Action):\n    """\n    The Map class contains all the methods of a virtual assistant agent in the map domain.\n    """\n\n    @classmethod\n    def find_on_map(cls, location: Location) -> List[MapEntity]:\n        """\n        This class method finds places on the map.\n\n        Parameters\n        ----------\n        location : Location\n            The location to search for\n\n        Returns\n        -------\n        List[MapEntity]\n            A list of places in the form of map entities\n        """\n        pass\n\n# SHOPPING:\n\nclass Shopping(Action):\n    """\n    The Shopping class contains all the method

Check the prompt tokens length

In [16]:
print("messages:", messages)
prompt_str = "\n".join([p['content'] for p in messages if p['role'] == 'user'])
print("promtp_str", prompt_str[:100])

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
base_prompt_tokens_len = len(tokenizer(prompt_str, max_length=51200, truncation=True)["input_ids"])
print(f"Base prompt tokens length: {base_prompt_tokens_len}")
max_tokens = 512
print(f"Total tokens length: {base_prompt_tokens_len + max_tokens}")
print(f"Total tokens length < 16384: {base_prompt_tokens_len + max_tokens < 16384}")

messages: [{'role': 'system', 'content': 'You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. You must use the provided API specifications and examples to guide your implementation.'}, {'role': 'user', 'content': 'Below are API specifications for implementing the description in code.\n\n# MAP:\n\nclass Map(Action):\n    """\n    The Map class contains all the methods of a virtual assistant agent in the map domain.\n    """\n\n    @classmethod\n    def find_on_map(cls, location: Location) -> List[MapEntity]:\n        """\n        This class method finds places on the map.\n\n        Parameters\n        ----------\n        location : Location\n            The location to search for\n\n        Returns\n        -------\n        List[MapEntity]\n            A list of places in the form of map entities\n        """\n        pass\n\n# SHOPPING:\n\nclass Shopping(Action):\n    """\n    The Shopping class contains all the m

## Utils

In [17]:
class TogetherAPI:
    def __init__(self, api_key: str):
        self.api_key = api_key
        
    def chat_complete(self, messages, model_name, n=100, max_tokens=512, temperature=1.0, top_p=0.7, top_k=50, serialize_id=None, serialize_path=None):
        url = "https://api.together.xyz/v1/chat/completions"

        payload = {
            "model": model_name,
            "messages": messages,
            "max_tokens": max_tokens,
            "stop": ["</s>"],
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "repetition_penalty": 1,
            "n": n,
        }
        headers = {
            "accept": "application/json",
            "content-type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }

        response = requests.post(url, json=payload, headers=headers)

        if serialize_path is not None:
            current_time = datetime.now()
            timestamp = current_time.strftime("%Y-%m-%d_%H-%M-%S")
            file_name = f"{serialize_id}_n{n}_{timestamp}.pkl"
            file_path = os.path.join(serialize_path, file_name)
            os.makedirs(serialize_path, exist_ok=True)
            with open(file_path, 'wb') as file:
                pickle.dump(response, file)
                # print(f"Response serialized to {file_path}")

        try:
            if response.status_code != 200:
                print(f"Failed to execute request: {response.status_code}")
                print(response.text)
                raise Exception(f"Failed to execute request: {response.status_code}")
            json_response = json.loads(response.text)
        except JSONDecodeError as e:
            print(f"Failed to parse response:")
            print(response.text)
            print(e)
            raise e
        except Exception as e:
            print("Failed to execute request")
            print(e)
            raise e

        return json_response    

## Predictions

## Code Llama Instruct (7B)

In [18]:
model_name = 'codellama/CodeLlama-7b-Instruct-hf'
platform = 'together_ai'

### Prompt with API Spec

In [19]:
N = 100
id_labels = ['sample_id'] #['test_id', 'sample_id', 'sample_minor_id']
prompt_type = 'apispec' # 'examples' or 'apispec'
strategy = 'text2code'
model_id = model_name.lower().replace('-', '_').replace('/', '_')
examples_limit = 11

test_df = load_eval_data()

results_file_path = f"./build/results/together_ai/test-{str(test_df.shape[0])}-{model_id}-{strategy}-n{N}-{prompt_type}-ex{examples_limit}.csv.gz"
if os.path.exists(results_file_path):
    raise ValueError(f"Results file already exists: {results_file_path}")

test_results_df = pd.read_csv(results_file_path, compression='gzip') if os.path.exists(results_file_path) else test_df.copy()
test_results_df.set_index(id_labels, inplace=True)
test_results_df.sort_index(inplace=True)

print(f"Results will be saved to {results_file_path}")

Results will be saved to ./build/results/together_ai/test-152-together_ai-codellama_codellama_7b_instruct_hf-text2code-n100-apispec-ex11.csv.gz


In [24]:
platform_disabled = False
force = False
total_records = test_df.shape[0]
max_tokens = 512
wait_time_in_seconds = 20
step_size = 20
serialize_response = True

together_api = TogetherAPI(api_key=os.getenv("TOGETHER_API_KEY"))

# generate predictions
responses = []
print(f"Generating predictions for {total_records} records")
for i, row  in tqdm.notebook.tqdm(test_df.iterrows(), total=total_records, desc="Processing records"):   
    # check to see if we already have a result for this record
    index = tuple(row[id_label] for id_label in id_labels) if len(id_labels) > 1 else row[id_labels[0]]
    index = [index]
    if (not force) and ('output' in test_results_df.loc[index]) and (not any(pd.isnull(test_results_df.loc[index, 'output']))):
        # if we do, then skip this record
        time.sleep(0.1)
    elif not platform_disabled:
        # iterate with step size until we reach a total of n
        outputs = []
        n = N
        batch_steps = list(range(0, n, step_size))
        for j, k in tqdm.notebook.tqdm(enumerate(batch_steps), total=len(batch_steps), leave=False, desc=f"Processing record {i}"):
            # run the model, if we don't have a result
            seed = 42 + i*len(batch_steps) + j
            messages = build_prompt(
                strategy=strategy, 
                prompt_type=prompt_type, 
                input_data=row, 
                examples_df=examples_df, 
                examples_limit=examples_limit,
                seed=seed
            )

            serialize_id = '_'.join([f'{i}{str(j)}' for i, j in list(zip(id_labels, index))])
            response = together_api.chat_complete(
                model_name=model_name, 
                messages=messages,
                max_tokens=max_tokens,
                n=min(step_size, n - k),
                serialize_id=serialize_id,
                serialize_path=f'build/results/{platform}/responses/{model_id}/{strategy}/{prompt_type}/'
            )

            outputs_batch = [x['message']['content'] for x in response['choices']]
            outputs += outputs_batch

            time.sleep(wait_time_in_seconds)
        
        # clean the outputs
        # outputs = [output.replace("Code:\n", "").replace("```python", "").replace("```py", "").replace("```", "").strip() for output in outputs]

        # create the 'n' column
        ns = list(np.arange(n))
        
        # duplicate the records
        records_to_duplicate = test_results_df.loc[index] # Fetch the records
        duplicated_records = pd.concat([records_to_duplicate] * (n - 1), ignore_index=False) # Duplicate the records
        test_results_df = pd.concat([test_results_df, duplicated_records], ignore_index=False) # Append the duplicated records back to the original DataFrame (optional)
        
        # set values for output and n
        test_results_df['output'] = test_results_df['output'].astype(object) if 'output' in test_results_df.columns else None
        test_results_df.loc[index, 'output'] = outputs * len(records_to_duplicate)
        test_results_df['n'] = test_results_df['n'].astype(object) if 'n' in test_results_df.columns else None
        test_results_df.loc[index, 'n'] = ns * len(records_to_duplicate)
        
        test_results_df.to_csv(results_file_path, index=True, compression='gzip')
        
        time.sleep(wait_time_in_seconds)

Generating predictions for 152 records


Processing records:   0%|          | 0/152 [00:00<?, ?it/s]

Processing record 29:   0%|          | 0/5 [00:00<?, ?it/s]

Processing record 30:   0%|          | 0/5 [00:00<?, ?it/s]

Processing record 31:   0%|          | 0/5 [00:00<?, ?it/s]

Processing record 32:   0%|          | 0/5 [00:00<?, ?it/s]

KeyError: 'choices'

In [28]:
file_path = 'build/results/together_ai/responses/together_ai-codellama_codellama_7b_instruct_hf/text2code/apispec/together_ai-codellama_codellama_7b_instruct_hf_sample_id33_n20_2024-03-06_10-30-10.pkl'
with open(file_path, 'rb') as file:
    response = pickle.load(file)
    print(response.status_code)

500


In [54]:
file_name = 'together_ai-codellama_codellama_7b_instruct_hf_sample_id0_n33_2024-03-04_15-08-55.pkl'
file_path = f'build/results/together_ai/responses/{file_name}'
# load pickled response
with open(file_path, 'rb') as file:
    response = pickle.load(file)

In [55]:
# cast to json
response_json = json.loads(response.text)

In [56]:
print(len(response_json['choices']))

33


In [57]:
print(response_json)

{'id': '85f217a97d2d7da0-TLV', 'object': 'chat.completion', 'created': 1709557735, 'model': 'codellama/CodeLlama-7b-Instruct-hf', 'prompt': [], 'choices': [{'finish_reason': 'stop', 'logprobs': None, 'index': 0, 'message': {'role': 'assistant', 'content': '  Here is the Python code to check the availability of Pepsi at Walmart and Walgreens:\n```\nimport requests\n\n# URL for Walmart\nwalmart_url = "https://www'}}, {'finish_reason': 'stop_sequence', 'generated_tokens': 45, 'seed': 17986945682938049720, 'prefill': [], 'logprobs': None, 'index': 1, 'message': {'role': 'assistant', 'content': '  Here is the Python code that corresponds to the given text description:\n```\nimport requests\n\n# Define the URL for the Walmart and Walgreens APIs\nwalmart_url = "https://www'}}, {'finish_reason': 'stop_sequence', 'generated_tokens': 48, 'seed': 4677908806569536082, 'prefill': [], 'logprobs': None, 'index': 2, 'message': {'role': 'assistant', 'content': '  Here is the Python code to check the av

In [62]:
row = examples_df.iloc[0]
messages = build_prompt(
    strategy=strategy, 
    prompt_type=prompt_type, 
    input_data=row, 
    examples_df=examples_df, 
    examples_limit=examples_limit
)

Base prompt tokens length: 15847


In [64]:
messages

[{'role': 'system',
  'content': 'You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. You must use the provided API specifications and examples to guide your implementation.'},
 {'role': 'user',
  'content': 'Below are API specifications for implementing the description in code.\n\n# MAP:\n\nclass Map(Action):\n    """\n    The Map class contains all the methods of a virtual assistant agent in the map domain.\n    """\n\n    @classmethod\n    def find_on_map(cls, location: Location) -> List[MapEntity]:\n        """\n        This class method finds places on the map.\n\n        Parameters\n        ----------\n        location : Location\n            The location to search for\n\n        Returns\n        -------\n        List[MapEntity]\n            A list of places in the form of map entities\n        """\n        pass\n\n# SHOPPING:\n\nclass Shopping(Action):\n    """\n    The Shopping class contains all the method

In [63]:
import requests

url = "https://api.together.xyz/v1/chat/completions"

payload = {
    "model": "codellama/CodeLlama-7b-Instruct-hf",
    "stop": ["</s>"],
    "messages": messages,
    "max_tokens": 512,
    "temperature": 1.0,
    "top_p": 0.7,
    "top_k": 50,
    "n": 2,
    
    "repetition_penalty": 1,
}
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer 5df8c68a02b5a279fe989564a39345588ad164f6d5e1d30b37fc22fc240c08ce"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

{
  "id": "85fbb6c17b957d9b-TLV",
  "object": "chat.completion",
  "created": 1709658580,
  "model": "codellama/CodeLlama-7b-Instruct-hf",
  "prompt": [],
  "choices": [
    {
      "finish_reason": "eos",
      "logprobs": null,
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "  Here is the Python code that should achieve the task described in the text:\n```\n# Find the first reminder with a meeting at 3pm\nreminder = Reminders.find_reminder(person_reminded=\"me\", content=\"meeting at 3pm\")\n\n# Check if a reminder was found\nif reminder:\n    # See if a reminder was sent at midnight in 2 days\n    reminder_2 = Reminders.find_reminder(person_reminded=\"me\", content=\"bring the keys\", date_time=2 days ago)\n\n    # Check if the second reminder was found\n    if reminder_2:\n        # Print a message indicating that both reminders were found\n        print(\"Both reminders were found!\")\n    else:\n        # Print a message indicating that the s

In [74]:
import requests

url = "https://api.together.xyz/v1/chat/completions"

payload = {
    "model": "codellama/CodeLlama-7b-Instruct-hf",
    "stop": ["</s>"],
    "messages": [
        {
            "role": "system",
            "content": "You are a skilled programmer. You will be provided with a text description and your task is to convert it into a dense variation of an Abstract Syntax Tree (AST) suitable for Python code."
        },
        {
            "role": "user",
            "content": "Below are API specifications for implementing the corresponding dense AST for Python code.\n Contact.resolve_from_text(text: str) -> Contact"
        },
        {
            "role": "assistant",
            "content": "ok"
        },
        {
            "role": "user",
            "content": "Below are examples of text descriptions and their corresponding Python code.Text:\nJerry has a car"
        },
        {
            "role": "assistant",
            "content": "Code:\nContact.resolve_from_text('Jerry')"
        },
        {
            "role": "user",
            "content": "Text:\nMy dad bought a new house"
        },
        {
            "role": "assistant",
            "content": "Code:\nContact.resolve_from_text('My dad')"
        },
        {
            "role": "user",
            "content": "Text:\nBrooke is a good friend"
        },
    ],
    "max_tokens": 512,
    "temperature": 1.0,
    "top_p": 0.7,
    "top_k": 50,
    "n": 2,
    
    "repetition_penalty": 1,
}
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer 5df8c68a02b5a279fe989564a39345588ad164f6d5e1d30b37fc22fc240c08ce"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

{
  "id": "85f3037d5a4b7d9e-TLV",
  "object": "chat.completion",
  "created": 1709567340,
  "model": "codellama/CodeLlama-7b-Instruct-hf",
  "prompt": [],
  "choices": [
    {
      "finish_reason": "eos",
      "logprobs": null,
      "index": 0,
      "message": {
        "role": "assistant",
        "content": " Code:\nContact.resolve_from_text('Brooke')"
      }
    },
    {
      "finish_reason": "eos_token",
      "generated_tokens": 16,
      "seed": 5136627748245892340,
      "prefill": [],
      "logprobs": null,
      "index": 1,
      "message": {
        "role": "assistant",
        "content": "  Code:\nContact.resolve_from_text('Brooke')"
      }
    }
  ],
  "usage": {
    "prompt_tokens": 195,
    "completion_tokens": 31,
    "total_tokens": 226
  }
}


In [66]:
import requests

url = "https://api.together.xyz/v1/chat/completions"

payload = {
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "stop": ["</s>"],
    "messages": [
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ]
}
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer 5df8c68a02b5a279fe989564a39345588ad164f6d5e1d30b37fc22fc240c08ce"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

{
  "id": "85f25a34aff47da0-TLV",
  "object": "chat.completion",
  "created": 1709560406,
  "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
  "prompt": [],
  "choices": [
    {
      "finish_reason": "eos",
      "logprobs": null,
      "index": 0,
      "message": {
        "role": "assistant",
        "content": " The capital of France is Paris. It's located in the north-central part of the country and is one of the most populous and visited cities in the world, known for its iconic landmarks like the Eiffel Tower, Louvre Museum, Notre-Dame Cathedral, and more. Paris is also the capital of the Île-de-France region and is a major global center for art, fashion, gastronomy, and culture."
      }
    }
  ],
  "usage": {
    "prompt_tokens": 16,
    "completion_tokens": 99,
    "total_tokens": 115
  }
}


In [75]:
messages

[{'role': 'system',
  'content': 'You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code.'},
 {'role': 'user',
  'content': 'Below are API specifications for implementing the description in code.\n\n# MAP:\n\nclass Map(Action):\n    """\n    The Map class contains all the methods of a virtual assistant agent in the map domain.\n    """\n\n    @classmethod\n    def find_on_map(cls, location: Location) -> List[MapEntity]:\n        """\n        This class method finds places on the map.\n\n        Parameters\n        ----------\n        location : Location\n            The location to search for\n\n        Returns\n        -------\n        List[MapEntity]\n            A list of places in the form of map entities\n        """\n        pass\n\n# SHOPPING:\n\nclass Shopping(Action):\n    """\n    The Shopping class contains all the methods of a virtual assistant agent in the shopping domain.\n    """\n\n    @classmethod\n   

### Prompt with few-shots

In [39]:
N = 100
id_labels = ['sample_id'] #['test_id', 'sample_id', 'sample_minor_id']
prompt_type = 'examples' # 'examples' or 'apispec'
strategy = 'text2code'
model_id = platform + '-' + model_name.lower().replace('-', '_').replace('/', '_')
examples_limit = 100

test_df = load_eval_data()

results_file_path = f"./build/results/together_ai/test-{str(test_df.shape[0])}-{model_id}-{strategy}-n{N}-{prompt_type}-ex{examples_limit}.csv.gz"
if os.path.exists(results_file_path):
    raise ValueError(f"Results file already exists: {results_file_path}")

test_results_df = pd.read_csv(results_file_path, compression='gzip') if os.path.exists(results_file_path) else test_df.copy()
test_results_df.set_index(id_labels, inplace=True)
test_results_df.sort_index(inplace=True)

print(f"Results will be saved to {results_file_path}")

Results will be saved to ./build/results/together_ai/test-152-together_ai-codellama_codellama_7b_instruct_hf-text2code-n100-examples-ex100.csv.gz


In [57]:
row = test_df.iloc[0]
messages = build_prompt(
    strategy=strategy, 
    prompt_type=prompt_type, 
    input_data=row, 
    examples_df=examples_df, 
    examples_limit=42
)
messages

Base prompt tokens length: 1432


[{'role': 'system',
  'content': 'You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. You must use the provided API specifications and examples to guide your implementation.'},
 {'role': 'user',
  'content': 'Below are examples of text descriptions and their corresponding Python code implementations.\nText:\nsee if find my first reminders that I have a meeting at 3pm and there are and see if I got a reminder at mindnight in 2 days to bring the keys'},
 {'role': 'assistant',
  'content': 'Code:\nperson_reminded = Contact.resolve_from_text("my")\ncontent = Content.resolve_from_text("I have a meeting at 3pm")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, content=content)\nreminders = first(reminders)\nResponder.respond(response=reminders)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)\n\nperson_reminded = Contact.resolve_from_text("I")\ndate_time = DateTime.resol

In [59]:
import requests

url = "https://api.together.xyz/v1/chat/completions"

payload = {
    "model": "codellama/CodeLlama-7b-Instruct-hf",
    "stop": ["</s>"],
    "messages": messages,
    "max_tokens": 512,
    "temperature": 1.0,
    "top_p": 0.7,
    "top_k": 50,
    "n": 2,
    
    "repetition_penalty": 1,
}
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer 5df8c68a02b5a279fe989564a39345588ad164f6d5e1d30b37fc22fc240c08ce"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

{
  "id": "85fbacb478f47da0-TLV",
  "object": "chat.completion",
  "created": 1709658166,
  "model": "codellama/CodeLlama-7b-Instruct-hf",
  "prompt": [],
  "choices": [
    {
      "finish_reason": "eos",
      "logprobs": null,
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "  Here is the Python code that corresponds to the given text description:\n```\nimport requests\n\n# Check the availability of Pepsi at Walmart\nwalmart_url = \"https://www.walmart.com/search?q=Pepsi\"\nresponse = requests.get(walmart_url)\nif response.status_code == 200:\n    soup = BeautifulSoup(response.text, \"html.parser\")\n    pepsi_in_walmart = soup.find(\"span\", {\"class\": \"product-name\"}).text\n    print(f\"Pepsi is available at Walmart: {pepsi_in_walmart}\")\n\n# Check the availability of Pepsi at Walgreens\nwalgreens_url = \"https://www.walgreens.com/search?q=Pepsi\"\nresponse = requests.get(walgreens_url)\nif response.status_code == 200:\n    soup = Beautiful

Verifying the sanity of the messages prompt

In [58]:
# random a series of n messages
print(messages[0]['content'])

user_messages_indexes = list(set([i for i,m in enumerate(messages) if m['role'] == 'user']))
indexes = np.random.choice(user_messages_indexes, size=10, replace=False)
for idx in indexes:
    print(messages[idx]['content'])
    print()
    print(messages[idx+1]['content'])
    print('_________\n')

print(messages[-1]['content'])

You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. You must use the provided API specifications and examples to guide your implementation.
Text:
before you confirm if search the last three reminders that the a/c is broken and to bring the keys and any is to Julie Baxter, delete all reminders to bring the keys, what will be the weather forecast and check the weather at my work place or my parents neighborhood

Code:
content = Content.resolve_from_text("bring the keys")
reminders = Reminders.find_reminders(content=content)
Reminders.delete_reminder(reminders=reminders)

contents = Content.resolve_many_from_text("the a/c is broken and bring the keys")
all_reminders = []
for content in [contents]:
  reminders = Reminders.find_reminders(content=content)
  reminders = last(reminders, 3)
  all_reminders += reminders
Responder.respond(response=all_reminders)
recipient = Contact.resolve_from_text("Julie Baxter")
all_remind

In [50]:
messages[-1]['content'] = "Based on the previous examples, convert the following text description into Python code:\nText:\ntell me if check the weather report at 2:48 PM and if it will be foggy"

In [54]:
import requests

url = "https://api.together.xyz/v1/chat/completions"

payload = {
    "model": "codellama/CodeLlama-7b-Instruct-hf",
    "stop": ["</s>"],
    "messages": [
        {'role': 'system',
        'content': 'You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. You must use the provided examples to guide your implementation.'},
        {'role': 'user',
        'content': 'Below are examples of text descriptions and their corresponding Python code implementations.\nText:\ncreate a reminder at mindnight to close the window and to call my dad'},
        {'role': 'assistant',
        'content': 'Code:\ndate_time = DateTime.resolve_from_text("mindnight")\ncontents = Content.resolve_many_from_text("close the window and call my dad")\nfor content in [contents]:\n  Reminders.create_reminder(date_time=date_time, content=content)'},
        {'role': 'user',
        'content': 'Text:\nremind me to buy a chocolate bar'},
 
    ],
    "max_tokens": 512,
    "temperature": 1.0,
    "top_p": 0.7,
    "top_k": 50,
    "n": 2,
    
    "repetition_penalty": 1,
}
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer 5df8c68a02b5a279fe989564a39345588ad164f6d5e1d30b37fc22fc240c08ce"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

{
  "id": "85fb9ffdca117d95-TLV",
  "object": "chat.completion",
  "created": 1709657637,
  "model": "codellama/CodeLlama-7b-Instruct-hf",
  "prompt": [],
  "choices": [
    {
      "finish_reason": "eos",
      "logprobs": null,
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "  Here is the Python code for the text description:\n\ndate_time = DateTime.resolve_from_text(\"now\")\ncontents = Content.resolve_many_from_text(\"buy a chocolate bar\")\nfor content in [contents]:\n  Reminders.create_reminder(date_time=date_time, content=content)"
      }
    },
    {
      "finish_reason": "eos_token",
      "generated_tokens": 39,
      "seed": 9757655886262475865,
      "prefill": [],
      "logprobs": null,
      "index": 1,
      "message": {
        "role": "assistant",
        "content": "  Code:\ncontents = Content.resolve_many_from_text(\"buy a chocolate bar\")\nReminders.create_reminder(contents=contents)"
      }
    }
  ],
  "usage": {
    "prom