# How to use this notebook

## Fill in all missing descriptions

If you'd like to fill in all missing descriptions throughout this project:

1. Replace "YOUR OPENAI API SECRET HERE (do not commit this)" two cells below this with your OpenAI API secret key
2. Run the whole notebook from start to finish.

## Fill in missing descriptions for one file

If you'd like to fill in the missing descriptions on a single file:

1. Replace "YOUR OPENAI API SECRET HERE (do not commit this)" two cells below this with your OpenAI API secret key
2. In the cell below the cell that starts with `def fill_in_description(file_path):`, type `fill_in_description(THE_FILE_PATH_YOU_WANT_TO_UPDATE)`
3. Run all cells but the last cell.

In [None]:
import os
import yaml
import time
import openai
import tiktoken

from openai.error import RateLimitError

In [None]:
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# written into the notebook. If it is unset or empty, fall back to the
# placeholder below, which can be replaced by hand (never commit a real key).
openai.api_key = (
    os.environ.get("OPENAI_API_KEY")
    or "YOUR OPENAI API SECRET HERE (do not commit this)"
)

In [None]:
# Ask which chat model to use. Surrounding whitespace is stripped because the
# answer is used verbatim as a lookup key and as the model name sent to the API,
# so an accidental trailing space would otherwise fail later on.
model = input('What model should I use? Options are: gpt-3.5-turbo, gpt-4, gpt-4-32k\n each is more expensive than the last.\n').strip()

In [None]:
# Maximum token count for each supported model, keyed by model name.
model_theoretical_max_tokens = dict([
    ('gpt-3.5-turbo', 2 ** 12),
    ('gpt-4', 2 ** 13),
    ('gpt-4-32k', 2 ** 15),
])

In [None]:
def fill_in_description(file_path):
    """Ask the selected OpenAI chat model to fill in missing YAML descriptions.

    Reads ``file_path``, sends its content to the model named by the
    module-level ``model`` variable and, when a complete reply comes back,
    overwrites the file with that reply.

    Args:
        file_path: Path of the YAML file to update in place.

    Returns:
        ``'length'`` when the prompt leaves too little room for the reply or
        when the reply was cut off (the file is left untouched in both
        cases); ``None`` once the file has been rewritten.
    """
    # Read and write explicitly as UTF-8 so non-ASCII text survives the round
    # trip regardless of the platform's default encoding.
    with open(file_path, encoding="utf-8") as f:
        content = f.read()
    prompt = f"The following YAML has missing descriptions. Please fill in the missing descriptions and return only the YAML:\n\n{content}"
    encoding = tiktoken.encoding_for_model(model)
    prompt_length = len(encoding.encode(prompt))
    # Tokens still available for the reply once the prompt is accounted for.
    practical_max_tokens = model_theoretical_max_tokens[model] - prompt_length
    print(f"file is {prompt_length} tokens long")
    # Budget twice the prompt length for the reply; give up early when that
    # budget does not fit in what remains.
    max_tokens = prompt_length * 2
    if max_tokens > practical_max_tokens:
        return 'length'
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    choice = response['choices'][0]
    # A 'length' finish reason means the reply was truncated; writing it out
    # would replace the file with incomplete YAML.
    if choice['finish_reason'] == 'length':
        return 'length'
    text = choice['message']['content']
    completion_tokens = response['usage']['completion_tokens']
    print(f"OpenAI's response is {completion_tokens} tokens long")
    with open(file_path, 'w', encoding="utf-8") as f:
        f.write(text)

In [None]:
def get_missing_descriptions(d):
    """Count the empty ``description`` entries in a parsed YAML document.

    Walks nested dicts and lists recursively. A ``description`` key counts as
    missing when its value is falsy and is not itself a list or dict
    (for example ``None`` or ``''``).

    Args:
        d: Parsed YAML content. Dicts and lists are traversed; any other
            value (including ``None``, which is what an empty YAML file
            parses to) contains no descriptions and yields 0.

    Returns:
        The number of missing descriptions found.
    """
    if isinstance(d, dict):
        missing = 0
        for key, value in d.items():
            if isinstance(value, (list, dict)):
                missing += get_missing_descriptions(value)
            elif key == 'description' and not value:
                missing += 1
        return missing
    if isinstance(d, list):
        # Scalars inside a list contribute 0 through the fallback below.
        return sum(get_missing_descriptions(item) for item in d)
    # Scalars and None have nothing to traverse.
    return 0

# Walk every documentation directory and ask the model to fill in each .yml
# file that still has at least one empty description.
dirs_to_walk = ['../documentation',]
for directory in dirs_to_walk:
    for root, dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.yml'):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, "r", encoding="utf-8") as f:
                doc_yaml = yaml.safe_load(f)
            missing_descriptions = get_missing_descriptions(doc_yaml)
            if missing_descriptions < 1:
                continue
            print(f"Working on {file}")
            try:
                resp = fill_in_description(file_path)
            except RateLimitError:
                print("Hit a rate limit error."
                      " Sleeping for 70 seconds then trying one more time.")
                time.sleep(70)
                # Keep the retry's result so a 'length' outcome is reported
                # below instead of being announced as a success.
                resp = fill_in_description(file_path)
            if resp == "length":
                print(f"{file_path}\n  is too long! "
                      "You will need to fill in manually or through the ChatGPT interface.\n\n")
            else:
                print(f"{file} successfully filled in!\n\n")