In [1]:
import os
import yaml
import time
import openai
import tiktoken

from openai.error import RateLimitError

In [2]:
# Pull the API key from the environment so no credential is stored in the notebook.
# The value is None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv('OPENAI_API_KEY')

In [3]:
def fill_in_description(file_path, model="text-davinci-003",
                        context_window=4096, min_completion_budget=3000):
    """Ask an OpenAI completion model to fill in missing YAML descriptions.

    Reads the file at ``file_path``, sends its content to the model inside a
    fixed instruction prompt, and overwrites the file with the model's reply.

    Args:
        file_path: Path of the YAML file to read and then overwrite in place.
        model: Completion model name. It is also used to pick the tiktoken
            encoding that measures the prompt.
        context_window: Total number of tokens (prompt + completion) budgeted
            for one request.
        min_completion_budget: Minimum number of tokens that must remain for
            the completion once the prompt is counted; files that leave less
            than this are skipped.

    Returns:
        The string ``'length'`` when the file is skipped because the prompt
        is too long, or when the API reports that the completion was cut off
        (``finish_reason == 'length'``). In both cases the file is left
        untouched. ``None`` after the file has been rewritten.
    """
    # Use UTF-8 explicitly for both the read and the write so the round trip
    # does not depend on the platform's default encoding.
    with open(file_path, encoding="utf-8") as f:
        content = f.read()
    prompt=f"The following YAML has missing descriptions. Please fill in the missing descriptions and return only the YAML:\n\n{content}"
    encoding = tiktoken.encoding_for_model(model)
    prompt_length = len(encoding.encode(prompt))
    theoretical_max_tokens = context_window - prompt_length
    print(f"file is {prompt_length} tokens long")
    if theoretical_max_tokens < min_completion_budget:
        return 'length'
    # Request up to three times the prompt size, capped by what is left of
    # the context window.
    max_tokens = min(prompt_length*3, theoretical_max_tokens)
    response = openai.Completion.create(model=model,
                                        prompt=prompt,
                                        max_tokens=max_tokens)
    first_choice = response['choices'][0]
    if first_choice['finish_reason'] == 'length':
        # A truncated completion would replace the file with partial YAML.
        return 'length'
    text = first_choice['text']
    completion_tokens = response['usage']['completion_tokens']
    print(f"OpenAI's response is {completion_tokens} tokens long")
    # NOTE(review): the reply is written verbatim; it is not checked to be
    # parseable YAML before the original content is replaced.
    with open(file_path, 'w', encoding="utf-8") as f:
        f.write(text)

In [4]:
def walk(d):
    """Count empty ``description`` entries in a parsed YAML structure.

    Recursively visits nested dicts and lists. A dict entry is counted when
    its key is ``'description'`` and its value is a falsy non-container
    (e.g. ``None`` or ``''``).

    Args:
        d: The parsed document or any sub-node of it. Anything that is not a
            dict, list or tuple (for example ``None`` from an empty file, or
            a scalar document) contains no descriptions and yields 0.

    Returns:
        int: The number of missing descriptions found under ``d``.
    """
    if isinstance(d, dict):
        return sum(
            # Containers are searched recursively; scalars are only counted
            # when they sit under a 'description' key and are empty.
            walk(v) if isinstance(v, (list, dict))
            else int(k == 'description' and not v)
            for k, v in d.items()
        )
    if isinstance(d, (list, tuple)):
        return sum(walk(v) for v in d if isinstance(v, (list, dict)))
    # None or a scalar: nothing to iterate, so nothing is missing.
    return 0
                
filled_in = False
# Visit every .yml file under ../documentation and ask the model to fill in
# any file that still has at least one empty description.
for root, dirs, files in os.walk('../documentation'):
    for file in files:
        if not file.endswith('.yml'):
            continue
        file_path = os.path.join(root,file)
        with open(file_path, "r", encoding="utf-8") as f:
            doc_yaml = yaml.safe_load(f)
        # An empty file parses to None and a scalar document to a plain
        # value; neither can contain descriptions, so skip them here.
        if not isinstance(doc_yaml, (dict, list)):
            continue
        missing_descriptions = walk(doc_yaml)
        if missing_descriptions < 1:
            continue
        print(f"Working on {file}")
        try:
            resp = fill_in_description(file_path)
        except RateLimitError:
            print("Hit a rate limit error."
                  " Sleeping for 70 seconds then trying one more time.")
            time.sleep(70)
            # Keep the retry's result so a 'length' outcome is not reported
            # as a success. A second rate-limit error still propagates.
            resp = fill_in_description(file_path)
        if resp == "length":
            print(f"{file_path}\n  is too long! You will need to fill in manually or through the ChatGPT interface.\n\n")
        else:
            print(f"{file} successfully filled in!\n\n")

Working on stg_frakture_everyaction_email_summary_unioned.yml
file is 1904 tokens long
../documentation/frakture_everyaction_email/staging/stg_frakture_everyaction_email_summary_unioned.yml
  is too long! You will need to fill in manually or through the ChatGPT interface.


Working on stg_frakture_global_message_paidmedia_sources_campaigns_messages_bridge.yml
file is 112 tokens long
OpenAI's response is 107 tokens long
stg_frakture_global_message_paidmedia_sources_campaigns_messages_bridge.yml successfully filled in!


Working on stg_frakture_global_message_paidmedia_ad_summary_by_date.yml
file is 872 tokens long
OpenAI's response is 2512 tokens long
stg_frakture_global_message_paidmedia_ad_summary_by_date.yml successfully filled in!


Working on stg_frakture_global_message_paidmedia_impressions_daily_rollup.yml
file is 128 tokens long
OpenAI's response is 152 tokens long
stg_frakture_global_message_paidmedia_impressions_daily_rollup.yml successfully filled in!


Working on stg_fraktur

KeyboardInterrupt: 