In [1]:
#import

import os
from dotenv import load_dotenv

from glob import glob
import pandas as pd

import openai
from openai.embeddings_utils import get_embedding
from openai.embeddings_utils import cosine_similarity

In [2]:
# Run python-dotenv's loader, then hand the OPENAI_API_KEY environment value
# to the openai module (the value is None when the variable is not set).
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [37]:
# Extract function name from a line beginning with "def "
def get_function_name(code):
    """Return the function name from source text that begins with ``def ``.

    The name is the text between the leading ``def `` and the first ``(``,
    with surrounding whitespace removed so that a legal signature such as
    ``def foo (x):`` yields ``"foo"`` rather than ``"foo "``.

    Args:
        code: Source text whose first four characters are ``def ``.

    Returns:
        The function name as a string.

    Raises:
        AssertionError: If ``code`` does not start with ``def ``.
        ValueError: If ``code`` contains no ``(``.
    """
    assert code.startswith("def ")
    # Python permits whitespace around the name, so trim it off the slice.
    return code[len("def "): code.index("(")].strip()

# Get all lines until a line outside the function definition is found.
def get_until_no_space(all_lines, i) -> str:
    """Collect the line at index ``i`` and the block of lines that follows it.

    A following line belongs to the block while it is empty or starts with a
    space, a tab or ``)``. The first line that does neither ends the block.
    Empty lines directly after the block are therefore included as well.

    Args:
        all_lines: The file content as a list of lines without line endings.
        i: Index of the first line of the block (the ``def`` line).

    Returns:
        The collected lines joined with ``"\\n"``.
    """
    ret = [all_lines[i]]
    # Scan up to the real end of the file: a fixed 10000-line window would
    # truncate longer bodies and keep looping after the last line.
    for j in range(i + 1, len(all_lines)):
        line = all_lines[j]
        if len(line) == 0 or line[0] in (" ", "\t", ")"):
            ret.append(line)
        else:
            break
    return "\n".join(ret)

# Get all functions in a Python file.
def get_functions(filepath):
    """Yield one record for every line in a file that starts with ``def ``.

    Only lines that begin with ``def `` at column 0 are matched, so indented
    definitions are not reported as separate entries.

    Args:
        filepath: Path of the source file to scan.

    Yields:
        dict: ``{"code": ..., "function_name": ..., "filepath": ...}`` where
        ``code`` is the block returned by ``get_until_no_space`` and
        ``function_name`` is derived from it by ``get_function_name``.
    """
    # Read the whole file inside a context manager so the handle is closed
    # before the first record is yielded, even if the consumer stops early.
    with open(filepath) as source_file:
        whole_code = source_file.read().replace("\r", "\n")
    all_lines = whole_code.split("\n")
    for i, l in enumerate(all_lines):
        if l.startswith("def "):
            code = get_until_no_space(all_lines, i)
            function_name = get_function_name(code)
            yield {"code": code, "function_name": function_name, "filepath": filepath}


In [38]:
# Base directory that holds the local checkouts (a fixed path under the user's home)
root_dir = os.path.expanduser("~/Code/src/github.com/godpeny")

# Repository whose Python files are indexed
code_root = f"{root_dir}/openai-research"

print(code_root)

# os.walk yields (dirpath, dirnames, filenames) for every directory below
# code_root; only dirpath is needed to glob the *.py files it contains.
code_files = [
    py_path
    for dirpath, _, _ in os.walk(code_root)
    for py_path in glob(os.path.join(dirpath, '*.py'))
]
print("Total number of py files:", len(code_files))

if not code_files:
    print("Double check that you have downloaded the openai-python repo and set the code_root variable correctly.")

# Gather the records produced for every file into one flat list.
all_funcs = []
for code_file in code_files:
    all_funcs.extend(get_functions(code_file))

print("Total number of functions extracted:", len(all_funcs))

/Users/godpeny/Code/src/github.com/godpeny/openai-research
Total number of py files: 4
Total number of functions extracted: 14


In [18]:
# Example for Python List Comprehension
# text = (("Hi", "Steve!"), ("What's", "up?"))
# result = [word for sentence in text for word in sentence]
# print(result)

['Hi', 'Steve!', "What's", 'up?']


In [40]:
all_funcs[0]  # show the first extracted record (code, function_name, filepath) as a sanity check

{'code': 'def index():\n    if request.method == "POST":\n        img_req = request.form["img_req"]\n        response = openai.Image.create(\n            prompt=img_req,\n            n=1,\n            size="1024x1024"\n        )\n\n        image_url = response[\'data\'][0][\'url\']\n        return redirect(url_for("index", result=image_url))\n\n    result = request.args.get("result")\n    return render_template("index.html", result=result)\n\n',
 'function_name': 'index',
 'filepath': '/Users/godpeny/Code/src/github.com/godpeny/openai-research/images_ui/app.py'}

In [41]:
# One row per extracted function record.
df = pd.DataFrame(all_funcs)
# get_embedding is called once per row on the function's source text.
df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))
# Drop the code_root prefix so stored paths are relative to the repository.
# regex=False treats code_root as literal text, not as a pattern.
df['filepath'] = df['filepath'].str.replace(code_root, "", regex=False)
# to_csv does not create missing parent directories, so make sure "data" exists.
os.makedirs("data", exist_ok=True)
df.to_csv("data/code_search_openai-python.csv", index=False)
df.head()

Unnamed: 0,code,function_name,filepath,code_embedding
0,"def index():\n if request.method == ""POST"":...",index,/images_ui/app.py,"[-0.010694571770727634, 0.0063886335119605064,..."
1,def show():\n try:\n response = open...,show,/images/main.py,"[-0.001927813864313066, -0.016288653016090393,..."
2,def edit():\n try:\n response = open...,edit,/images/main.py,"[-0.009266435168683529, 0.024797502905130386, ..."
3,def variation():\n try:\n response =...,variation,/images/main.py,"[-0.002660350175574422, 0.009002245031297207, ..."
4,def byte():\n try:\n # Read the imag...,byte,/images/main.py,"[-0.0011167502962052822, 0.005551192909479141,..."


In [42]:
def search_functions(df, code_query, n=3, pprint=True, n_lines=7):
    """Rank the rows of ``df`` by similarity to ``code_query``.

    The query is embedded with ``get_embedding`` (engine
    ``text-embedding-ada-002``) and compared against every value in the
    ``code_embedding`` column using ``cosine_similarity``. The scores are
    written to a ``similarities`` column on ``df`` itself, so the caller's
    frame is modified.

    Args:
        df: DataFrame with ``code_embedding``, ``code``, ``filepath`` and
            ``function_name`` columns.
        code_query: Text to search for.
        n: Number of highest-scoring rows to return.
        pprint: When true, print a summary of each returned row.
        n_lines: Maximum number of source lines printed per row.

    Returns:
        The ``n`` rows with the highest ``similarities``, best match first.
    """
    query_embedding = get_embedding(code_query, engine='text-embedding-ada-002')

    def score_against_query(code_embedding):
        return cosine_similarity(code_embedding, query_embedding)

    df['similarities'] = df.code_embedding.apply(score_against_query)

    top_hits = df.sort_values('similarities', ascending=False).head(n)
    if not pprint:
        return top_hits

    separator = '-' * 70
    for _, hit in top_hits.iterrows():
        location = hit.filepath + ":" + hit.function_name
        print(location + "  score=" + str(round(hit.similarities, 3)))
        # Show only the first n_lines lines of the matched function.
        preview_lines = hit.code.split("\n")[:n_lines]
        print("\n".join(preview_lines))
        print(separator)
    return top_hits

In [44]:
# Search with a natural-language query; the top 3 rows are printed (pprint
# defaults to True) and the returned frame is kept in `res`.
res = search_functions(df, 'generate image with ui', n=3)

/images/main.py:show  score=0.751
def show():
    try:
        response = openai.Image.create(
            prompt="white dogs",
            n=1,
            size="256x256"
        )
----------------------------------------------------------------------
/images/main.py:edit  score=0.743
def edit():
    try:
        response = openai.Image.create_edit(
            image=open("img.png", "rb"),
            mask=open("mask.png", "rb"),
            prompt="A sunlit indoor lounge area with a pool containing a chair",
            n=1,
----------------------------------------------------------------------
/images_ui/app.py:index  score=0.736
def index():
    if request.method == "POST":
        img_req = request.form["img_req"]
        response = openai.Image.create(
            prompt=img_req,
            n=1,
            size="1024x1024"
----------------------------------------------------------------------
