In [31]:

import os
from glob import glob
import pandas as pd

def get_function_name(code):
    """
    Extract function name from a line beginning with "def "
    """
    assert code.startswith("def ")
    return code[len("def "): code.index("(")]

def get_until_no_space(all_lines, i) -> str:
    """
    Get all lines until a line outside the function definition is found.
    """
    ret = [all_lines[i]]
    for j in range(i + 1, i + 10000):
        if j < len(all_lines):
            if len(all_lines[j]) == 0 or all_lines[j][0] in [" ", "\t", ")"]:
                ret.append(all_lines[j])
            else:
                break
    return "\n".join(ret)

def get_functions(filepath):
    """
    Get all functions in a Python file.
    """
    whole_code = open(filepath).read().replace("\r", "\n")
    all_lines = whole_code.split("\n")
    for i, l in enumerate(all_lines):
        if l.startswith("def "):
            code = get_until_no_space(all_lines, i)
            function_name = get_function_name(code)
            yield {"code": code, "function_name": function_name, "filepath": filepath}


# get user root directory
root_dir = "./"

# path to code repository directory
code_root = root_dir + "/openai-python"
code_files = [y for x in os.walk(code_root) for y in glob(os.path.join(x[0], '*.py'))]
print("Total number of py files:", len(code_files))
all_funcs = []
for code_file in code_files:
    funcs = list(get_functions(code_file))
    for func in funcs:
        all_funcs.append(func)

print("Total number of functions extracted:", len(all_funcs))

Total number of py files: 1
Total number of functions extracted: 2


In [22]:
import os
import openai
# openai.organization = "org-7bwTkQrRbLeS0BPXt3F58gqa"
openai.api_key = "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"
openai.Engine.list()

<OpenAIObject list at 0x29e63f420> JSON: {
  "data": [
    {
      "created": null,
      "id": "babbage",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "ada",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "davinci",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "babbage-code-search-code",
      "object": "engine",
      "owner": "openai-dev",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "text-similarity-babbage-001",
      "object": "engine",
      "owner": "openai-dev",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "text-davinci-001",
      "object": "engine",
      "owner": "openai",
      "

In [87]:
# from openai.embeddings_utils import get_embedding
# import os
# import openai
# # openai.organization = "Personal" 
# openai.api_key = "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"
# engines = openai.Engine.list()
# print(engines)
df = pd.read_csv('functions2.csv',lineterminator='\n')
df['code_embedding'] = df['code'].apply(lambda x: openai.embeddings_utils.get_embedding(x, engine='code-search-babbage-code-001'))
# df['filepath'] = df['filepath'].apply(lambda x: x.replace(code_root, ""))
# df.to_csv("code_search_openai-python.csv", index=False)
df.head()
# len(df)

Unnamed: 0,code,function_name,code_embedding
0,modifier onlyOwner() {\r require(_o...,onlyOwner,"[-0.007844598032534122, -0.018933504819869995,..."
1,modifier lockTheSwap() {\r inSwapAn...,lockTheSwap,"[-0.01262049563229084, -0.038563743233680725, ..."
2,"modifier lockPossible(address target, uint...",lockPossible,"[-0.02415325865149498, -0.009220209904015064, ..."
3,modifier transactionPossible(address sende...,transactionPossible,"[-0.019816352054476738, -0.01993078738451004, ..."


In [78]:
df['code']

0        modifier onlyPauser() {\r        require(i...
1        modifier whenNotPaused() {\r        requir...
2        modifier whenPaused() {\r        require(_...
Name: code, dtype: object

In [90]:
from openai.embeddings_utils import cosine_similarity

def search_functions(df, code_query, n=3, pprint=True, n_lines=7):
    embedding = get_embedding(code_query, engine='code-search-babbage-text-001')
    df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))

    res = df.sort_values('similarities', ascending=False).head(n)
    if pprint:
        for r in res.iterrows():
            print(":"+r[1].function_name + "  score=" + str(round(r[1].similarities, 3)))
            print("\n".join(r[1].code.split("\n")[:n_lines]))
            print('-'*70)
    return res
# 'Check if the message sender is the owner'
res = search_functions(df, 'pause', n=3)

:transactionPossible  score=0.635
    }   _;quire(locked >= value, 'Nil: the transfer amount exceeds unlocked amount');
----------------------------------------------------------------------
:lockTheSwap  score=0.632
    }   inSwapAndLiquify = false;
----------------------------------------------------------------------
:lockPossible  score=0.623
    }   _;quire(balanceOf(target) >= value, 'Nil: the lock amount exceeds balance');
----------------------------------------------------------------------
