## CODE BLOCK 1

### Description
pip installs the library for OpenAI.

In [1]:
!pip install openai

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting openai
  Downloading openai-0.27.7-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m
Installing collected packages: openai
Successfully installed openai-0.27.7


## CODE BLOCK 2

### Description
Sets up the importation of Python code libraries and sets up environment variables:
- OpenAPI key setup
- Code repository location

In [1]:
# Import libraries
import os
from glob import glob
import pandas as pd
import openai
from openai.embeddings_utils import get_embedding

# Set OpenAI API key
with open('openai-key.txt', 'r') as f:
    key = f.read().strip()

os.environ['OPENAI_API_KEY'] = key

# Set location of code repo
repoFolder = "code/"
#repoFolder = "openai-cookbook"

# Set code file extension type
extType = "py"

# Set the OpenAI key path
openai.api_key_path = 'openai-key.txt'

## CODE BLOCK 3

### Description
Defines the functions for creating the dataframe for Python repositories broken down by individual functions


In [4]:
# Functions to create dataframe for Python repos broken down by individual functions

def get_function_name(code):
    """
    Extract function name from a line beginning with "def "
    """
    assert code.startswith("def ")
    return code[len("def "): code.index("(")]

def get_until_no_space(all_lines, i) -> str:
    """
    Get all lines until a line outside the function definition is found.
    """
    ret = [all_lines[i]]
    for j in range(i + 1, i + 10000):
        if j < len(all_lines):
            if len(all_lines[j]) == 0 or all_lines[j][0] in [" ", "\t", ")"]:
                ret.append(all_lines[j])
            else:
                break
    return "\n".join(ret)

def get_functions(filepath):
    """
    Get all functions in a Python file.
    """
    whole_code = open(filepath).read().replace("\r", "\n")
    all_lines = whole_code.split("\n")
    for i, l in enumerate(all_lines):
        if l.startswith("def "):
            code = get_until_no_space(all_lines, i)
            function_name = get_function_name(code)
            yield {"code": code, "function_name": function_name, "filepath": filepath}

## CODE BLOCK 4

### Description
Creates the pandas dataframe to perform code embedding

In [15]:
# Create dataframe for code embedding

code_root = repoFolder

code_files = [y for x in os.walk(code_root) for y in glob(os.path.join(x[0], f"*.{extType}"))]
print(f"Total number of {extType} files:", len(code_files))

if extType == "py":
    all_funcs = []
    for code_file in code_files:
        funcs = list(get_functions(code_file))
        for func in funcs:
            all_funcs.append(func)

    print("Total number of functions extracted:", len(all_funcs))

else:
    all_funcs = []
    for code_file in code_files:
        whole_code = open(code_file).read().replace("\r", "\n")
        all_funcs.append({"code": whole_code, "filepath": code_file})

df = pd.DataFrame(all_funcs)
df

# test = pd.DataFrame(all_funcs)
# df = test.head()

# df

Total number of py files: 2
Total number of functions extracted: 46


Unnamed: 0,code,function_name,filepath
0,"def get_data():\n """"""\n Reads the bank i...",get_data,code/dragonbank.py
1,"def set_data(data):\n """"""\n Writes the b...",set_data,code/dragonbank.py
2,"def get_users_as_list():\n """"""\n This fu...",get_users_as_list,code/dragonbank.py
3,"def list_to_linked_list(arr):\n """"""\n Co...",list_to_linked_list,code/dragonbank.py
4,"def heap_sort(input_list, field):\n """"""\n ...",heap_sort,code/dragonbank.py
5,"def swap(input_list, a, b):\n """"""\n Swap...",swap,code/dragonbank.py
6,"def sift_down(input_list, field, start_index, ...",sift_down,code/dragonbank.py
7,"def text_binary_search(input_list, field, quer...",text_binary_search,code/dragonbank.py
8,"def make_text_searchable(text):\n """"""\n ...",make_text_searchable,code/dragonbank.py
9,"def generate_account_number():\n """"""\n G...",generate_account_number,code/dragonbank.py


## CODE BLOCK 5

### Description
Create CSV file representing the correlation between each function of code and what the model (currently text-embedding-ada-002) believes it knows about the code.  It calculates the embeddings for code snippets in the 'code' column of the DataFrame using the 'text-embedding-ada-002' engine. It then saves the DataFrame to a CSV file and displays the initial rows of the DataFrame.

In [16]:
from openai.embeddings_utils import get_embedding
import warnings
warnings.filterwarnings("ignore")

df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))
if extType == "py":
    df['filepath'] = df['filepath'].apply(lambda x: x.replace(code_root, ""))
df.to_csv("code_search_openai-python.csv", index=False)
df.head()

Unnamed: 0,code,function_name,filepath,code_embedding
0,"def get_data():\n """"""\n Reads the bank i...",get_data,dragonbank.py,"[-0.02220141515135765, 0.014151332899928093, 0..."
1,"def set_data(data):\n """"""\n Writes the b...",set_data,dragonbank.py,"[-0.014050030149519444, 0.02985006384551525, 0..."
2,"def get_users_as_list():\n """"""\n This fu...",get_users_as_list,dragonbank.py,"[-0.009142040275037289, 0.04257689416408539, -..."
3,"def list_to_linked_list(arr):\n """"""\n Co...",list_to_linked_list,dragonbank.py,"[0.01748533919453621, 0.022688109427690506, 0...."
4,"def heap_sort(input_list, field):\n """"""\n ...",heap_sort,dragonbank.py,"[0.008028099313378334, 0.03961138427257538, -0..."


## CODE BLOCK 6

### Description
This code implements a function that performs a similarity search between a given code_query, i.e., an ask, and a DataFrame containing code snippets. It calculates the cosine similarity between the query and each code snippet, ranks them based on similarity, and returns the top 3 matches. 

In [17]:
from openai.embeddings_utils import cosine_similarity

def search_functions(df, code_query, n=3, pprint=False, n_lines=7):
    pprint = True
    embedding = get_embedding(code_query, engine='text-embedding-ada-002')
    df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))

    res = df.sort_values('similarities', ascending=False).head(n)
    if pprint:
        for r in res.iterrows():
            print(r[1].filepath+":"+r[1].function_name + "  score=" + str(round(r[1].similarities, 3)))
            print("\n".join(r[1].code.split("\n")[:n_lines]))
            print('-'*70)
    return res

#res = search_functions(df, 'Write unit tests for the perform_transaction function', n=3)



dragonbank.py:perform_transaction  score=0.709
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
.ipynb_checkpoints/dragonbank-checkpoint.py:perform_transaction  score=0.709
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
.ipynb_checkpoints/dragonbank-checkpoint.py:set_data  score=0.665
def set_data(data):
    """
    Writes the bank information into the data file
    """
    f = open(FILE_PATH, "w")
    json_data = json.dumps(data)
    f.write(json_data)
----------------

## CODE BLOCK 7

### Description
Defines functions to print messages sent to or from GPT

In [18]:
color_prefix_by_role = {
    "system": "\033[0m",  # gray
    "user": "\033[0m",  # gray
    "assistant": "\033[92m",  # green
}


def print_messages(messages, color_prefix_by_role=color_prefix_by_role) -> None:
    """Prints messages sent to or from GPT."""
    for message in messages:
        role = message["role"]
        color_prefix = color_prefix_by_role[role]
        content = message["content"]
        print(f"{color_prefix}\n[{role}]\n{content}")


def print_message_delta(delta, color_prefix_by_role=color_prefix_by_role) -> None:
    """Prints a chunk of messages streamed back from GPT."""
    if "role" in delta:
        role = delta["role"]
        color_prefix = color_prefix_by_role[role]
        print(f"{color_prefix}\n[{role}]\n", end="")
    elif "content" in delta:
        content = delta["content"]
        print(content, end="")
    else:
        pass

## CODE BLOCK 8

### Description
performs a code search, generates a message with the query and code context, sends it to the GPT model for response generation, and returns the generated response as an empty string.

In [26]:
def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = "gpt-3.5-turbo",
    print_message: bool = True,
) -> str:
    """Answers a query using GPT and a dataframe of relevant texts and embeddings."""
    res = search_functions(df, query, n=3)

    # Loop through each row of the dataframe
    context = ""
    for index, row in res.iterrows():
        # Access the value in the 'column_name' column for the current row
        filePath = row['filepath']
        code = row['code']
        combined = f'''\n\nCode File Location: {filePath}\nCode: {code}'''
        context = context + combined
    
    message = f'''
    Use the below code to respond to this prompt: {query}
    {context}
    '''

    messages = [
        {"role": "system", "content": "You are a world-class developer with an eagle eye for unintended bugs and edge cases. You carefully explain code with great detail and accuracy. You organize your explanations in markdown-formatted, bulleted lists."},
        {"role": "user", "content": message},
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0.5,
        stream=True
    )
    
    execution = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if print_message:
            print_message_delta(delta)
        if "content" in delta:
            execution += delta["content"]
    
    #response_message = response["choices"][0]["message"]["content"]
    response_message = ""
    return response_message

In [20]:
response = ask('Write unit tests for the perform_transaction function.')

.ipynb_checkpoints/dragonbank-checkpoint.py:perform_transaction  score=0.716
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
dragonbank.py:perform_transaction  score=0.716
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
.ipynb_checkpoints/dragonbank-checkpoint.py:set_data  score=0.669
def set_data(data):
    """
    Writes the bank information into the data file
    """
    f = open(FILE_PATH, "w")
    json_data = json.dumps(data)
    f.write(json_data)
----------------

In [24]:
response = ask('Write unit test code for the perform_transaction function. Consider if the sender and receiver accounts does not exist.  If they do not exist, create them')

dragonbank.py:perform_transaction  score=0.758
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
.ipynb_checkpoints/dragonbank-checkpoint.py:perform_transaction  score=0.758
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
dragonbank.py:delete_account  score=0.671
def delete_account(account_number):
    """
    Deletes an account if exists, otherwise displays an error
    """
    users = get_data()
    if account_number not in users:
        print("Did not found the accoun

In [25]:
response = ask('Write unit test code for the perform_transaction function. Only include tests for edge cases')

.ipynb_checkpoints/dragonbank-checkpoint.py:perform_transaction  score=0.683
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
dragonbank.py:perform_transaction  score=0.683
def perform_transaction(sender_number, receiver_number, amount):
    """
    Given two account numbers and a transaction amount, this will move
    the money from the sender account to the recipient account.
    """
    users = get_data()

----------------------------------------------------------------------
.ipynb_checkpoints/dragonbank-checkpoint.py:display_menu  score=0.642
def display_menu():
    """
    Displays the welcome menu and asks the user for a
    command to perform (which then performs).

    This also acts as the UI and receives the informati

In [None]:
response = ask('Write unit test code for the perform_transaction function. Only include tests for edge cases')