In [1]:
# Import libraries
import os
from glob import glob
import pandas as pd
import openai
from openai.embeddings_utils import get_embedding

# Set OpenAI API key
with open('openai-key.txt', 'r') as f:
    key = f.read().strip()

# Kept for anything that looks the key up in the environment.
os.environ['OPENAI_API_KEY'] = key
# `openai` was already imported above, i.e. before the environment variable was
# set, so the loaded module (pre-1.0 client reads the variable at import time)
# would not see it. Hand the key to the module explicitly as well.
openai.api_key = key

# Set location of code repo
repoFolder = "BF_Planning_Poker/modules"
#repoFolder = "openai-cookbook"

# Set code file extension type
extType = "js"

In [7]:
# Point the openai module at the key file (the same file that is read in the first cell).
openai.api_key_path = 'openai-key.txt'

In [2]:
# Functions to create dataframe for Python repos broken down by individual functions

def get_function_name(code):
    """
    Return the identifier that sits between the leading "def " and the first "(".

    `code` must begin with "def " (enforced with an assertion) and must contain
    an opening parenthesis, otherwise `str.index` raises ValueError.
    """
    prefix = "def "
    assert code.startswith(prefix)
    paren_at = code.index("(")
    name_start = len(prefix)
    return code[name_start:paren_at]

def get_until_no_space(all_lines, i) -> str:
    """
    Collect the source text of the definition that starts at ``all_lines[i]``.

    Line ``i`` is always included. Every following line is appended for as long
    as it is empty or begins with a space, a tab, or ")". The first line that
    matches none of these ends the block and is not included. Note that blank
    lines trailing the body are therefore part of the result.

    Args:
        all_lines: the file content as a list of lines (no line terminators).
        i: index of the line on which the definition starts.

    Returns:
        The collected lines joined with newline characters.
    """
    ret = [all_lines[i]]
    # Scan up to the real end of the file. A fixed 10000-line window would cut
    # off very long bodies and keep iterating after the file has ended.
    for line in all_lines[i + 1:]:
        if len(line) == 0 or line[0] in (" ", "\t", ")"):
            ret.append(line)
        else:
            break
    return "\n".join(ret)

def get_functions(filepath):
    """
    Yield every top-level function found in a Python file.

    A function is recognised by a line that starts with "def " at column 0;
    indented (nested or method) definitions are not matched.

    Args:
        filepath: path of the Python source file to scan.

    Yields:
        dict with keys "code" (the function's source text), "function_name"
        and "filepath" (the path exactly as it was passed in).
    """
    # Read everything up front inside a context manager so the file handle is
    # closed before the first item is yielded.
    with open(filepath) as source_file:
        whole_code = source_file.read().replace("\r", "\n")
    all_lines = whole_code.split("\n")
    for i, line in enumerate(all_lines):
        if line.startswith("def "):
            code = get_until_no_space(all_lines, i)
            function_name = get_function_name(code)
            yield {"code": code, "function_name": function_name, "filepath": filepath}

In [3]:
# Create dataframe for code embedding

code_root = repoFolder

# Walk the repo and collect every file with the configured extension
code_files = [y for x in os.walk(code_root) for y in glob(os.path.join(x[0], f"*.{extType}"))]
print(f"Total number of {extType} files:", len(code_files))

all_funcs = []
if extType == "py":
    # Python sources are split into one record per top-level function
    for code_file in code_files:
        all_funcs.extend(get_functions(code_file))

    print("Total number of functions extracted:", len(all_funcs))

else:
    # Any other file type is kept whole: one record per file, no function_name
    for code_file in code_files:
        # Context manager so the handle is closed as soon as the file is read
        with open(code_file) as source_file:
            whole_code = source_file.read().replace("\r", "\n")
        all_funcs.append({"code": whole_code, "filepath": code_file})

df = pd.DataFrame(all_funcs)
df

# test = pd.DataFrame(all_funcs)
# df = test.head()

# df

Total number of js files: 12


Unnamed: 0,code,filepath
0,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/App.js
1,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/landing/Landing.js
2,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/landing/LandingContr...
3,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/ResultsService.js
4,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/DeckFactory.js
5,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/DeckController.js
6,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/Room.js
7,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/RoomController.js
8,"(function(window, angular, undefined) {\n ""...",BF_Planning_Poker/modules/siteRequest/SiteRequ...
9,"(function(window, angular, undefined) {\n ""...",BF_Planning_Poker/modules/siteRequest/SiteRequ...


In [9]:
from openai.embeddings_utils import get_embedding
import warnings
warnings.filterwarnings("ignore")

# Embed every code snippet; each row results in its own get_embedding call
code_column = df['code']
df['code_embedding'] = code_column.map(
    lambda snippet: get_embedding(snippet, engine='text-embedding-ada-002')
)

# For Python repos, drop the repo root from the stored paths
if extType == "py":
    df['filepath'] = df['filepath'].map(lambda path: path.replace(code_root, ""))

# Persist the snippets together with their embeddings, then preview the frame
df.to_csv("code_search_openai-python.csv", index=False)
df.head()

Unnamed: 0,code,filepath,code_embedding
0,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/App.js,"[-0.003846886334940791, 0.017710193991661072, ..."
1,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/landing/Landing.js,"[-0.005139595828950405, 0.018938548862934113, ..."
2,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/landing/LandingContr...,"[-0.01231421623378992, 0.012499891221523285, -..."
3,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/ResultsService.js,"[-0.010933968238532543, 0.025452183559536934, ..."
4,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/DeckFactory.js,"[-0.00672426400706172, 0.004281288478523493, -..."


In [10]:
# df = pd.read_csv("code_search_openai-python.csv")
# df.head()

In [11]:
from openai.embeddings_utils import cosine_similarity

def search_functions(df, code_query, n=3, pprint=False, n_lines=7):
    """
    Rank the rows of ``df`` by similarity to ``code_query`` and return the top ``n``.

    The query is embedded with ``text-embedding-ada-002`` and compared with each
    entry of ``df.code_embedding`` via ``cosine_similarity``. The scores are
    written to a ``similarities`` column on ``df`` itself, so the caller's
    frame is modified.

    Args:
        df: frame with at least ``code``, ``filepath`` and ``code_embedding``
            columns; ``function_name`` is optional.
        code_query: text to search for.
        n: number of best-scoring rows to return.
        pprint: when true, print a short preview of every returned row.
        n_lines: number of leading code lines shown per row in the preview.

    Returns:
        The ``n`` rows with the highest similarity, best first.
    """
    embedding = get_embedding(code_query, engine='text-embedding-ada-002')
    df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))

    res = df.sort_values('similarities', ascending=False).head(n)
    if pprint:
        for _, row in res.iterrows():
            # Rows built from whole files (non-Python repos) carry no
            # function_name column, so only add it to the label when present.
            label = row.filepath
            if "function_name" in row.index:
                label += ":" + row.function_name
            print(label + "  score=" + str(round(row.similarities, 3)))
            print("\n".join(row.code.split("\n")[:n_lines]))
            print('-'*70)
    return res

# Example search: keep the 3 rows of df that score highest against the query text.
res = search_functions(df, 'Write unit tests for the RoomController function', n=3)

In [12]:
# Display the search result (head() shows at most the first five rows).
res.head()

Unnamed: 0,code,filepath,code_embedding,similarities
7,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/RoomController.js,"[-0.006216228473931551, 0.01109306700527668, -...",0.809942
2,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/landing/LandingContr...,"[-0.01231421623378992, 0.012499891221523285, -...",0.787318
5,"(function(window, angular, undefined) {\n ""us...",BF_Planning_Poker/modules/room/DeckController.js,"[-0.012685777619481087, 0.0073215835727751255,...",0.779079


In [17]:
# ANSI escape sequence printed before the messages of each role.
color_prefix_by_role = {
    "system": "\033[0m",  # ANSI reset: terminal default color (not an actual gray)
    "user": "\033[0m",  # ANSI reset: terminal default color (not an actual gray)
    "assistant": "\033[92m",  # bright green
}


def print_messages(messages, color_prefix_by_role=color_prefix_by_role) -> None:
    """Print each chat message as a role header followed by its content.

    Every message is a mapping with "role" and "content" keys. The text printed
    for a message is prefixed with the escape sequence registered for its role
    in `color_prefix_by_role`; an unregistered role raises KeyError.
    """
    for msg in messages:
        speaker = msg["role"]
        prefix = color_prefix_by_role[speaker]
        body = msg["content"]
        print(prefix, f"[{speaker}]", body, sep="\n")


def print_message_delta(delta, color_prefix_by_role=color_prefix_by_role) -> None:
    """Print one streamed chunk ("delta") of a chat reply without a trailing newline.

    Args:
        delta: mapping that may contain a "role" key, a "content" key, both,
            or neither.
        color_prefix_by_role: escape sequence to emit per role; a role that is
            not registered raises KeyError.

    A "role" entry prints the colored "[role]" header; a "content" entry prints
    the text fragment. A delta with neither key prints nothing.
    """
    if "role" in delta:
        role = delta["role"]
        color_prefix = color_prefix_by_role[role]
        print(f"{color_prefix}\n[{role}]\n", end="")
    # Checked independently of "role": a chunk carrying both keys must still
    # have its text printed, otherwise the printed reply loses that fragment.
    if "content" in delta:
        content = delta["content"]
        # Skip a null content so the literal text "None" never reaches the output.
        if content is not None:
            print(content, end="")


In [18]:
def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = "gpt-3.5-turbo",
    print_message: bool = True,
) -> str:
    """Answer a query with a chat model, using the best-matching code as context.

    The three rows of ``df`` most similar to ``query`` (per ``search_functions``)
    are pasted into the user message, and the reply is requested as a stream.

    Args:
        query: the prompt to answer.
        df: frame to search. The default is the ``df`` object that existed when
            this function was defined; pass a frame explicitly after rebuilding it.
        model: chat model name passed to ``openai.ChatCompletion.create``.
        print_message: when true, print the reply chunk by chunk as it arrives.

    Returns:
        The complete reply text, i.e. all streamed content fragments joined.
    """
    res = search_functions(df, query, n=3)

    # Build one context section per matching row
    sections = []
    for _, row in res.iterrows():
        filePath = row["filepath"]
        code = row["code"]
        sections.append(f'''\n\nCode File Location: {filePath}\nCode: {code}''')
    context = "".join(sections)

    message = f'''
    Use the below code to respond to this prompt: {query}
    {context}
    '''

    messages = [
        {"role": "system", "content": "You are a world-class developer with an eagle eye for unintended bugs and edge cases. You carefully explain code with great detail and accuracy. You organize your explanations in markdown-formatted, bulleted lists."},
        {"role": "user", "content": message},
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0.5,
        stream=True
    )

    # Collect the streamed fragments so the full reply can be returned, not
    # only printed.
    answer_parts = []
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if print_message:
            print_message_delta(delta)
        if "content" in delta:
            answer_parts.append(delta["content"])

    return "".join(answer_parts)

In [19]:
# Run the full pipeline for a sample prompt; with the default print_message=True
# the reply is printed to the cell output while it streams in.
response = ask('Write unit tests for the RoomController function.')

[92m
[assistant]
Unit Tests for RoomController function:

1. Test if $scope.changeDeck function updates $scope.room and $cookieStore:
```
describe('RoomController', function() {
  beforeEach(module('ATS.Room'));

  var $controller, $rootScope, $scope, $cookieStore, $q, FirebaseService;
  var mockRoom = {
    $save: function() {
      var deferred = $q.defer();
      deferred.resolve();
      return deferred.promise;
    }
  };

  beforeEach(inject(function(_$controller_, _$rootScope_, _$cookieStore_, _$q_, _FirebaseService_) {
    $controller = _$controller_;
    $rootScope = _$rootScope_;
    $scope = $rootScope.$new();
    $cookieStore = _$cookieStore_;
    $q = _$q_;
    FirebaseService = _FirebaseService_;

    spyOn($cookieStore, 'put');
    spyOn(mockRoom, '$save').and.callThrough();

    $controller('RoomController', {
      $rootScope: $rootScope,
      $scope: $scope,
      $routeParams: {},
      $location: {},
      $cookieStore: $cookieStore,
      FirebaseService: Firebas