In [1]:
import os
from glob import glob
import pandas as pd
import json
import openai
from dotenv import load_dotenv 
load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")


In [2]:
code_cache = {}
with open("../cache/code_cache.json", "r") as file:
        code_cache = json.load(file)

code_index = code_cache["cached_index"]
len(code_index)

16

In [15]:
from openai.embeddings_utils import get_embedding
import ast
try:
    df = pd.read_csv("../data/code_search.csv")
    df["code_embedding"] = df["code_embedding"].apply(ast.literal_eval)
except FileNotFoundError:
    df = pd.DataFrame({
        'filepath':code_index.keys(),
        'code':code_index.values()
    })

    df['code_embedding'] = df['code'].apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))
    df.to_csv("../data/code_search.csv", index=False)
    
df.head()


Unnamed: 0,filepath,code,code_embedding
0,app/models/api-log.js,const mongoose = require('mongoose');\nconst S...,"[0.02196192368865013, 0.01452641375362873, 0.0..."
1,app/models/fact.js,const mongoose = require('mongoose');\nconst S...,"[0.01867215894162655, 0.0121297063305974, -0.0..."
2,app/models/message.js,var mongoose = require('mongoose');\nvar Schem...,"[0.002847205614671111, 0.02546437457203865, -0..."
3,app/models/recipient.js,const mongoose = require('mongoose');\nconst S...,"[-0.004480238538235426, 0.015053601004183292, ..."
4,app/models/unsubscribe-date.js,const mongoose = require('mongoose');\nconst S...,"[0.008232221938669682, -0.008185065351426601, ..."


In [17]:
from openai.embeddings_utils import cosine_similarity

def search_functions(df, code_query, n=3, pprint=True, n_lines=7):
    embedding = get_embedding(code_query, engine='text-embedding-ada-002')
    df['similarities'] = df.code_embedding.apply(lambda x: cosine_similarity(x, embedding))

    res = df.sort_values('similarities', ascending=False).head(n)
    # if pprint:
    #     for r in res.iterrows():
    #         print(r[1].filepath+":"+r[1].function_name + "  score=" + str(round(r[1].similarities, 3)))
    #         print("\n".join(r[1].code.split("\n")[:n_lines]))
    #         print('-'*70)
    return res

query = 'what is the base url of this api'
res = search_functions(df, query, n=3)
relevant_file, relevant_code = res.iloc[0].filepath, res.iloc[0].code

prompt = f"""
{query}
{relevant_file}
{relevant_code}
"""
res


Unnamed: 0,filepath,code,code_embedding,similarities
8,app/routes/catbot.routes.js,const express = require('express');\nconst rou...,"[-0.007619943004101515, 0.0015891162911430001,...",0.733316
0,app/models/api-log.js,const mongoose = require('mongoose');\nconst S...,"[0.02196192368865013, 0.01452641375362873, 0.0...",0.727586
15,app/routes/webhook.routes.js,const express = require('express');\nconst rou...,"[0.004377592355012894, -0.001167632290162146, ...",0.726508


In [18]:
completion = openai.Completion.create(
                        model="text-davinci-003",
                        prompt = prompt,
                        temperature = 0,
                        max_tokens = 2048
            ).choices[0].text
completion

'\nThe base URL of this API is: app/routes/catbot.routes.js'

In [32]:
import sys
sys.path.append("../")
from utils import num_tokens

num_tokens(str(code_index))

19117

In [42]:
fact_endpoint = """
const express = require('express');
const router = express.Router();

const strings = require.main.require('./app/config/strings.js');
const { isAuthenticated, logApiRequest } = require('../middleware');

const Fact = require.main.require('./app/models/fact');
const User = require.main.require('./app/models/user');

router.get('/', async(req, res) => {
    try {
        const facts = await Fact.find().limit(5);
        return res.status(200).json(facts);
    } catch (err) {
        return res.status(err.status || 400).json(err);
    }
})

// Get submitted facts
router.get('/me', async(req, res) => {

    const animalType = req.query.animal_type ? req.query.animal_type.split(',') : ['cat'];

    try {
        const data = await Fact.find({
                user: req.user._id,
                type: { $in: animalType }
            })
            .select('text type')
            .populate('user', 'name')
            .limit(10)

        return res.status(200).json(data);
    } catch (err) {
        return res.status(400).json(err);
    }
});

// Get a random fact
router.get('/random', logApiRequest, async(req, res) => {

    const animalType = req.query.animal_type ? req.query.animal_type.split(',') : ['cat'];
    const amount = req.query.amount;

    if (amount > 500) {
        return res.status(405).json({ message: 'Limited to 500 facts at a time' });
    }

    try {
        const facts = await Fact.getFact({ amount, animalType });
        return res.status(200).json(facts);
    } catch (err) {
        return res.status(err.status).json(err);
    }
});

// Get fact by ID
router.get('/:factID', logApiRequest, async(req, res) => {
    try {
        const fact = await Fact.findById(req.params.factID).populate('user', 'name photo');

        if (!fact) {
            return res.status(404).json({ message: 'Fact not found' });
        }

        return res.status(200).json(fact);
    } catch (err) {
        return res.status(400).json(err);
    }
});

// Submit a fact
router.post('/', isAuthenticated, async(req, res) => {

    if (!req.body.factText) {
        return res.status(400).json({ message: "Missing body paramter: factText" });
    }
    if (!req.body.animalType) {
        return res.status(400).json({ message: "Missing body parameter: animalType" });
    }

    const io = req.app.get('socketio');

    let text = req.body.factText;

    text = text.charAt(0).toUpperCase() + text.slice(1); // Capitalize
    text += text[text.length - 1] == "." ? "" : "."; // Add period to end

    const fact = new Fact({
        user: req.user._id,
        text,
        type: req.body.animalType
    });

    try {
        const savedFact = await fact.save();

        const populatedFact = await User.populate(savedFact, { path: 'user', select: 'name' });

        io.emit('fact', populatedFact);

        return res.status(200).json(populatedFact);
    } catch (err) {
        if (err.code === 11000) {
            err.message = strings.fact.exists;
            return res.status(409).json(err);
        }
        return res.status(400).json(err);
    }
});

module.exports = router;
"""
res = openai.Completion.create(
                        model="text-davinci-003",
                        prompt = "describe the fact endpoints",
                        temperature = 0,
                        max_tokens = 2048,
                        prompt_context=fact_endpoint
            ).choices[0].text

res

InvalidRequestError: Unrecognized request argument supplied: prompt_context