In [None]:
import tiktoken
import pinecone


class CodeAGI:
    """Skeleton of the code-generation agent; none of its steps are implemented yet."""

    def load_from_github(self, clone_url: str, file_type_list: list):
        """Placeholder for loading files from a Git clone URL.

        Always raises ``NotImplementedError``.
        """
        raise NotImplementedError()

    def load_from_directory(self, directory: str, file_type_list: list):
        """Placeholder for loading files from a local directory.

        Always raises ``NotImplementedError``.
        """
        raise NotImplementedError()

    def start(self, prompt):
        """Stub entry point: accepts a prompt, does nothing, returns ``None``."""
        return None

    def build_task_list(self):
        """Stub: does nothing and returns ``None``."""
        return None
    

In [None]:
import openai
import json
import tiktoken

from envs import env


class CodeBuilder:
    """Ask an OpenAI completion engine for a JSON plan and task list.

    Args:
        api_key: OpenAI API key. It is assigned to ``openai.api_key``, i.e. it
            is set on the ``openai`` module and therefore applies process-wide,
            not just to this instance.
        engine: Completion engine to query. Defaults to the engine this
            notebook was written against.
    """

    # Combined prompt + completion token budget; presumably chosen to stay
    # under the engine's context window -- TODO confirm when changing engines.
    TOKEN_BUDGET = 3500

    def __init__(self, api_key, engine="text-davinci-003"):
        openai.api_key = api_key
        self.engine = engine

    def _count_tokens(self, text):
        """Return the number of tokens in ``text`` for ``self.engine``."""
        try:
            # Use the tokenizer that belongs to the engine being queried so the
            # budget arithmetic matches what the API will actually count.
            encoding = tiktoken.encoding_for_model(self.engine)
        except KeyError:
            # tiktoken does not know this model name; fall back to the
            # encoding the notebook used originally.
            encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(text))

    def query_gpt3(self, prompt, max_tokens=255):
        """Send ``prompt`` to the completion engine and parse the reply as JSON.

        Args:
            prompt: Full prompt text.
            max_tokens: Upper bound on the number of completion tokens.

        Returns:
            The decoded JSON value of the first completion choice.

        Raises:
            ValueError: If ``max_tokens`` is smaller than 1 (the prompt left no
                room for a completion).
            json.JSONDecodeError: If the completion text is not valid JSON.
        """
        if max_tokens < 1:
            raise ValueError(
                f"max_tokens must be at least 1, got {max_tokens}; "
                "the prompt leaves no room for a completion"
            )

        response = openai.Completion.create(
            engine=self.engine,
            prompt=prompt,
            max_tokens=max_tokens,
            n=1,
            stop=None,
            temperature=0.5,
        )

        return json.loads(response.choices[0].text.strip())

    def build_plan_and_task_list(self, build_prompt):
        """Request a plan and a task list for ``build_prompt``.

        The completion is given whatever remains of ``TOKEN_BUDGET`` after the
        prompt itself has been counted.

        Args:
            build_prompt: Natural-language description of what to build.

        Returns:
            The decoded JSON returned by ``query_gpt3``.

        Raises:
            ValueError: If the prompt alone uses up the whole token budget.
            json.JSONDecodeError: If the completion text is not valid JSON.
        """
        prompt = f"Given the prompt to build something with code: '{build_prompt}', create a plan and a task list using GPT-3 in JSON format. Use the following JSON template: {{\"plan_list\": [{{\"title\": \"Research Django\", \"action_type\": \"google-search\", \"search_term\": \"Django documentation\"}}], \"task_list\": [{{\"title\": \"Create Poetry Project\", \"action_type\": \"execute_command\", \"command\": \"poetry init\"}}]}}:"
        prompt_token_count = self._count_tokens(prompt)
        print(prompt_token_count)
        max_allowed_tokens = self.TOKEN_BUDGET - prompt_token_count
        print(max_allowed_tokens)
        return self.query_gpt3(prompt, max_tokens=max_allowed_tokens)
    
    
# The API key is not hard-coded here: it is looked up with
# env('OPENAI_API_KEY'), where `env` comes from the project-local `envs` module.
code_builder = CodeBuilder(env('OPENAI_API_KEY'))

# Example request: one build prompt in, one parsed JSON plan/task list out.
build_prompt = "Create a Flask based SaaS for Dealer Management System using Fauna DB, Fauna GraphQL, HTMX. TailwindCSS, Poetry and Jinja2."
plan_and_task_list = code_builder.build_plan_and_task_list(build_prompt)

print("Plan and Task List:")
print("--------------------")
# Bare last expression so the notebook renders the parsed result as the cell output.
plan_and_task_list


In [None]:
import requests
from bs4 import BeautifulSoup

docs_url = 'https://docs.bunny.net/reference/bunnynet-api-overview'
# Fetch the page named by `docs_url` (the original referenced an undefined
# `url`). The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
# Every <a> tag found inside the document body.
links = BeautifulSoup(docs_response.content, 'html.parser').body.find_all('a')

In [None]:


# Number of <a> tags collected from the docs page.
len(links)

In [None]:
def chunk_text(text, limit):
    """Split ``text`` into whitespace-delimited chunks of at most ``limit`` characters.

    Words are never split. Consecutive words are joined with a single space and
    a chunk is closed *before* adding a word that would push it past ``limit``
    (the previous version closed the chunk only after it had already exceeded
    the limit).

    Args:
        text: Text to split; any run of whitespace separates words.
        limit: Maximum chunk length in characters.

    Returns:
        A list of chunks in original word order. A single word longer than
        ``limit`` is returned as its own chunk. Empty or whitespace-only text
        yields an empty list.
    """
    chunks = []
    current_words = []
    current_len = 0  # length of " ".join(current_words)
    for word in text.split():
        # One extra character for the joining space once the chunk has content.
        projected_len = current_len + len(word) + (1 if current_words else 0)
        if current_words and projected_len > limit:
            chunks.append(" ".join(current_words))
            current_words = [word]
            current_len = len(word)
        else:
            current_words.append(word)
            current_len = projected_len
    if current_words:
        chunks.append(" ".join(current_words))
    return chunks

def chunk_tag_list(links_list, max_size, encoding_name="cl100k_base"):
    """Group unique link tags into comma-terminated strings below a token cap.

    Args:
        links_list: Iterable of tag-like objects exposing an ``attrs`` mapping
            and a ``str()`` form. Plain ``str`` items are ignored, as are tags
            without an ``href`` attribute and tags whose ``href`` was already seen.
        max_size: A chunk is extended only while its token count stays strictly
            below this value.
        encoding_name: Name of the tiktoken encoding used to count tokens.

    Returns:
        A list of strings, each a concatenation of ``"<tag>,"`` pieces. A single
        tag that is itself over the cap becomes its own chunk. No empty chunks
        are produced, so empty input yields an empty list.
    """
    seen_hrefs = set()
    unique_tags = []
    for link in links_list:
        if isinstance(link, str):
            continue
        # Anchors without an href would raise KeyError with attrs['href'].
        href = link.attrs.get('href')
        if href is None or href in seen_hrefs:
            continue
        seen_hrefs.add(href)
        unique_tags.append(str(link))

    enc = tiktoken.get_encoding(encoding_name)
    result_list = []
    tag_list = ""
    for tag in unique_tags:
        link_str = f"{tag},"
        if len(enc.encode(tag_list + link_str)) < max_size:
            tag_list += link_str
        else:
            # Only flush real content: when the very first tag is oversized the
            # accumulator is still empty and must not become a "" chunk.
            if tag_list:
                result_list.append(tag_list)
            tag_list = link_str

    if tag_list:
        result_list.append(tag_list)
    return result_list

# Number of chunks the collected links split into with an 800-token cap per chunk.
len(chunk_tag_list(links, 800))

In [None]:
import openai
import json
import tiktoken
import requests
from urllib.parse import urlparse 


from typing import Any

# Research objective; substituted for {goal} in the prompt templates below.
goal = 'create an Python SDK for the bunny.net API'

# Link-selection prompt. Filled with str.format(goal=..., hostname=..., body=...);
# the doubled braces around the JSON example are escapes so format() emits
# literal { and }.
crawl_prompt_template = """
        Given the following list of links return the URLs that will help the most 
        while researching to achieve the goal to "{goal}".
        If the link is a relative URL ("/some/relative-link") convert it to an absolute URL 
        ("{hostname}/some/relative-link") using this hostname: {hostname}.
        Make sure to output valid JSON using the following JSON template for your output [{{"link": "http://example.com"}}]:
        {body}
        """

# Page-summary prompt; same placeholders as above. The requested output is a
# single JSON object, not a list.
# NOTE(review): {hostname} is emitted directly in front of "Goal:" with no
# separator -- confirm this is intended.
summary_prompt_template = """
            {hostname}Goal: {goal}
            Given the body of the page return the summary (with minimal text) and relevant code snippets
            from the page using the following JSON template 
            {{ "summary": "The summary of the page" , "relevant_code_snippets": ["The code snippet"] }}:
            {body}
            """
    
    
def get_result(goal: str, body: Any, prompt_template: str, url: str = '',
               max_retries: int = 1, verbose: bool = True):
    """Run ``prompt_template`` over ``body`` chunk by chunk and collect the JSON replies.

    Args:
        goal: Research objective substituted for ``{goal}`` in the template.
        body: Either a list of link tags (chunked with ``chunk_tag_list``) or a
            text string (chunked with ``chunk_text``).
        prompt_template: ``str.format`` template using ``{goal}``, ``{body}``
            and ``{hostname}``.
        url: Page URL; its scheme and hostname are substituted for
            ``{hostname}``. When no scheme/hostname can be parsed the
            placeholder is filled with an empty string.
        max_retries: Extra attempts per chunk after a connection error or an
            unparseable reply. The default matches the single retry of the
            original implementation.
        verbose: When true, print each prompt and a numbered progress line.

    Returns:
        A list of decoded results. A JSON array reply contributes its elements;
        any other JSON value (e.g. the object requested by the summary
        template) is appended whole.

    Raises:
        ValueError: If a prompt leaves no room for a completion in the budget.
        json.JSONDecodeError, ConnectionResetError, requests.ConnectionError:
            If the last attempt for a chunk still fails.
    """
    engine = "text-davinci-003"
    token_budget = 3500  # combined prompt + completion tokens per request

    try:
        # Count tokens with the tokenizer that belongs to the queried engine.
        enc = tiktoken.encoding_for_model(engine)
    except KeyError:
        # Model name unknown to tiktoken; use the encoding used originally.
        enc = tiktoken.get_encoding("cl100k_base")

    parsed_url = urlparse(url)
    if parsed_url.scheme and parsed_url.hostname:
        hostname = f"{parsed_url.scheme}://{parsed_url.hostname}"
    else:
        # Avoid the literal "://None" that an empty or relative URL would give.
        hostname = ''

    if isinstance(body, list):
        chunked_list = chunk_tag_list(body, 800)
    else:
        chunked_list = chunk_text(body, 800)

    result_list = []
    for request_no, chunk in enumerate(chunked_list, start=1):
        prompt = prompt_template.format(goal=goal, body=chunk, hostname=hostname)
        if verbose:
            print(prompt)

        max_allowed_tokens = token_budget - len(enc.encode(prompt))
        if max_allowed_tokens < 1:
            raise ValueError(
                f"prompt for chunk #{request_no} leaves no room for a completion "
                f"within the {token_budget}-token budget"
            )

        if verbose:
            print(f'#{request_no} Request Made to OpenAI')
        parsed = _request_json(prompt, max_allowed_tokens, engine, max_retries)

        # list.extend() on a dict would add only its keys, so non-list replies
        # are appended as a single item.
        if isinstance(parsed, list):
            result_list.extend(parsed)
        else:
            result_list.append(parsed)
    return result_list


def _request_json(prompt, max_tokens, engine, max_retries):
    """Request one completion and decode it as JSON, retrying on transient failures."""
    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        try:
            response = openai.Completion.create(
                engine=engine,
                prompt=prompt,
                max_tokens=max_tokens,
                n=1,
                stop=None,
                temperature=0.2,
            )
            return json.loads(response.choices[0].text)
        except (ConnectionResetError, requests.ConnectionError, json.JSONDecodeError):
            # Out of attempts: let the caller see the real failure.
            if attempt == attempts - 1:
                raise

In [None]:
# Ask the model which of the collected links are worth reading for `goal`.
# The URL argument supplies the scheme and hostname substituted for {hostname}
# in the crawl prompt.
get_result(goal, links, crawl_prompt_template, 'https://docs.bunny.net/reference/bunnynet-api-overview')

In [None]:
# Chat-endpoint variant of the link-selection request. The user message is the
# crawl prompt written out by hand, with the goal, the https://docs.bunny.net
# hostname and a comma-separated sample of anchor tags already substituted.
response = openai.ChatCompletion.create(
  model="gpt-3.5-turbo",
  messages=[
      {"role": "system", "content": "AI assistant that builds software based on prompts that always returns valid JSON output."},
      {"role": "user", "content": """
      Given the following list of links return the URLs that will help the most 
        while researching to achieve the goal to "create an Python SDK for the bunny.net API".
        If the link is a relative URL ("/some/relative-link") convert it to an absolute URL 
        ("https://docs.bunny.net/some/relative-link") using this hostname: https://docs.bunny.net.
        Make sure to output valid JSON using the following JSON template for your output [{"link": "http://example.com"}]:
        <a class="Button Button_md Header-jumpTo3IWKQXmhSI5D rm-JumpTo Button_primary" href="#content" target="_self">Jump to Content</a>,<a class="Header-logo1Xy41PtkzbdG rm-Logo" href="/" target="_self"><img alt="bunny.net Developer Hub" class="Header-logo-img3YvV4lcGKkeb rm-Logo-img" src="https://files.readme.io/649bcfb-small-bunnynet-logo-dark.png"/></a>,<a class="Button Button_md Button_slate_text rm-Header-top-link Header-link2tXYTgXq85zW" href="https://status.bunnycdn.com/" target="_self" to="https://status.bunnycdn.com/">Service Status</a>,<a class="Button Button_md Button_slate_text rm-Header-top-link Header-link2tXYTgXq85zW" href="https://support.bunny.net/hc/en-us" target="_self" to="https://support.bunny.net/hc/en-us">Support Hub</a>,<a class="Button Button_md Button_slate_text rm-Header-top-link Header-link2tXYTgXq85zW" href="/cdn-cgi/l/email-protection#dfacaaafafb0adab9fbdaab1b1a6f1b1baab" target="_self" to="mailto:support@bunny.net"><span class="__cf_email__" data-cfemail="097a7c7979667b7d496b7c67677027676c7d">[email protected]</span></a>,<a class="NavItem-item1gDDTqaXGhm1 NavItem-item_mobile1qG3gd-Mkck-" href="/docs" target="_self"><i class="NavItem-item-anchorzz3banOxXKjr icon-guides"></i><span class="NavItem-textSlZuuL489uiw">Documentation</span></a>,<a aria-current="page" class="NavItem-item1gDDTqaXGhm1 NavItem-item_mobile1qG3gd-Mkck- active" href="/reference" target="_self"><i class="NavItem-item-anchorzz3banOxXKjr icon-references"></i><span class="NavItem-textSlZuuL489uiw">API Reference</span></a>,<a class="NavItem-item1gDDTqaXGhm1 NavItem-item_mobile1qG3gd-Mkck- NavItem_dropdown-muted1xJVuczwGc74" href="/cdn-cgi/l/email-protection#dba8aeababb4a9af9bb9aeb5b5a2f5b5beaf" target="_self" to="mailto:support@bunny.net"><span class="__cf_email__" data-cfemail="10636560607f62645072657e7e693e7e7564">[email protected]</span></a>,<a class="NavItem-item1gDDTqaXGhm1 NavItem-item_mobile1qG3gd-Mkck- NavItem_dropdown-muted1xJVuczwGc74" 
href="/login?redirect_uri=/reference/bunnynet-api-overview" target="_self" to="/login?redirect_uri=/reference/bunnynet-api-overview">Log In</a>,<a aria-current="page" class="Sidebar-link2Dsha-r-GKh2 childless text-wrap rm-Sidebar-link active" href="/reference/bunnynet-api-overview" target="_self"><span class="Sidebar-link-textLuTE1ySm4Kqn"><span>API Overview</span></span></a>,
      """}
  ],
    temperature=0.3
)

In [None]:
# Display the raw chat completion response.
response

In [None]:
# Keep the completion id and decode the first choice's message content.
# NOTE(review): json.loads raises json.JSONDecodeError if the model's reply is
# not valid JSON; the system prompt asks for JSON but nothing here enforces it.
chat_id = response['id']
json_response = json.loads(response['choices'][0]['message']['content'])

In [None]:
# Display the decoded JSON reply.
json_response

In [None]:
# Display the id of the chat completion.
chat_id

In [1]:
from codeagi.actions import ResearchAction

# Instantiate the project-local ResearchAction and call the instance right away
# with two strings. NOTE(review): they look like a task title followed by the
# overall goal -- confirm against codeagi.actions.
ra = ResearchAction()('Read FaunaDB documentation', 'Create a Fauna-based blogging engine')

Search Term:  FaunaDB documentation
Result List:  ['https://docs.fauna.com/fauna/current/', 'https://docs.fauna.com/fauna/current/', 'https://fauna.com/home', 'https://pythonhosted.org/faunadb/', 'https://docs.rs/faunadb/', 'https://github.com/fauna/faunadb-python', 'https://apitracker.io/a/fauna', 'https://yarnpkg.com/package/faunadb', 'https://readthedocs.org/projects/faunadb-python/', 'https://fauna.com/features', 'https://yarnpkg.com/package/faunadb', 'https://readthedocs.org/projects/faunadb-python/']
Prompt:  
        Which link out of the list below is the best to use for research for FaunaDB documentation? 
        Generate a JSON response with the following template: {"link": "result_list"}
        ['https://docs.fauna.com/fauna/current/', 'https://docs.fauna.com/fauna/current/', 'https://fauna.com/home', 'https://pythonhosted.org/faunadb/', 'https://docs.rs/faunadb/', 'https://github.com/fauna/faunadb-python', 'https://apitracker.io/a/fauna', 'https://yarnpkg.com/package/faun

Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)

In [2]:
# Display the value bound to `ra`.
# NOTE(review): the recorded output of the previous cell ends in a Timeout, so
# the list shown below presumably comes from an earlier successful run --
# re-run both cells on a fresh kernel to confirm.
ra

['https://www.fauna.com/blog/building-a-blog-with-fauna-graphql-and-react',
 'https://www.fauna.com/blog/how-to-build-a-serverless-blog-with-fauna-db-and-react-hooks',
 'https://www.fauna.com/blog/building-a-blog-with-fauna-and-gatsby',
 'https://www.fauna.com/blog/building-a-serverless-blog-with-fauna-db-and-next-js',
 'https://www.fauna.com/blog/building-a-serverless-blog-with-fauna-db-and-gridsome',
 'https://docs.fauna.com',
 'https://docs.fauna.com/fauna/current/tutorials/blogging-engine/',
 'https://docs.fauna.com/fauna/current/api/fql/',
 'https://community.fauna.com/',
 'https://support.fauna.com/',
 'https://dashboard.fauna.com/accounts/register',
 'https://docs.fauna.com/fauna/current/learn/introduction/what_is_fauna',
 'https://docs.fauna.com/fauna/current/learn/introduction/document_relational',
 'https://docs.fauna.com/fauna/current/learn/introduction/key_concepts',
 'https://docs.fauna.com/fauna/current/learn/introduction/data-model',
 'https://docs.fauna.com/fauna/curren