### Some Setup Code

In [2]:
import os
from langchain.agents.agent_toolkits import create_python_agent
from langchain.callbacks import get_openai_callback
from langchain.schema import BaseOutputParser, ChatResult
from langchain.memory import ConversationBufferMemory
from langchain.agents.conversational.prompt import FORMAT_INSTRUCTIONS
import re
import json
from typing import Any


class NewAgentOutputParser(BaseOutputParser):
    """Output parser that extracts ``action`` and ``action_input`` from an LLM reply.

    The reply is first scanned with regular expressions for the first
    ``"action": "..."`` and ``"action_input": "..."`` pairs. When either one is
    missing or empty, the reply is instead decoded as JSON, optionally wrapped
    in a Markdown code fence.
    """

    def get_format_instructions(self) -> str:
        """Return the format instructions imported from the conversational agent prompt."""
        return FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> Any:
        """Parse an LLM reply into an ``{"action": ..., "action_input": ...}`` dict.

        Args:
            text: Raw reply text produced by the model.

        Returns:
            A dict with the keys ``"action"`` and ``"action_input"``.

        Raises:
            json.JSONDecodeError: The regex scan did not yield both values and
                the remaining text is not valid JSON.
            KeyError: The decoded JSON lacks ``"action"`` or ``"action_input"``.
        """
        print("-" * 20)
        cleaned_output = text.strip()
        # Regex patterns to match action and action_input.
        # NOTE: ``[^"]*`` stops at the first double quote, so a value that
        # contains an escaped quote is cut off at the backslash.
        action_pattern = r'"action":\s*"([^"]*)"'
        action_input_pattern = r'"action_input":\s*"([^"]*)"'

        # Extracting first action and action_input values
        action = re.search(action_pattern, cleaned_output)
        action_input = re.search(action_input_pattern, cleaned_output)

        # Bind both names up front so a failed match can never leave them
        # undefined when they are tested below.
        action_value = action.group(1) if action else None
        action_input_value = action_input.group(1) if action_input else None

        if action:
            print(f"First Action: {action_value}")
        else:
            print("Action not found")

        if action_input:
            print(f"First Action Input: {action_input_value}")
        else:
            print("Action Input not found")

        print("-" * 20)
        if action_value and action_input_value:
            return {"action": action_value, "action_input": action_input_value}

        # Fallback: treat the reply as JSON. Take the payload of the first code
        # fence (optionally tagged ``json``); the fence may be unterminated.
        # A reply without any fence is decoded as-is.
        fenced = re.search(r"```(?:json)?\s*(.*?)\s*(?:```|\Z)", cleaned_output, re.DOTALL)
        if fenced:
            cleaned_output = fenced.group(1)
        response = json.loads(cleaned_output.strip())
        return {"action": response["action"], "action_input": response["action_input"]}
    
    
def extract_file_paths(directory_structure, extensions=None):
    """Convert an indented, bulleted directory outline into a list of file paths.

    Each non-blank line is one entry. Nesting is derived from the amount of
    leading whitespace, so any consistent indentation width works. An entry is
    treated as a file when its name contains a ``.`` and it has no children;
    every other entry is treated as a directory.

    Args:
        directory_structure: Outline text, e.g. ``"- Dir\\n  - File.cs"``.
        extensions: Optional collection of extensions without the dot
            (e.g. ``["cs"]``). When given, only files whose extension is in
            the collection are returned. When falsy, every file is returned.

    Returns:
        A list of ``/``-joined paths in the order the files appear.
    """
    # Pass 1: reduce every non-blank line to (indent width, entry name).
    entries = []
    for raw_line in directory_structure.splitlines():  # also handles CRLF input
        line = raw_line.expandtabs(2)
        if not line.strip():
            continue
        indent = len(line) - len(line.lstrip(' '))
        # Drop the bullet/padding and any trailing slash on directory names.
        name = line.strip().strip(' -').rstrip('/')
        if not name:
            continue
        entries.append((indent, name))

    # Pass 2: walk the entries while tracking the currently open directories.
    paths = []
    stack = []  # (indent, name) of each ancestor directory, outermost first
    for index, (indent, name) in enumerate(entries):
        # Close every directory that is not an ancestor of this entry.
        while stack and stack[-1][0] >= indent:
            stack.pop()

        # A more deeply indented next entry means this one is a directory,
        # even if its name contains a dot (e.g. "MainProject.Services").
        has_children = index + 1 < len(entries) and entries[index + 1][0] > indent

        if '.' in name and not has_children:
            if not extensions or name.rsplit('.', 1)[-1] in extensions:
                paths.append('/'.join([parent for _, parent in stack] + [name]))
        else:
            stack.append((indent, name))

    return paths

def get_chat_result_text(result: ChatResult):
    """Return the ``text`` of the first entry in ``result.generations``."""
    first_generation = result.generations[0]
    return first_generation.text

### Step 1: Describe the Project

In [3]:
# System prompt describing the persona the model should adopt. It is sent as
# the SystemMessage when the individual code files are generated.
SYSTEM_MESSAGE = """You're an expert software engineer specializing in Azure and .NET development. You can generate complete production-ready 
code examples complete with xml comments and helpful code comments. Your code quality is very readable on its own, but where increased 
complexity is present you also add in code comments. Unit testable via dependency injection should be considered. 
While you have deep knowledge of best practices, you realize that sometimes real-world practicality outweighs academics."""   

# Free-text description of what the generated project must do.
# NOTE: the name is misspelled ("DESCIPTION") but is referenced with this exact
# spelling by the outline-generation cell, so both must be renamed together.
PROJECT_DESCIPTION="""I'd like to create a service which will fetch an object from S3 storage, parse it (it will contain 20K rows), 
and populate a supabase table with the data. The data represents a list of simple products and their attributes. The object is a json file.
Once the data is in the supabase table, I need to send an email letting "john@biz.com" know the data has been loaded. 
I also need to create a simple API endpoint which will allow me to search the table by product name.
"""

# Coding-style and architecture constraints. This text is appended to the
# project description when the outline prompt is built.
PROJECT_STYLE="""Follow SOLID principals. 
Add xml comments to all public methods and classes. 
Components should be loosely coupled and follow the layered architecture.
Dependency Injection is very important.
There should be a single solution file which contains all .NET Core projects.
Use Nuget packages where appropriate to reduce custom code.
Do Not Generate Tests. I will write my own tests.
"""

# Sample of the bulleted, two-space-indented outline format.
# NOTE(review): not referenced by any other cell visible in this notebook;
# the outline prompt template embeds its own example instead.
DIRECTORY_STRUCTURE_EXAMPLE = """Here is an example of the format I'm looking for:
- MainProject.Services
  - Interfaces
    - IServiceA.cs
  - Services
    - ServiceA.cs"""


### Step 2: Generate the project structure

In [5]:
from langchain.prompts import StringPromptTemplate, ChatMessagePromptTemplate
from pydantic import BaseModel, validator

class CodeProjectOutlinePromptTemplate(StringPromptTemplate, BaseModel):
    """A custom prompt template that takes in code project information and proposes a file structure.

    The template accepts exactly one input variable, ``project_description``,
    and renders a prompt asking for nothing but a plain-text directory outline.
    """

    @validator("input_variables")
    def validate_input_variables(cls, v):
        """Validate that ``project_description`` is the one and only input variable.

        Raises:
            ValueError: ``v`` does not consist of exactly ``project_description``.
        """
        if len(v) != 1 or "project_description" not in v:
            # The message names the variable that is actually required.
            raise ValueError("project_description must be the only input_variable.")
        return v

    def format(self, **kwargs) -> str:
        """Render the outline prompt.

        Args:
            **kwargs: Must contain ``project_description``, the text placed at
                the top of the prompt.

        Returns:
            The complete prompt string to send to the language model.

        Raises:
            KeyError: ``project_description`` was not supplied.
        """
        # Generate the prompt to be sent to the language model.
        # NOTE(review): the second example entry below is indented by four
        # spaces rather than two; a model may mirror that inconsistency.
        prompt = f"""
{kwargs["project_description"]}
Show me the complete directory structure containing the code files necessary to meet the described need. Do not explain. Do not generate tests files.
Output nothing but the directory structure.

Here is an example of the output format I'm looking for:
- ParentDirectory
  - ChildDirectory
    - ChildFile
- ParentDirectory
    - ChildFile

Output nothing but the directory structure in plain text.
"""
        return prompt

    def _prompt_type(self):
        """Return the identifier string for this prompt type."""
        return "project-file-structure"
    


In [17]:
from langchain.chat_models.openai import ChatOpenAI

# Chat model shared by the generation cells below.
# Candidate values for model_name: gpt-3.5-turbo | gpt-4
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    max_tokens=2048,
    temperature=0,
)

Re-run the following cell until it produces a project outline you are happy with.

In [None]:
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Build the outline prompt from the project description followed by the style
# rules, separated by a blank line.
project_outline_prompt_template = CodeProjectOutlinePromptTemplate(
    input_variables=["project_description"],
)
project_outline_prompt = project_outline_prompt_template.format(
    project_description="\n\n".join([PROJECT_DESCIPTION, PROJECT_STYLE]),
)

# Ask the chat model for the directory outline as a single human turn.
response = llm._generate(messages=[HumanMessage(content=project_outline_prompt)])

# Keep the outline text for the code-generation cell and show it for review.
project_outline = get_chat_result_text(response)
print(project_outline)

In [None]:
import shutil

# Directory that receives the generated files; it is wiped on every run.
WORKING_DIRECTORY = "./sandbox"
# Upper bound on the number of files generated per run (one model call each).
MAX_FILES = 8

files_paths = extract_file_paths(project_outline, ["cs"])

# Shared conversation prefix: persona, the outline request, and the outline
# the model answered with. Each file request is appended to a copy of this.
messages = [
    SystemMessage(content=SYSTEM_MESSAGE),
    HumanMessage(content=project_outline_prompt),
    AIMessage(content=project_outline)
]

# Code is usually presented with markdown code blocks even when asked to exclude them.
# We need to remove these.
# First alternative: an opening fence plus its optional language tag line
# (the tag may contain characters such as "#" or "+", e.g. "c#").
# Second alternative: a closing fence together with the newline before it.
# Fences escaped with a backslash are left untouched by both alternatives.
code_markdown_pattern = r"(?<!\\)```(?:[\w#+.-]*[ \t]*\r?\n)?|\r?\n?(?<!\\)```"

# clean workspace
if os.path.exists(WORKING_DIRECTORY):
    print('sandbox exists. deleting...')
    shutil.rmtree(WORKING_DIRECTORY)

# Absolute sandbox root, used to reject paths that would land outside it.
sandbox_root = os.path.abspath(WORKING_DIRECTORY)

for file_path in files_paths[:MAX_FILES]:
    print(file_path)
    sandbox_path = os.path.join(WORKING_DIRECTORY, file_path)

    # The paths come from model output, so treat them as untrusted: an
    # absolute path or ".." segments must not escape the sandbox.
    if not os.path.abspath(sandbox_path).startswith(sandbox_root + os.sep):
        print('skipping path outside of sandbox: ' + file_path)
        continue

    os.makedirs(os.path.dirname(sandbox_path), exist_ok=True)

    code_prompt = HumanMessage(content="Show me the code for: " + file_path + ".\n\nOutput only the code in plain text, no markdown or explanations.")

    # Fresh list per file so requests for earlier files are not carried along.
    temp_messages = messages + [code_prompt]

    result = llm._generate(messages=temp_messages)
    code = get_chat_result_text(result)
    code = re.sub(code_markdown_pattern, "", code)

    # Explicit encoding: generated code may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to handle them.
    with open(sandbox_path, 'w', encoding='utf-8') as f:
        f.write(code)

