### Some Setup Code

In [1]:
import os
from langchain.schema import ChatResult
import re
from typing import Any

def extract_file_paths(directory_structure, extensions=None):
    """Flatten an indented, bulleted directory tree into a list of file paths.

    Each non-blank line is one entry such as ``"  - src"``. Nesting is inferred
    from the width of the leading whitespace, so any consistent indentation
    (2 spaces, 4 spaces, tabs) works, as do trees that mix widths between
    branches.

    Args:
        directory_structure: The tree as a single newline-separated string.
        extensions: Optional collection of extensions (without the dot, e.g.
            ``["cs", "json"]``). When given, only entries whose text after the
            last ``.`` is in this collection are returned.

    Returns:
        A list of ``/``-joined paths, in the order the files appear.
    """
    paths = []
    # (indent_width, name) for every entry on the branch currently being read.
    ancestors = []

    for line in directory_structure.split('\n'):
        if not line.strip():
            continue

        body = line.lstrip()
        indent = len(line) - len(body)
        filename = body.strip(' -')
        if not filename:
            # A bare bullet carries no name; skipping it avoids empty path segments.
            continue

        # Anything at the same or a deeper indent cannot be a parent of this
        # entry. Comparing indent widths (rather than a depth count against the
        # stack length) keeps siblings from being nested under each other.
        while ancestors and ancestors[-1][0] >= indent:
            ancestors.pop()

        is_wanted_file = '.' in filename and (
            not extensions or filename.rsplit('.', 1)[-1] in extensions
        )
        if is_wanted_file:
            parent_names = [name for _, name in ancestors]
            paths.append('/'.join(parent_names + [filename]))
        else:
            # Directories (and files filtered out by extension) are pushed; a
            # filtered-out file is popped again as soon as its next sibling is read.
            ancestors.append((indent, filename))

    return paths

def get_chat_result_text(result:ChatResult):
    """Return the text of the first generation contained in a chat result."""
    first_generation = result.generations[0]
    return first_generation.text


def parse_csharp_class(class_code):
    """Summarise the public surface of a C# source file as short text lines.

    This is a regex-based heuristic, not a real C# parser. It reports, in order:
    the first namespace, the first public class, that class's public
    constructors, and every public method (async or not, with any of the usual
    modifiers such as ``static`` or ``override``).

    Args:
        class_code: C# source code as a string.

    Returns:
        A list of strings of the form ``"Namespace: ..."``, ``"Class: ..."``,
        ``"Constructor: Name(params)"`` and ``"Method: ReturnType Name(params)"``.
        Modifiers (including ``async``) are not included in the method lines.
        Parameter lists are collapsed onto one line.
    """
    def _one_line(text):
        # Multi-line parameter lists are folded so each fact stays on one line.
        return " ".join(text.split())

    member_modifiers = (
        r'(?:(?:static|virtual|override|abstract|sealed|async|new|extern|unsafe|partial)\s+)*'
    )
    # Keeps type declarations (e.g. `public record Foo(int X)`) out of the method list.
    not_a_declaration = r'(?!(?:class|struct|interface|record|enum|delegate|event)\b)'
    # Dotted name, optional generic arguments (may contain commas and spaces),
    # optional array ranks, optional nullable marker.
    return_type = r'[\w.]+(?:<[^(){};]*?>)?(?:\[[\s,]*\])*\??'
    # `[^)]*` (unlike `.*?`) also spans newlines inside a parameter list.
    parameter_list = r'\(([^)]*)\)'

    result = []

    # Extract namespace
    namespace = re.search(r'namespace\s+([\w\.]+)', class_code)
    if namespace:
        result.append(f"Namespace: {namespace.group(1)}")

    # Extract class name (tolerating modifiers between `public` and `class`)
    class_name = re.search(
        r'public\s+(?:(?:static|sealed|abstract|partial)\s+)*class\s+(\w+)', class_code
    )
    if class_name:
        result.append(f"Class: {class_name.group(1)}")

    # Extract constructors. When the class name is known, only `public <ClassName>(`
    # counts; otherwise fall back to any `public <Name>(`.
    if class_name:
        constructor_name = '(' + re.escape(class_name.group(1)) + ')'
    else:
        constructor_name = r'(\w+)'
    constructor_pattern = r'public\s+' + constructor_name + r'\s*' + parameter_list
    for constructor in re.finditer(constructor_pattern, class_code):
        result.append(
            f"Constructor: {constructor.group(1)}({_one_line(constructor.group(2))})"
        )

    # Extract methods
    method_pattern = (
        r'public\s+' + member_modifiers + not_a_declaration
        + '(' + return_type + r')\s+(\w+)\s*' + parameter_list
    )
    for method_type, method_name, method_params in re.findall(method_pattern, class_code):
        result.append(f"Method: {method_type} {method_name}({_one_line(method_params)})")

    return result

### Step 1: Describe the Project

In [2]:
# System prompt: the persona given to the chat model for the code-generation requests.
SYSTEM_MESSAGE = """You're an expert software engineer specializing in the AWS platform and Microsoft .NET development. 
You can generate complete production-ready code examples complete with xml comments and helpful code comments. 
Your code quality and readability is impeccable on its own, but where increased 
complexity is present you also add in code comments. While you have deep knowledge of best practices, 
you realize that sometimes real-world practicality outweighs academics. The names you choose for variables, classes and other
components are always meaningful and descriptive. Your ability to organize projects is second to none."""   

# Plain-language description of what the generated project must do.
# NOTE: the constant name is misspelled ("DESCIPTION"); it is left as-is because
# the outline-generation cell refers to it by this exact name.
PROJECT_DESCIPTION="""I'd like to create a service which will fetch an object from S3 storage, parse it (it will contain 20K rows), 
and populate a supabase table with the data. The data represents a list of simple products and their attributes. The object is a json file.
Once the data is in the supabase table, I need to send an email letting "john@biz.com" know the data has been loaded. 
I also need to create a simple API endpoint which will allow me to search the table by product name.
"""

# Alternative set of style constraints, currently disabled:
# PROJECT_STYLE="""
# Components should be loosely coupled and follow the layered architecture. 
# Dependency Injection is very important.
# There should be a single solution file which contains all .NET Core projects.
# Use Nuget packages where appropriate to reduce custom code.
# Do Not Generate Tests. I will write my own tests.
# """

# Active style constraints; combined with PROJECT_DESCIPTION when the outline prompt is built.
PROJECT_STYLE=""" 
Components should be loosely coupled.
I want this to be an AWS lambda based project using .net core.
Use names that are typical for AWS and .NET lambda projects.
Do Not Generate Unit Tests.
"""

### Step 2: Generate the project structure

In [3]:
from langchain.prompts import StringPromptTemplate, ChatMessagePromptTemplate
from pydantic import BaseModel, validator
from langchain.chat_models.openai import ChatOpenAI

class CodeProjectOutlinePromptTemplate(StringPromptTemplate, BaseModel):
    """A custom prompt template that takes in code project information and proposes a file structure.

    The template expects exactly one input variable, ``project_description``,
    and renders a prompt asking for a plain-text, bulleted directory tree.
    """

    @validator("input_variables")
    def validate_input_variables(cls, v):
        """Validate that ``project_description`` is the one and only input variable.

        Raises:
            ValueError: If ``v`` does not consist of exactly ``project_description``.
        """
        if len(v) != 1 or "project_description" not in v:
            # The message names the variable that is actually checked above.
            raise ValueError("project_description must be the only input_variable.")
        return v

    def format(self, **kwargs) -> str:
        """Render the prompt to be sent to the language model.

        Args:
            **kwargs: Must contain ``project_description``, the text placed at
                the top of the prompt.

        Returns:
            The full prompt string, including an example of the expected
            bulleted output format.
        """
        # Generate the prompt to be sent to the language model
        prompt = f"""
{kwargs["project_description"]}
Show me the complete directory structure containing the code files necessary to meet the described need. Do not explain.
Output nothing but the directory structure.

Here is an example of the output format I'm looking for:
- ParentDirectory
  - ChildDirectory
    - ChildFile
- ParentDirectory
    - ChildFile

Output nothing but the directory structure in plain text.
"""
        return prompt

    def _prompt_type(self):
        """Return the identifier string for this prompt type."""
        return "project-file-structure"
    
# Chat model used by the generation cells.
# Model names noted by the author: gpt-3.5-turbo | gpt-4
llm = ChatOpenAI(
    model_name="gpt-4",
    max_tokens=2048,
    temperature=0,
)

# File in which the generated directory outline is saved.
PROJECT_OUTLINE_FILE = "project_outline.txt"

Re-run the following cell until it produces a project outline you are happy with. Each run overwrites the saved outline file.

In [4]:
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Build the outline request: the project description followed by the style constraints.
project_outline_prompt_template = CodeProjectOutlinePromptTemplate(
    input_variables=["project_description"]
)
project_outline_prompt = project_outline_prompt_template.format(
    project_description=f"{PROJECT_DESCIPTION}\n\n{PROJECT_STYLE}"
)

# Ask the model for the directory tree and show it.
response = llm._generate(messages=[HumanMessage(content=project_outline_prompt)])
project_outline = get_chat_result_text(response)
print(project_outline)

# Save the outline to disk, replacing any earlier one.
with open(PROJECT_OUTLINE_FILE, "w") as outline_file:
    outline_file.write(project_outline)

- S3DataLoaderLambda
  - src
    - DataLoadFunction
      - Function.cs
    - DataLoadLibrary
      - S3Helper.cs
      - SupabaseHelper.cs
      - ApiResponse.cs
      - SearchResults.cs
  - dependencies
    - Amazon.Lambda.Core.dll
    - Amazon.Lambda.Serialization.Json.dll
    - Amazon.Lambda.SQSEvents.dll
    - Npgsql.dll
  - Infrastructure
    - template.yaml
  - README.md


In [9]:
import shutil

# Directory into which the generated source files are written.
WORKING_DIRECTORY = "./sandbox"

# FileNotFoundError is a subclass of Exception, so anything catching the old
# generic Exception still works.
if not os.path.exists(PROJECT_OUTLINE_FILE):
    raise FileNotFoundError("Project outline file not found. Please run the previous cell to create one.")

# Use the saved outline itself, so the existence check above guards what is
# actually consumed and hand edits to the file are picked up.
with open(PROJECT_OUTLINE_FILE) as f:
    project_outline = f.read()

# Only these file types will have code generated for them.
files_paths = extract_file_paths(project_outline, ["cs", "json", "csproj", "html", "js"])

# Conversation so far: persona, the outline request, and the outline given in reply.
messages = [
    SystemMessage(content=SYSTEM_MESSAGE),
    HumanMessage(content=project_outline_prompt),
    AIMessage(content=project_outline)
]

Dependency_prompt = """
Sort the files listed in the directory tree by their dependencies from least to most.

Return facts about each file that would be important to other files that would depend on them 
to be aware of during code generation such that names and 
signatures, and constructors are consistent across files. 
"""

code_prompt = HumanMessage(content=Dependency_prompt)

# The question is sent with the history but not stored in it;
# only the model's answer is appended below.
temp_messages = messages + [code_prompt]

result = llm._generate(messages=temp_messages)
definitions = get_chat_result_text(result)
messages.append(AIMessage(content=definitions))

print(definitions)

1. ApiResponse.cs
- Namespace: DataLoadLibrary
- Class: ApiResponse
- Properties: StatusCode, Message

2. SearchResults.cs
- Namespace: DataLoadLibrary
- Class: SearchResults
- Properties: ProductId, ProductName, Attributes

3. S3Helper.cs
- Namespace: DataLoadLibrary
- Class: S3Helper
- Constructor: S3Helper(AmazonS3Client s3Client, string bucketName)
- Method: Task<List<Product>> FetchProductsFromS3(string key)

4. SupabaseHelper.cs
- Namespace: DataLoadLibrary
- Class: SupabaseHelper
- Constructor: SupabaseHelper(string supabaseUrl, string supabaseKey)
- Method: Task ImportProductsToSupabase(List<Product> products)
- Method: Task<List<SearchResults>> SearchProductsByName(string searchTerm)

5. Function.cs
- Namespace: DataLoadFunction
- Class: Function
- Constructor: Function(S3Helper s3Helper, SupabaseHelper supabaseHelper)
- Method: Task<ApiResponse> FunctionHandler(SQSEvent sqsEvent, ILambdaContext context)



In [None]:
# Code is usually presented with markdown code blocks even when asked to exclude them. 
# We need to remove these.
code_markdown_pattern = r"(?<!\\)```(?:[a-z]+\n)?|\n?```(?<!\\)"

# clean workspace
if os.path.exists(WORKING_DIRECTORY):
    print('sandbox exists. deleting...')
    shutil.rmtree(WORKING_DIRECTORY)

sandbox_root = os.path.abspath(WORKING_DIRECTORY)

for file_path in files_paths:
    print(file_path)
    sandbox_path = os.path.join(WORKING_DIRECTORY, file_path)

    # file_path comes from model output; never write outside the sandbox
    # (e.g. because of a leading "/" or a ".." segment).
    if not os.path.abspath(sandbox_path).startswith(sandbox_root + os.sep):
        print('skipping path outside sandbox: ' + file_path)
        continue

    os.makedirs(os.path.dirname(sandbox_path), exist_ok=True)

    code_prompt = HumanMessage(content="Show me the code for: " + file_path + ".\n\nOutput only the code in plain text, no markdown or explanations.")

    # The per-file request is sent with the history but not stored in it.
    temp_messages = messages + [code_prompt]

    result = llm._generate(messages=temp_messages)
    code = get_chat_result_text(result)
    code = re.sub(code_markdown_pattern, "", code)

    # add code interface to chat history to help 
    # improve consistency of  future file generation
    # ----------------------------------------------
    interface_representation = "\n".join(parse_csharp_class(code)) # Remember this is specific to C# code - make better later
    # Non-C# files (json, csproj, html, js) yield no interface lines; do not
    # add an empty message to the history for them.
    if interface_representation:
        messages.append(HumanMessage(content=interface_representation))

    # Explicit encoding so generated code with non-ASCII characters is written
    # the same way on every platform.
    with open(sandbox_path, 'w', encoding='utf-8') as f:
        f.write(code)



In [26]:
# Sample C# source used to try out parse_csharp_class by hand.
class_code = '''
namespace ProductSearchAPI.SupabaseService
{
    public class SupabaseClient
    {
        private readonly SupabaseClientService _supabaseService;

        public SupabaseClient(string connectionString)
        {
            _supabaseService = new SupabaseClientService(connectionString);
        }

        public async Task<List<Product>> SearchProductsByNameAsync(string productName)
        {
            IQueryable<Product> query = _supabaseService.From<Product>();

            if (!string.IsNullOrWhiteSpace(productName))
            {
                query = query.Where(p => p.Name.Contains(productName));
            }

            List<Product> products = await query.ExecuteAsync();
            return products;
        }
        
        public async Task<List<Product>> SearchProductsByIDAsync(int productID)
        {
            IQueryable<Product> query = _supabaseService.From<Product>();

            if (!string.IsNullOrWhiteSpace(productName))
            {
                query = query.Where(p => p.Id == productID);
            }

            List<Product> products = await query.ExecuteAsync();
            return products;
        }
    }
}
'''

# Summarise the sample and show one extracted fact per line.
interface_lines = parse_csharp_class(class_code)
interface_representation = "\n".join(interface_lines)

print(interface_representation)

Namespace: ProductSearchAPI.SupabaseService
Class: SupabaseClient
Constructor: SupabaseClient(string connectionString)
Method: Task<List<Product>> SearchProductsByNameAsync(string productName)
Method: Task<List<Product>> SearchProductsByIDAsync(int productID)
