In [1]:
# Deployment name handed to the OpenAI / Azure chat clients in later cells
# (as `engine=` or `deployment_name=`).
deployment = "gpt4"
# Model name (presumably the model served by that deployment — confirm).
model = "gpt-4"

# 代码生成与验证

In [2]:
import openai
def work_on(input):
    """Send one user message to the chat deployment and return the reply text.

    Args:
        input: Text placed in the "user" message of the request.

    Returns:
        The ``content`` of the message of the first choice in the response.
    """
    system_message = {"role": "system", "content": "You are a software engineer."}
    user_message = {"role": "user", "content": input}
    request_options = dict(
        engine=deployment,  # the deployment name is passed as `engine`
        messages=[system_message, user_message],
        temperature=0.9,
        max_tokens=500,
    )
    completion = openai.ChatCompletion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [3]:
# Task description (written in Chinese) for the model: implement a Python
# function `find_declarations` that takes a source directory and a JSON string
# {package_name, element} and returns the matching Go struct/interface
# declaration, or "Not Found".
prompt = """
实现一个 Python 函数(find_declarations)以从给定的 Golang 源代码中查找结构/接口声明。

该函数需要两个输入参数：
1. 包含源文件的目录。
2. 由包名称和结构/接口名称组成的 JSON 字符串，格式如下：{“package_name”:<package name>,“element”:<struct/interface>}

函数的输出是声明。当未找到请求的结构/接口时，该函数应返回“Not Found”。
"""

In [26]:
# Send the task description to the model and show its reply.
print (work_on(prompt))

这是一个相对复杂的问题，需要用到Python的文件IO，JSON处理和正则表达式。不过在这里我为你提供一个简单的解决方案。它可能无法处理所有可能的Golang源代码格式和布局，但是基本上能满足大部分常见用例。

首先，安装 `go/parser` 和 `go/token` 库以便解析Go语言的源代码。

然后，实现如下的Python函数：

```python
import json
import os
import re

# 正则表达式匹配Go语言的结构或接口声明
RE_STRUCT_OR_INTERFACE = re.compile(r'type\s+{element}\s+(struct|interface)\s+{{[^}}]*}}')

def find_declarations(directory, data_json):
    # 将JSON数据解析为Python的字典
    data = json.loads(data_json)
    
    # 得到包名和元素名
    package_name = data['package_name']
    element = data['element']
    
    # 定义一个文件名模板，因为Go代码文件通常以.go为后缀
    filename_template = '{package_name}.go'
    
    # 在给定的目录中查找Go源文件
    for root, dirs, files in os.walk(directory):
        for file in files:
            # 如果找到匹配的Go源文件
            if file == filename_template:
                # 打开文件并读取内容
                with open(os.path.join(root, file)) as f:
                    content = f.read()
                    
                    # 使用正则表达式查找结构/接口声明
                    match = RE_STRUCT_

In [33]:
import os
import json
import re

def find_declarations(dir_path, json_str):
    """Find a Go struct/interface declaration in the ``.go`` files under a directory.

    Args:
        dir_path: Directory that is walked recursively for ``.go`` files.
        json_str: JSON object string with the keys ``"package_name"`` and
            ``"element"`` (the struct/interface name).

    Returns:
        The source text of the first matching declaration, from ``type`` up to
        and including its closing brace, or the string ``"Not Found"`` when no
        file of the requested package declares the element.

    Raises:
        json.JSONDecodeError: If ``json_str`` is not valid JSON.
        KeyError: If one of the two required keys is missing.
    """
    # Parse the json string to get the package name and the struct/interface name
    json_obj = json.loads(json_str)
    package_name = json_obj["package_name"]
    element_name = json_obj["element"]

    # Both names are escaped so they are matched literally. The package clause
    # must start a line and must not be followed by further identifier
    # characters, so "demo" matches neither "package demo2" nor a "// package demo"
    # comment.
    package_pattern = re.compile(
        r"^[ \t]*package\s+" + re.escape(package_name) + r"(?!\w)", re.MULTILINE
    )
    header_pattern = re.compile(
        r"\btype\s+" + re.escape(element_name) + r"\s+(?:struct|interface)\s*\{"
    )

    # Traverse all the files in the directory
    for root, dirs, files in os.walk(dir_path):
        for file in files:
            if not file.endswith(".go"):
                continue
            # Go source is UTF-8; do not depend on the platform default encoding.
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                content = f.read()

            # Skip files that belong to another package or lack the declaration.
            if not package_pattern.search(content):
                continue
            header = header_pattern.search(content)
            if header is None:
                continue

            # Count braces starting at the opening one so that nested blocks
            # (anonymous structs, `interface{}` parameters) stay inside the
            # result. Braces inside comments or string literals are counted too.
            depth = 0
            for pos in range(header.end() - 1, len(content)):
                char = content[pos]
                if char == "{":
                    depth += 1
                elif char == "}":
                    depth -= 1
                    if depth == 0:
                        return content[header.start():pos + 1]

    return "Not Found"

In [5]:
# Sample inputs for find_declarations (source directory and JSON query) and
# the declaration text the lookup is expected to produce.
src_dir = "./llm_friendly_code"
interface_dec = '{"package_name":"demo","element":"TokenCreator"}'
expected = """
type TokenCreator interface {
	CreateToken(data string) string
}
"""


In [None]:
# Print the lookup result for the sample query (it is not compared with
# `expected` programmatically here).
print (find_declarations(src_dir, interface_dec))

In [7]:
from io import StringIO
import sys
from typing import Dict, Optional
from langchain.chat_models import AzureChatOpenAI
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI


class PythonREPL:
    """Simulates a standalone Python REPL."""

    def __init__(self):
        pass

    def run(self, command: str) -> str:
        """Run command and return anything printed.

        Lines that start with a Markdown code fence (```) are dropped before
        execution, so a fenced snippet can be passed as-is.

        Args:
            command: Python source code to execute.

        Returns:
            Everything the command wrote to ``sys.stdout``. If the command
            raised an ``Exception``, ``str()`` of that exception is returned
            instead.
        """
        command = '\n'.join(line for line in command.split('\n') if not line.startswith('```'))
        # NOTE(security): exec() runs arbitrary code with this module's globals.
        # The input comes from a language model, so only use this in a
        # sandboxed / throw-away environment.
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            exec(command, globals())
            output = mystdout.getvalue()
        except Exception as e:
            output = str(e)
        finally:
            # Always undo the redirection, including when something that is not
            # an Exception (SystemExit, KeyboardInterrupt) propagates out of exec.
            sys.stdout = old_stdout
        return output
      
# Chat model used by the agent: temperature 0.2, max_tokens 600.
llm = AzureChatOpenAI(deployment_name=deployment, temperature=0.2, max_tokens = 600)
# Wrap PythonREPL.run as a tool named "Python REPL"; the long string is the
# tool description and includes a usage example.
python_repl = Tool(
        "Python REPL",
        PythonREPL().run,
        """A Python shell. Use this to execute python commands. 
        Input should be a valid python command.
        If you expect output it should be printed out.
        For example: to verify the the following python function
        ---
        def add(a, b):
            return (a+b)
        ---
        we can invoke the tool with the input 
        "
        def add(a, b):
            return (a+b)
        print (add(1,2))
        
        "
        """,
    )
# The REPL is the only tool given to the agent.
tools = [python_repl]
# Agent of type "zero-shot-react-description" with verbose output enabled.
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True, handle_parsing_errors=True)

In [23]:
# Quick check that the REPL wrapper returns what the command printed.
PythonREPL().run('print("Hello Python")')

'Hello Python\n'

In [8]:
# Extend the task prompt with a concrete example (function inputs and the
# expected output) that the agent is asked to test against before answering.
prompt_1 = prompt + f"\n 给出回答前请用以下例子测试生成的函数:\n 函数输入：{src_dir}, {interface_dec}; 函数输出：{expected}"
agent.run(prompt_1)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis problem requires me to write a Python function that can parse Golang source code to find structure/interface declarations. The function takes two inputs: the directory of the source files and a JSON string specifying the package name and the structure/interface name. The function should return the declaration of the structure/interface if found, or "Not Found" if not.

To solve this problem, I'll need to use the os and json libraries in Python. The os library will allow me to navigate the file system and read the source files, and the json library will allow me to parse the JSON string input.

I'll also need to write a regular expression to match the structure/interface declarations in the Golang source code. This regular expression will need to account for the package name and the structure/interface name, which will be provided in the JSON string input.

I'll start by writing a function that takes the directory and JSO

'```python\nimport os\nimport json\nimport re\n\ndef find_declarations(directory, json_string):\n    # Parse the JSON string\n    json_data = json.loads(json_string)\n    package_name = json_data["package_name"]\n    element_name = json_data["element"]\n\n    # Regular expression to match structure/interface declarations\n    regex = re.compile(r"(type\\s+" + element_name + r"\\s+(struct|interface)\\s+{\\s+[^}]*\\s+})")\n\n    # Read the source files in the directory\n    for root, dirs, files in os.walk(directory):\n        for file in files:\n            if file.endswith(".go"):\n                with open(os.path.join(root, file), "r") as f:\n                    content = f.read()\n                    # Check if the file is in the correct package\n                    if "package " + package_name in content:\n                        # Search for the structure/interface declaration\n                        match = regex.search(content)\n                        if match:\n              

In [9]:
import os
import json
import re

def find_declarations(directory, json_string):
    """Find a Go struct/interface declaration in the ``.go`` files under a directory.

    Args:
        directory: Directory that is walked recursively for ``.go`` files.
        json_string: JSON object string with the keys ``"package_name"`` and
            ``"element"`` (the struct/interface name).

    Returns:
        The source text of the first matching declaration, from ``type`` up to
        and including its closing brace, or the string ``"Not Found"`` when no
        file of the requested package declares the element.

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
        KeyError: If one of the two required keys is missing.
    """
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]

    # Names are escaped so they match literally. The package clause must start
    # a line and end at an identifier boundary ("demo" must not match
    # "package demo2"). Whitespace before "{" is optional so `struct{}` is found.
    package_regex = re.compile(
        r"^[ \t]*package\s+" + re.escape(package_name) + r"(?!\w)", re.MULTILINE
    )
    header_regex = re.compile(
        r"\btype\s+" + re.escape(element_name) + r"\s+(?:struct|interface)\s*\{"
    )

    # Read the source files in the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith(".go"):
                continue
            # Go source is UTF-8; do not depend on the platform default encoding.
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                content = f.read()

            # Check if the file is in the correct package
            if not package_regex.search(content):
                continue
            # Search for the start of the structure/interface declaration
            header = header_regex.search(content)
            if header is None:
                continue

            # Balance braces from the opening one so nested `{...}` (anonymous
            # structs, `interface{}` parameters) are kept in the result.
            # Braces inside comments or string literals are counted as well.
            depth = 0
            for pos in range(header.end() - 1, len(content)):
                char = content[pos]
                if char == "{":
                    depth += 1
                elif char == "}":
                    depth -= 1
                    if depth == 0:
                        return content[header.start():pos + 1]

    return "Not Found"

# 解决依赖问题

## 本地文件搜索

In [22]:
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import LLMRequestsChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import AzureOpenAI
import time
import os
import json
import re

src_dir = "./llm_friendly_code"

def find_declarations(directory, json_string):
    """Find a Go struct/interface declaration in the ``.go`` files under a directory.

    Args:
        directory: Directory that is walked recursively for ``.go`` files.
        json_string: JSON object string with the keys ``"package_name"`` and
            ``"element"`` (the struct/interface name).

    Returns:
        The source text of the first matching declaration, from ``type`` up to
        and including its closing brace, or the string ``"Not Found"`` when no
        file of the requested package declares the element.

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
        KeyError: If one of the two required keys is missing.
    """
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]

    # Names are escaped so they match literally. The package clause must start
    # a line and end at an identifier boundary ("demo" must not match
    # "package demo2"). Whitespace before "{" is optional so `struct{}` is found.
    package_regex = re.compile(
        r"^[ \t]*package\s+" + re.escape(package_name) + r"(?!\w)", re.MULTILINE
    )
    header_regex = re.compile(
        r"\btype\s+" + re.escape(element_name) + r"\s+(?:struct|interface)\s*\{"
    )

    # Read the source files in the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith(".go"):
                continue
            # Go source is UTF-8; do not depend on the platform default encoding.
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                content = f.read()

            # Check if the file is in the correct package
            if not package_regex.search(content):
                continue
            # Search for the start of the structure/interface declaration
            header = header_regex.search(content)
            if header is None:
                continue

            # Balance braces from the opening one so nested `{...}` (anonymous
            # structs, `interface{}` parameters) are kept in the result.
            # Braces inside comments or string literals are counted as well.
            depth = 0
            for pos in range(header.end() - 1, len(content)):
                char = content[pos]
                if char == "{":
                    depth += 1
                elif char == "}":
                    depth -= 1
                    if depth == 0:
                        return content[header.start():pos + 1]

    return "Not Found"

def get_dep(json_string):
    """Look up a declaration in ``src_dir`` from a JSON query string.

    Args:
        json_string: Query passed through unchanged to ``find_declarations``.

    Returns:
        Whatever ``find_declarations(src_dir, json_string)`` returns.
    """
    declaration = find_declarations(src_dir, json_string)
    return declaration

# Chat model for the code-writing agent, with temperature 0.
llm = AzureChatOpenAI(deployment_name=deployment, temperature=0)

# Add the "dependency lookup" tool to the ReAct process
# NOTE(review): the doubled braces in the description are presumably there
# because the text ends up inside a prompt template — confirm before changing.
tools = [
    Tool(
        name="find_declarations", func=get_dep,
        description="""Only invoke it when you need to find the declaration of the interface/struct/method that is not in the given code. 
        The input is a JSON string includes package name and the struct/interface name, 
          and its the format {{"package_name":<package name>,"element":<struct/interface>}}
        """)
]


In [23]:
def write_code(input):
    """Run a newly created agent on ``input`` and print its answer.

    Args:
        input: Task description handed to the agent.
    """
    agent_options = {
        "agent": "zero-shot-react-description",
        "handle_parsing_errors": True,
        "max_iterations": 10,
        "verbose": True,
    }
    coding_agent = initialize_agent(tools, llm, **agent_options)
    answer = coding_agent.run(input)
    print(answer)

In [24]:
# Ask the agent for a Go implementation of the demo.TokenCreator interface.
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.TokenCreator interface.
 The implementation is to generate the token with uuid and its package is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to know what methods the demo.TokenCreator interface has in order to implement it.
Action: find_declarations
Action Input: {"package_name":"demo","element":"TokenCreator"} [0m
Observation: [36;1m[1;3mtype TokenCreator interface {
	CreateToken(data string) string
}[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: 
Here is a simple implementation of the `demo.TokenCreator` interface in Go. This implementation uses the `github.com/google/uuid` package to generate a UUID as the token.

```go
package demo

import (
	"github.com/google/uuid"
)

type MyTokenCreator struct{}

func (m *MyTokenCreator) CreateToken(data string) string {
	return uuid.New().String()
}
```

In this code, `MyTokenCreator` is a struct that implements the `TokenCreator` interface. The `CreateToken` method generates a new UUID and returns it as a string.[0m

[1m> Finished chain.[0m
Here is a simple implementation of the `demo.Tok

In [26]:
def read_file(file_path, encoding="utf-8"):
    """Return the full text content of a file.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's default encoding.

    Returns:
        The file's content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid in ``encoding``.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()

In [27]:
def write_unit_tests(filepath):
    """Ask a newly created agent to write unit tests for the code in a file.

    Args:
        filepath: Path of the source file whose content is appended to the
            instructions.

    Returns:
        The value returned by the agent's ``run`` call.
    """
    test_agent = initialize_agent(
        tools,
        llm,
        agent="zero-shot-react-description",
        handle_parsing_errors=True,
        max_iterations=10,
        verbose=True,
    )
    instructions = """
    You are a very senior golang programmer.
    Please, follow the instructions:
     1. consider on the edge cases and errors
     2. make sure the generated unit test code in the same package as the tested code.
    write an unit tests for the following code: \n ---"""
    # The file content follows directly after the "---" marker.
    request = instructions + read_file(filepath)
    return test_agent.run(request)

In [30]:
# Generate unit tests for demo/skeleton.go and print the agent's answer.
print(write_unit_tests(src_dir+"/demo/skeleton.go"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to write unit tests for the ProcessingServiceImpl struct methods. To do this, I need to understand the dependencies of this struct, which are Storage, Processor, and TokenCreator. I need to mock these dependencies in my tests. I also need to consider edge cases and errors for the methods Process and Retrive. 

Action: find_declarations
Action Input: {"package_name":"demo","element":"Storage"}
[0m
Observation: [36;1m[1;3mtype Storage interface {
	// RetiveData is to retrive the data by the associated token.
	RetiveData(token string) (string, error)

	// StoreData is to persist the data,
	// input paramters:
	//   token is used to retrive the associated data
	//
	StoreData(token string, data string) error
}[0m
Thought:[32;1m[1;3mThe Storage interface has two methods: RetiveData and StoreData. I need to mock these methods in my tests. 

Action: find_declarations
Action Input: {"package_name":"demo","element":"Proces

In [32]:
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
# Load the files under src_dir that have the ".go" suffix, parsed as Go.
# NOTE(review): the meaning of parser_threshold=500 is not visible here — confirm.
loader = GenericLoader.from_filesystem(
    src_dir,
    glob="**/*",
    suffixes=[".go"],
    parser=LanguageParser(language=Language.GO, parser_threshold=500)
)
documents = loader.load()
print(len(documents))

from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the loaded documents with a Go-specific splitter
# (chunk_size=2000, chunk_overlap=200).
go_splitter = RecursiveCharacterTextSplitter.from_language(language=Language.GO, 
                                                               chunk_size=2000, 
                                                               chunk_overlap=200)
texts = go_splitter.split_documents(documents)
print(len(texts))

from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI
# Embed the chunks with the "embedding" deployment and store them in a Chroma
# database persisted under 'codes/'.
embedding = OpenAIEmbeddings(deployment="embedding",chunk_size=1)
persist_directory = 'codes/'
db = Chroma.from_documents(texts, embedding, persist_directory=persist_directory)
db.persist()

Using embedded DuckDB with persistence: data will be stored in: codes/


10
22


## 采用向量数据库实现

In [36]:
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
# Reuse the deployment/model names configured in the first cell instead of
# repeating the literals, so the notebook has a single place to change them.
llm = AzureChatOpenAI(deployment_name=deployment, model_name=model, temperature=0, max_tokens=1000)
# Conversation memory exposed to the chain under the "chat_history" key.
memory = ConversationSummaryMemory(llm=llm, memory_key="chat_history", return_messages=True)
# Retriever over the vector store built earlier: MMR search with k=8.
retriever = db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 8},
)
# Question-answering chain that combines the model, the retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
def find_declarations(directory, json_string):
    """Ask the retrieval QA chain for the declaration of a struct/interface.

    Args:
        directory: Not used by this implementation.
        json_string: JSON object string with the keys ``"package_name"`` and
            ``"element"``.

    Returns:
        The value returned by ``qa`` for the generated question, passed back
        unchanged.
    """
    request = json.loads(json_string)
    package_name, element_name = request["package_name"], request["element"]
    question = f"give me the declaration of {element_name} in package {package_name}"
    return qa(question)

In [37]:
# Run the retrieval-based lookup for the sample query and print what it returns.
print(find_declarations(src_dir, interface_dec))

{'question': 'give me the declaration of TokenCreator in package demo', 'chat_history': [SystemMessage(content='', additional_kwargs={})], 'answer': 'The declaration of TokenCreator in the demo package is an interface with a single method:\n\n```go\ntype TokenCreator interface {\n\tCreateToken(data string) string\n}\n```\n\nThis interface defines a contract for any type that has a method `CreateToken` which takes a string as input and returns a string.'}


In [38]:
# Ask the agent for a Go implementation of the demo.Storage interface.
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.Storage interface.
 The implementation's package also is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mFirst, I need to know what methods the demo.Storage interface has. 
Action: find_declarations
Action Input: {"package_name":"demo","element":"Storage"}[0m
Observation: [36;1m[1;3m{'question': 'give me the declaration of Storage in package demo', 'chat_history': [SystemMessage(content='The human asks for the declaration of TokenCreator in the demo package. The AI explains that TokenCreator is an interface with a single method, `CreateToken`, which takes a string as input and returns a string. This interface defines a contract for any type that has a `CreateToken` method.', additional_kwargs={})], 'answer': 'The declaration of Storage in the demo package is an interface with two methods:\n\n```go\ntype Storage interface {\n\t// RetiveData is to retrive the data by the associated token.\n\tRetiveData(token string) (string, error)\n\n\t// StoreData is to persist the data,\n\t// input paramters:\n\t//   token is used to retrive