In [1]:
deployment = "gpt4"
model = "gpt-4"

# 代码生成与验证

In [2]:
import openai
def work_on(input):
    response = openai.ChatCompletion.create(
        engine=deployment, # 如果直接访问OpenAI GPT服务的同学，这里不要使用engine这个参数，要使用model，如： model=“gpt-4”
        messages=[
            {"role": "system", "content": "You are a software engineer."},   
            {"role": "user", "content": input}
        ],
        temperature = 0.9, 
        max_tokens = 500
      )
    return response.choices[0].message.content

In [12]:
prompt = """
实现一个 Python 函数(find_declarations)以从给定的 Golang 源代码中查找结构/接口声明。

该函数需要两个输入参数：
1. 包含源文件的目录。
2. 由包名称和结构/接口名称组成的 JSON 字符串，格式如下：{“package_name”:<package name>,“element”:<struct/interface>}

函数的输出是声明。当未找到请求的结构/接口时，该函数应返回“Not Found”。
"""

In [14]:
print (work_on(prompt))

因为 Golang 和 Python 的语法及其结构都是有显著差异的，直接解析 Golang 的代码在 Python 中实现会很复杂，这需要用到抽象语法树（AST）相关知识。然而，这种方法在这里可能过于复杂，所以我们将采取一种简化的方法，使用正则表达式搜索结构和接口的声明。

以下是 `find_declarations` 函数的一个可能的实现：

```python
import os
import re
import json

def find_declarations(directory, json_str):
    # parse the json string
    obj = json.loads(json_str)
    package_name = obj['package_name']
    element = obj['element']

    # traverse the files in the directory
    for filename in os.listdir(directory):
        if not filename.endswith('.go'):
            continue

        # read the file
        with open(os.path.join(directory, filename)) as f:
            content = f.read()

        # find the package declaration
        match = re.search(r'package\s+' + re.escape(package_name), content)
        if not match:
            continue

        # find the structure/interface declaration
        match = re.search(r'(type\s+' + re.escape(element) + r'\s+(struct|interface)\s*\{)', content)
        if match:
            offs

In [15]:
import os
import re
import json

def find_declarations(directory, json_str):
    # parse the json string
    obj = json.loads(json_str)
    package_name = obj['package_name']
    element = obj['element']

    # traverse the files in the directory
    for filename in os.listdir(directory):
        if not filename.endswith('.go'):
            continue

        # read the file
        with open(os.path.join(directory, filename)) as f:
            content = f.read()

        # find the package declaration
        match = re.search(r'package\s+' + re.escape(package_name), content)
        if not match:
            continue

        # find the structure/interface declaration
        match = re.search(r'(type\s+' + re.escape(element) + r'\s+(struct|interface)\s*\{)', content)
        if match:
            offset = match.end()
            braces = 1
            while offset < len(content) and braces > 0:
                if content[offset] == '{':
                    braces += 1
                elif content[offset] == '}':
                    braces -= 1
                offset += 1

            # return the declaration
            declaration = content[match.start():offset]
            return declaration

    # not found
    return 'Not Found'

In [3]:
src_dir = "./llm_friendly_code"
interface_dec = '{"package_name":"demo","element":"TokenCreator"}'
expected = """
type TokenCreator interface {
	CreateToken(data string) string
}
"""
interface_dec1 = '{"package_name":"demo","element":"Processor"}'

In [17]:
print (find_declarations(src_dir, interface_dec1))

Not Found


In [12]:
from io import StringIO
import sys
from typing import Dict, Optional
from langchain.chat_models import AzureChatOpenAI
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI


class PythonREPL:
    """Simulates a standalone Python REPL."""

    def __init__(self):
        pass        

    def run(self, command: str) -> str:
        command = '\n'.join(line for line in command.split('\n') if not line.startswith('```'))
        """Run command and returns anything printed."""
        # sys.stderr.write("EXECUTING PYTHON CODE:\n---\n" + command + "\n---\n")
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            exec(command, globals())
            sys.stdout = old_stdout
            output = mystdout.getvalue()
        except Exception as e:
            sys.stdout = old_stdout
            output = str(e)
        # sys.stderr.write("PYTHON OUTPUT: \"" + output + "\"\n")
        return output
# from langchain.chat_models import ChatOpenAI #直接访问OpenAI的GPT服务

#llm = ChatOpenAI(model_name="gpt-4", temperature=0) #直接访问OpenAI的GPT服务
llm = AzureChatOpenAI(deployment_name = deployment, model_name=model, temperature=0, max_tokens=1000) #通过Azure的OpenAI服务

python_repl = Tool(
        "Python REPL",
        PythonREPL().run,
        """A Python shell. Use this to execute python commands. 
        Input should be a valid python command.
        If you expect output it should be printed out.
        For example: to verify the the following python function
        ---
        def add(a, b):
            return (a+b)
        ---
        we can invoke the tool with the input 
        "
        def add(a, b):
            return (a+b)
        print (add(1,2))
        
        "
        """,
    )
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True, handle_parsing_errors=True)

In [13]:
PythonREPL().run('print("Hello Python")')

'Hello Python\n'

In [46]:
prompt_1 = prompt + f"\n 给出回答前请用以下例子测试生成的函数:\n 函数输入：{src_dir}, {interface_dec}; 函数输出：{expected}"
print(agent.run(prompt_1))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis problem requires me to write a Python function that can parse Golang source code to find structure/interface declarations. The function needs to take in two parameters: a directory containing the source files and a JSON string specifying the package name and the structure/interface name. The function should return the declaration of the structure/interface if it is found, or "Not Found" if it is not.

To solve this problem, I will need to use the os and json libraries in Python. The os library will allow me to navigate the file system and open files, while the json library will allow me to parse the JSON string.

I will also need to write a regular expression to match the structure/interface declaration in the Golang source code. This regular expression will need to account for the possibility of whitespace and comments in the code.

The general approach will be to iterate over all the files in the given directory, open 

In [14]:
import os
import json
import re

def find_declarations(directory, json_string):
    # Parse JSON string
    data = json.loads(json_string)
    package_name = data["package_name"]
    element = data["element"]

    # Compile regular expression
    pattern = re.compile(r"(type\s+" + re.escape(element) + r"\s+interface\s+{[^}]*})", re.DOTALL)

    # Iterate over all files in directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Open file and read contents
            try:
                with open(os.path.join(root, file), "r") as f:
                    contents = f.read()

                    # Search for declaration
                    match = pattern.search(contents)
                    if match:
                        return match.group(1)
            except UnicodeDecodeError:
                # Skip files that are not text files
                continue

    # Return "Not Found" if declaration was not found in any file
    return "Not Found"

In [7]:
print (find_declarations(src_dir, interface_dec))

type TokenCreator interface {
    CreateToken(data string) string
}


In [9]:
interface_dec = '{"package_name":"demo","element":"Storage"}'
print (find_declarations(src_dir, interface_dec))

type Storage interface {
	// RetiveData is to retrive the data by the associated token.
	RetieveData(token string) (string, error)

	// StoreData is to persist the data,
	// input paramters:
	//   token is used to retrive the associated data
	//
	StoreData(token string, data string) error
}


# 解决依赖问题

## 本地文件搜索

In [15]:
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import LLMRequestsChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import AzureOpenAI
import time
import os
import json
import re

src_dir = "./llm_friendly_code"

def find_declarations(directory, json_string):
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]

    # Regular expression to match structure/interface declarations
    regex = re.compile(r"(type\s+" + element_name + r"\s+(struct|interface)\s+{\s+[^}]*\s+})")

    # Read the source files in the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".go"):
                with open(os.path.join(root, file), "r") as f:
                    content = f.read()
                    # Check if the file is in the correct package
                    if "package " + package_name in content:
                        # Search for the structure/interface declaration
                        match = regex.search(content)
                        if match:
                            return match.group(1)

    return "Not Found"

def get_dep(json_string):
    return find_declarations(src_dir, json_string)

# from langchain.chat_models import ChatOpenAI #直接访问OpenAI的GPT服务

#llm = ChatOpenAI(model_name="gpt-4", temperature=0) #直接访问OpenAI的GPT服务
llm = AzureChatOpenAI(deployment_name = deployment, model_name=model, temperature=0, max_tokens=1000) #通过Azure的OpenAI服务


# 将“依赖获取”工具添加到 ReAct 过程中    
tools = [
    Tool(
        name="find_declarations", func=get_dep,
        description="""Only invoke it when you need to find the declaration of the interface/struct/method that is not in the given code. 
        The input is a JSON string includes package name and the struct/interface name, 
          and its the format {{"package_name":<package name>,"element":<struct/interface>}}
        """)
]


In [16]:
def write_code(input):
    agent = initialize_agent(tools, llm, agent="zero-shot-react-description", handle_parsing_errors=True, max_iterations=10, verbose=True)
    print(agent.run(input))

In [17]:
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.TokenCreator interface.
 The implementation is to generate the token with uuid and its package is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mFirst, I need to know what methods are in the demo.TokenCreator interface. 
Action: find_declarations
Action Input: {"package_name":"demo","element":"TokenCreator"} [0m
Observation: [36;1m[1;3mtype TokenCreator interface {
	CreateToken(data string) string
}[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: 
Here is a simple implementation of the `demo.TokenCreator` interface in Go. This implementation uses the `github.com/google/uuid` package to generate a UUID for the token.

```go
package demo

import (
	"github.com/google/uuid"
)

type MyTokenCreator struct{}

func (m *MyTokenCreator) CreateToken(data string) string {
	return data + "-" + uuid.New().String()
}
```

In this code, `MyTokenCreator` is a struct that implements the `TokenCreator` interface from the `demo` package. The `CreateToken` method concatenates the input data with a newly generated UUID to create a token.[0m

[1m> Finished chain.[0

In [18]:
def read_file(file_path):
    with open(file_path, 'r') as file:
        content = file.read()
    return content

In [19]:
def write_unit_tests(filepath):
    agent = initialize_agent(tools, llm, agent="zero-shot-react-description", handle_parsing_errors=True, max_iterations=10, verbose=True)
    return (agent.run("""
    You are a very senior golang programmer.
    Please, follow the instructions:
     1. consider on the edge cases and errors
     2. make sure the generated unit test code in the same package as the tested code.
    write an unit tests for the following code: \n ---""" + read_file(filepath)
     ))

In [21]:
print(write_unit_tests(src_dir+"/demo/skeleton.go"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to write unit tests for the methods Process and Retrieve of the ProcessingServiceImpl struct. To do this, I need to understand the interfaces Processor, TokenCreator, and Storage. I will use the find_declarations tool to find the declarations of these interfaces.
Action: find_declarations
Action Input: {"package_name":"demo","element":"Processor"}[0m
Observation: [36;1m[1;3mtype Processor interface {
	// Process is to the raw data
	Process(raw string) (string, error)
}[0m
Thought:[32;1m[1;3mThe Processor interface has a Process method that takes a string as input and returns a string and an error. Now I will find the declaration of the TokenCreator interface.
Action: find_declarations
Action Input: {"package_name":"demo","element":"TokenCreator"}[0m
Observation: [36;1m[1;3mtype TokenCreator interface {
	CreateToken(data string) string
}[0m
Thought:[32;1m[1;3mThe TokenCreator interface has a CreateToken meth

## 采用向量数据库保存遗留代码

In [22]:
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
loader = GenericLoader.from_filesystem(
    src_dir,
    glob="**/*",
    suffixes=[".go"],
    parser=LanguageParser(language=Language.GO, parser_threshold=500)
)
documents = loader.load()
print(len(documents))

from langchain.text_splitter import RecursiveCharacterTextSplitter
go_splitter = RecursiveCharacterTextSplitter.from_language(language=Language.GO, 
                                                               chunk_size=500, 
                                                               chunk_overlap=50)
texts = go_splitter.split_documents(documents)
print(len(texts))

from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI
# embedding = OpenAIEmbeddings() #如果直接访问OpenAI的服务
embedding = OpenAIEmbeddings(deployment="embedding", chunk_size=10)
persist_directory = 'codes/'
db = Chroma.from_documents(texts, embedding, persist_directory=persist_directory)
db.persist()

10
68


## 采用向量数据库实现

In [29]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
# from langchain.chat_models import ChatOpenAI #直接访问OpenAI的GPT服务

#llm = ChatOpenAI(model_name="gpt-4", temperature=0) #直接访问OpenAI的GPT服务
llm = AzureChatOpenAI(deployment_name = deployment, model_name=model, temperature=0.3, max_tokens=1000) #通过Azure的OpenAI服务

retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 8})
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever,
                                 return_source_documents=False, verbose=True)

def find_declarations(directory, json_string):
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]
    ret = qa(f"give me the declaration of {element_name} in package {package_name}")
    return ret['result']

In [24]:
ret = find_declarations(src_dir, '{"package_name":"demo","element":"Storage"}')
print(ret)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
In the package demo, the Storage interface is declared as follows:

```go
// Storage is to persist the data
type Storage interface {
	// RetiveData is to retrive the data by the associated token.
	RetieveData(token string) (string, error)

	// StoreData is to persist the data,
	// input paramters:
	//   token is used to retrive the associated data
	//
	StoreData(token string, data string) error
}
```


In [25]:
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.Storage interface with the local memory.
 The implementation's package also is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mFirst, I need to know what methods are in the demo.Storage interface.
Action: find_declarations
Action Input: {"package_name":"demo","element":"Storage"}[0m

[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

Observation: [36;1m[1;3mIn the package demo, the Storage interface is declared as follows:

```go
// Storage is to persist the data
type Storage interface {
	// RetiveData is to retrive the data by the associated token.
	RetieveData(token string) (string, error)

	// StoreData is to persist the data,
	// input paramters:
	//   token is used to retrive the associated data
	//
	StoreData(token string, data string) error
}
```[0m
Thought:[32;1m[1;3mNow I know the methods of the Storage interface. I need to implement these methods in a struct in the demo package.
Final Answer: 
Here is the implementation of the demo.Storage interface with local memory:

```go
package demo

import (
	"errors"
)

typ

In [30]:
print(write_unit_tests(src_dir+"/demo/skeleton.go"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to write unit tests for the methods Process and Retrieve of the ProcessingServiceImpl struct. To do this, I need to understand the interfaces Processor, TokenCreator, and Storage, which are not defined in the given code. I will use the find_declarations tool to find these interfaces.
Action: find_declarations
Action Input: {"package_name":"demo","element":"Processor"}[0m

[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

Observation: [36;1m[1;3mIn the package demo, the Processor is declared as an interface with the following definition:

```go
type Processor interface {
	// Process is to the raw data
	Process(raw string) (string, error)
}
```[0m
Thought:[32;1m[1;3mNow I know what the Processor interface looks like. I need to find the declarations for the TokenCreator and Storage interfaces next.
Action: find_declarations
Action Input: {"package_name":"demo","element":"TokenCreator"}[0m

[1