In [4]:
deployment = "gpt4"
model = "gpt-4"

# 代码生成与验证

In [5]:
import openai
def work_on(input):
    response = openai.ChatCompletion.create(
        engine=deployment, # engine = "deployment_name".
        messages=[
            {"role": "system", "content": "You are a software engineer."},   
            {"role": "user", "content": input}
        ],
        temperature = 0.9, 
        max_tokens = 500
      )
    return response.choices[0].message.content

In [6]:
prompt = """
实现一个 Python 函数(find_declarations)以从给定的 Golang 源代码中查找结构/接口声明。

该函数需要两个输入参数：
1. 包含源文件的目录。
2. 由包名称和结构/接口名称组成的 JSON 字符串，格式如下：{“package_name”:<package name>,“element”:<struct/interface>}

函数的输出是声明。当未找到请求的结构/接口时，该函数应返回“Not Found”。
"""

In [7]:
print (work_on(prompt))

以下是一个简单的实现。这个函数会递归的搜索所有的go文件，并使用正则表达式查找对应的结构或者接口声明。这个简单的方法在某些复杂的情况下可能会失败，比如当结构或接口的声明跨越了多行的时候。

```python
import os
import re
import json

def find_declarations(dir, json_str):
    data = json.loads(json_str)
    package_name = data["package_name"]
    element = data["element"]

    pattern = 'package {}\s*(type {} (struct|interface))'.format(package_name, element)

    for root, dirs, files in os.walk(dir):
        for file in files:
            if file.endswith(".go"):
                with open(os.path.join(root, file), "r") as f:
                    lines = f.read().split("\n")
                    for line in lines:
                        match = re.findall(pattern, line)
                        if match:
                            return match[0]

    return "Not Found"
```

使用这个函数的方式如下：

```python
print(find_declarations("/path/to/your/golang/dir", '{"package_name":"main", "element":"YourStruct"}'))
```

你需要注意的是，这个函数只在简单的情况下有效。在复杂的情况下，比如多行的结构或接口声明，或者结构/接口名与package名在同一行但不是在行首的情况下，

In [11]:
import os
import re
import json

def find_declarations(dir, json_str):
    data = json.loads(json_str)
    package_name = data["package_name"]
    element = data["element"]

    pattern = 'package {}\s*(type {} (struct|interface))'.format(package_name, element)

    for root, dirs, files in os.walk(dir):
        for file in files:
            if file.endswith(".go"):
                with open(os.path.join(root, file), "r") as f:
                    lines = f.read().split("\n")
                    for line in lines:
                        match = re.findall(pattern, line)
                        if match:
                            return match[0]

    return "Not Found"

In [12]:
src_dir = "./llm_friendly_code"
interface_dec = '{"package_name":"demo","element":"TokenCreator"}'
expected = """
type TokenCreator interface {
	CreateToken(data string) string
}
"""


In [13]:
print (find_declarations(src_dir, interface_dec))

Not Found


In [14]:
from io import StringIO
import sys
from typing import Dict, Optional
from langchain.chat_models import AzureChatOpenAI
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI


class PythonREPL:
    """Simulates a standalone Python REPL."""

    def __init__(self):
        pass        

    def run(self, command: str) -> str:
        command = '\n'.join(line for line in command.split('\n') if not line.startswith('```'))
        """Run command and returns anything printed."""
        # sys.stderr.write("EXECUTING PYTHON CODE:\n---\n" + command + "\n---\n")
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            exec(command, globals())
            sys.stdout = old_stdout
            output = mystdout.getvalue()
        except Exception as e:
            sys.stdout = old_stdout
            output = str(e)
        # sys.stderr.write("PYTHON OUTPUT: \"" + output + "\"\n")
        return output
      
llm = AzureChatOpenAI(deployment_name=deployment, temperature=0.2, max_tokens = 600)
python_repl = Tool(
        "Python REPL",
        PythonREPL().run,
        """A Python shell. Use this to execute python commands. 
        Input should be a valid python command.
        If you expect output it should be printed out.
        For example: to verify the the following python function
        ---
        def add(a, b):
            return (a+b)
        ---
        we can invoke the tool with the input 
        "
        def add(a, b):
            return (a+b)
        print (add(1,2))
        
        "
        """,
    )
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True, handle_parsing_errors=True)

In [15]:
PythonREPL().run('print("Hello Python")')

'Hello Python\n'

In [17]:
prompt_1 = prompt + f"\n 给出回答前请用以下例子测试生成的函数:\n 函数输入：{src_dir}, {interface_dec}; 函数输出：{expected}"
print(agent.run(prompt_1))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis problem requires me to create a Python function that can parse Golang source code and find the declaration of a specific struct or interface. The function needs to take in two parameters: the directory of the source files and a JSON string that specifies the package name and the struct/interface name. 

To solve this problem, I can use the os and json libraries in Python to navigate the file system and parse the JSON string. I can then use regular expressions to find the declaration in the Golang source code. 

Action: [Python REPL]
Action Input: 

```python
import os
import json
import re

def find_declarations(dir, json_str):
    # Parse the JSON string
    info = json.loads(json_str)
    package_name = info["package_name"]
    element = info["element"]

    # Traverse the directory
    for root, dirs, files in os.walk(dir):
        for file in files:
            # Only consider .go files
            if file.endswith("

In [9]:
import os
import json
import re

def find_declarations(dir, json_str):
    # Parse the JSON string
    info = json.loads(json_str)
    package_name = info["package_name"]
    element = info["element"]

    # Traverse the directory
    for root, dirs, files in os.walk(dir):
        for file in files:
            # Only consider .go files
            if file.endswith(".go"):
                with open(os.path.join(root, file), "r") as f:
                    content = f.read()

                    # Check if the file belongs to the specified package
                    package_match = re.search(r"package\s+" + package_name, content)
                    if package_match:
                        # Find the declaration of the struct/interface
                        element_match = re.search(r"(type\s+" + element + r"\s+(struct|interface)\s+{[^}]*})", content)
                        if element_match:
                            return element_match.group(1)

    return "Not Found"

In [18]:
print (find_declarations(src_dir, interface_dec))

type TokenCreator interface {
	CreateToken(data string) string
}


# 解决依赖问题

## 本地文件搜索

In [19]:
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import LLMRequestsChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import AzureOpenAI
import time
import os
import json
import re

src_dir = "./llm_friendly_code"

def find_declarations(directory, json_string):
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]

    # Regular expression to match structure/interface declarations
    regex = re.compile(r"(type\s+" + element_name + r"\s+(struct|interface)\s+{\s+[^}]*\s+})")

    # Read the source files in the directory
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".go"):
                with open(os.path.join(root, file), "r") as f:
                    content = f.read()
                    # Check if the file is in the correct package
                    if "package " + package_name in content:
                        # Search for the structure/interface declaration
                        match = regex.search(content)
                        if match:
                            return match.group(1)

    return "Not Found"

def get_dep(json_string):
    return find_declarations(src_dir, json_string)

llm = AzureChatOpenAI(deployment_name=deployment, temperature=0)

# 将“依赖获取”工具添加到 ReAct 过程中    
tools = [
    Tool(
        name="find_declarations", func=get_dep,
        description="""Only invoke it when you need to find the declaration of the interface/struct/method that is not in the given code. 
        The input is a JSON string includes package name and the struct/interface name, 
          and its the format {{"package_name":<package name>,"element":<struct/interface>}}
        """)
]


In [20]:
def write_code(input):
    agent = initialize_agent(tools, llm, agent="zero-shot-react-description", handle_parsing_errors=True, max_iterations=10, verbose=True)
    print(agent.run(input))

In [21]:
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.TokenCreator interface.
 The implementation is to generate the token with uuid and its package is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo implement the interface, I first need to know what methods the interface has. I will use the find_declarations tool to find the declaration of the demo.TokenCreator interface.

Action: find_declarations
Action Input: {"package_name":"demo","element":"TokenCreator"}[0m
Observation: [36;1m[1;3mtype TokenCreator interface {
	CreateToken(data string) string
}[0m
Thought:[32;1m[1;3mNow I know that the TokenCreator interface has a method called CreateToken which takes a string as input and returns a string. I will now write the implementation of this interface. The implementation will generate a token using uuid.

Final Answer: 
```go
package demo

import (
	"github.com/google/uuid"
)

type TokenCreatorImpl struct{}

func (t TokenCreatorImpl) CreateToken(data string) string {
	return uuid.New().String() + data
}
```[0m

[1m> Finished chain.[0m
```go
package demo

import (
	"github.com/google/uuid"
)

type TokenCreatorIm

In [22]:
def read_file(file_path):
    with open(file_path, 'r') as file:
        content = file.read()
    return content

In [23]:
def write_unit_tests(filepath):
    agent = initialize_agent(tools, llm, agent="zero-shot-react-description", handle_parsing_errors=True, max_iterations=10, verbose=True)
    return (agent.run("""
    You are a very senior golang programmer.
    Please, follow the instructions:
     1. consider on the edge cases and errors
     2. make sure the generated unit test code in the same package as the tested code.
    write an unit tests for the following code: \n ---""" + read_file(filepath)
     ))

In [24]:
print(write_unit_tests(src_dir+"/demo/skeleton.go"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to write unit tests for the methods Process and Retrive of the ProcessingServiceImpl struct. To do this, I need to understand the interfaces Storage, Processor, and TokenCreator that are used in these methods. I will use the find_declarations tool to find the declarations of these interfaces. 
Action: find_declarations
Action Input: {"package_name":"demo","element":"Storage"}[0m
Observation: [36;1m[1;3mtype Storage interface {
	// RetiveData is to retrive the data by the associated token.
	RetiveData(token string) (string, error)

	// StoreData is to persist the data,
	// input paramters:
	//   token is used to retrive the associated data
	//
	StoreData(token string, data string) error
}[0m
Thought:[32;1m[1;3mI now know the Storage interface has two methods: RetiveData and StoreData. I will now find the declarations of the Processor and TokenCreator interfaces.
Action: find_declarations
Action Input: {"package_na

## 采用向量数据库保存遗留代码

In [25]:
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
loader = GenericLoader.from_filesystem(
    src_dir,
    glob="**/*",
    suffixes=[".go"],
    parser=LanguageParser(language=Language.GO, parser_threshold=500)
)
documents = loader.load()
print(len(documents))

from langchain.text_splitter import RecursiveCharacterTextSplitter
go_splitter = RecursiveCharacterTextSplitter.from_language(language=Language.GO, 
                                                               chunk_size=2000, 
                                                               chunk_overlap=200)
texts = go_splitter.split_documents(documents)
print(len(texts))

from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI
embedding = OpenAIEmbeddings(deployment="embedding",chunk_size=1)
persist_directory = 'codes/'
db = Chroma.from_documents(texts, embedding, persist_directory=persist_directory)
db.persist()

10
22


## 采用向量数据库实现

In [26]:
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
llm = AzureChatOpenAI(deployment_name = "gpt4", model_name="gpt-4", temperature=0, max_tokens=1000)
memory = ConversationSummaryMemory(llm=llm,memory_key="chat_history",return_messages=True)
retriever = db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 8},
)
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
def find_declarations(directory, json_string):
    # Parse the JSON string
    json_data = json.loads(json_string)
    package_name = json_data["package_name"]
    element_name = json_data["element"]
    return qa(f"give me the declaration of {element_name} in package {package_name}")

In [27]:
ret = find_declarations(src_dir, interface_dec)
print(ret["answer"])

The declaration of TokenCreator in the demo package is an interface with a single method:

```go
type TokenCreator interface {
	CreateToken(data string) string
}
```

This interface defines a contract for any type that has a method `CreateToken` which takes a string as input and returns a string.


In [28]:
write_code(
"""
 You are a very senior golang programmer.
 Write the golang code to implement the demo.Storage interface.
 The implementation's package also is demo
"""
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mFirst, I need to know what the demo.Storage interface looks like in order to implement it. 
Action: find_declarations
Action Input: {"package_name":"demo","element":"Storage"} [0m
Observation: [36;1m[1;3m{'question': 'give me the declaration of Storage in package demo', 'chat_history': [SystemMessage(content='The human asks for the declaration of TokenCreator in the demo package. The AI explains that TokenCreator is an interface with a single method, `CreateToken`, which takes a string as input and returns a string. This interface defines a contract for any type that has a `CreateToken` method.', additional_kwargs={})], 'answer': 'Sure, here is the declaration of the Storage interface in the demo package:\n\n```go\ntype Storage interface {\n\t// RetiveData is to retrive the data by the associated token.\n\tRetiveData(token string) (string, error)\n\n\t// StoreData is to persist the data,\n\t// input paramters:\n\t//   toke