In [2]:
import os
from pathlib import Path
from typing import Protocol, List, Dict, Any
from dotenv import load_dotenv

In [21]:
from langchain_core.tools import tool

@tool
def list_directory(path: str) -> Dict[str, List[str]]:
    """path의 directories와 files를 전체 경로로 리스트로 반환"""
    dirs, files = [], []
    with os.scandir(path) as it:
        for entry in it:
            if entry.is_dir():
                dirs.append(os.path.abspath(entry.path))
            elif entry.is_file():
                files.append(os.path.abspath(entry.path))
    return {"directories": dirs, "files": files}

@tool
def create_directory(path: str) -> bool:
    """path에 directory 생성. 이미 있으면 True, 없으면 생성 후 True, 실패시 False"""
    try:
        os.makedirs(path, exist_ok=True)
        return True
    except Exception:
        return False

@tool
def read_files(path: str) -> Dict[str, str]:
    """path의 모든 파일을 읽어서 {파일명: 내용} dict로 반환 (디렉토리면 무시)"""
    result = {}
    for entry in os.scandir(path):
        if entry.is_file():
            with open(entry.path, "r", encoding="utf-8") as f:
                result[entry.name] = f.read()
    return result

@tool
def read_file(path: str) -> str:
    """path에서 파일을 찾아서 내용을 반환"""
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

@tool
def write_file(path: str, content: str) -> bool:
    """path에 파일을 생성(또는 덮어씀). 성공시 True, 실패시 False"""
    try:
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return True
    except Exception:
        return False

tools = [list_directory, create_directory, read_files, write_file, read_file]

In [17]:
result = list_directory.invoke("../llm0707")
result

{'directories': ['/Users/1110444/edu/llm0707/.langgraph_api',
  '/Users/1110444/edu/llm0707/data copy',
  '/Users/1110444/edu/llm0707/langchain_qdrant',
  '/Users/1110444/edu/llm0707/.venv',
  '/Users/1110444/edu/llm0707/chroma_db',
  '/Users/1110444/edu/llm0707/.git',
  '/Users/1110444/edu/llm0707/data',
  '/Users/1110444/edu/llm0707/src'],
 'files': ['/Users/1110444/edu/llm0707/llms.txt',
  '/Users/1110444/edu/llm0707/gradio_rag_app.py',
  '/Users/1110444/edu/llm0707/README copy.md',
  '/Users/1110444/edu/llm0707/DAY02_002_Prompt_Engineering_CoT.ipynb',
  '/Users/1110444/edu/llm0707/DAY02_005_Housing_FAQ_Bot.ipynb',
  '/Users/1110444/edu/llm0707/DAY05_008_LangGraph_ReAct.ipynb',
  '/Users/1110444/edu/llm0707/DAY05_001_LangGraph_StateGraph.ipynb',
  '/Users/1110444/edu/llm0707/DAY05_006_ETF_Recommendation.ipynb',
  '/Users/1110444/edu/llm0707/.DS_Store',
  '/Users/1110444/edu/llm0707/DAY06_006_LangGraph_SelfRAG.ipynb',
  '/Users/1110444/edu/llm0707/DAY05_004_ETF_Text2SQL_RAG.ipynb',
 

In [None]:
read_file.invoke(result["files"][8])

'import os\nfrom pathlib import Path\nfrom typing import Protocol, List, Dict, Any\nfrom dotenv import load_dotenv\n\n# LLM Backend 추상화\nclass LLMBackend(Protocol):\n    def generate(self, prompt: str) -> str:\n        ...\n\n# OpenAI Backend 구현\nclass OpenAIBackend:\n    def __init__(self, api_key: str, model: str = "gpt-3.5-turbo"):\n        import openai\n        self.openai = openai\n        self.api_key = api_key\n        self.model = model\n        self.openai.api_key = api_key\n\n    def generate(self, prompt: str) -> str:\n        response = self.openai.chat.completions.create(\n            model=self.model,\n            messages=[{"role": "user", "content": prompt}],\n            max_tokens=1024,\n            temperature=0.7,\n        )\n        content = response.choices[0].message.content\n        return content.strip() if content else ""\n\n# 파일 시스템 도구들\n\ndef list_directory(path: str) -> Dict[str, List[str]]:\n    """path의 directories와 files를 전체 경로로 리스트로 반환"""\n    dirs, f

In [16]:
read_files.invoke(".")

{'uv.lock': 'version = 1\nrevision = 2\nrequires-python = ">=3.9, <3.13"\nresolution-markers = [\n    "python_full_version >= \'3.12\'",\n    "python_full_version == \'3.11.*\'",\n    "python_full_version == \'3.10.*\'",\n    "python_full_version < \'3.10\'",\n]\n\n[[package]]\nname = "annotated-types"\nversion = "0.7.0"\nsource = { registry = "https://pypi.org/simple" }\nsdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }\nwheels = [\n    { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },\n]\n\n[[package]]\n

In [None]:
# request = {
#     "jsonrpc": "2.0",
#     "id": 2,
#     "method": "call_tool",
#     "params": {
#         "name": "list_directory",
#         "arguments": {
#             "path": "/Users/1110444/edu/"
#         }
#     }
# }

# # JSON 메시지와 헤더 조립
# payload = json.dumps(request)
# message = f"Content-Length: {len(payload)}\r\n\r\n{payload}"

# # 전송
# proc.stdin.write(message + '\n\n')
# proc.stdin.flush()

# # 응답 읽기
# # def read_response(proc):
# #     # 먼저 헤더 읽기
# #     headers = {}
# #     while True:
# #         line = proc.stdout.readline()
# #         if line == "\r\n" or line == "\n":
# #             break
# #         key, value = line.strip().split(": ", 1)
# #         headers[key.lower()] = value

# #     # 본문 길이만큼 읽기
# #     content_length = int(headers["content-length"])
# #     body = proc.stdout.read(content_length)
# #     return json.loads(body)

# # response = read_response(proc)
# # print("MCP 응답:", response)

# def read_exact(proc_stdout, n):
#     buf = ''
#     while len(buf) < n:
#         chunk = proc_stdout.read(n - len(buf))
#         if not chunk:
#             raise RuntimeError("EOF reached before reading full content")
#         buf += chunk
#     return buf

# def read_response(proc):
#     # 1. 헤더 파싱
#     headers = {}
#     # while True:
#         # line = proc.stdout.readline()
#         # print(line)
#         # if line in ("\r\n", "\n", ""):
#         #     break
#         # key, value = line.strip().split(": ", 1)
#         # headers[key.lower()] = value

#     # 2. Content-Length 기준 본문 읽기
#     # content_length = int(headers["content-length"])
#     # body = read_exact(proc.stdout, content_length)
#     return json.loads(proc.stdout.readline())

# response = read_response(proc)
# print("MCP 응답:", response)
# response

MCP 응답: {'jsonrpc': '2.0', 'id': 2, 'error': {'code': -32601, 'message': 'Method not found'}}


{'jsonrpc': '2.0',
 'id': 2,
 'error': {'code': -32601, 'message': 'Method not found'}}

In [None]:
from langchain_openai import ChatOpenAI

# 모델 생성
model = ChatOpenAI(model="gpt-4.1-mini")

# 모델에 도구 등록 
model_with_tools = model.bind_tools(tools)

# 사용자 쿼리를 입력하여 ToolCall 생성
response = model_with_tools.invoke("현재 디렉토리에 있는 파일을 모두 출력하세요.")

In [19]:
list_directory.invoke(response.tool_calls[0]['args'])

{'directories': ['.venv', '.git'],
 'files': ['uv.lock',
  'Dockerfile',
  'pyproject.toml',
  'stdio.ipynb',
  'README.md',
  '.gitignore',
  '.env',
  '.python-version',
  'main.py',
  'test.ipynb']}

In [None]:
response.pretty_print()
tools = [list_directory, create_directory, read_files, write_file, read_file]

Tool Calls:
  list_directory (call_jsv2fl5eUaCF2hEHtLNaCgPE)
 Call ID: call_jsv2fl5eUaCF2hEHtLNaCgPE
  Args:


In [18]:
system_prompt = """
# LLM System Prompt for README & llms.txt Generation

## System Prompt

You are an expert technical writer specializing in creating comprehensive documentation for software projects. Your primary task is to generate high-quality README files and llms.txt files that serve as the foundation for project understanding and LLM integration.

### Core Responsibilities

1. **README Generation**: Create clear, comprehensive, and well-structured README files that follow industry best practices
2. **llms.txt Creation**: Generate structured documentation files optimized for LLM consumption and understanding
3. **Documentation Standards**: Ensure all generated content follows markdown conventions and accessibility guidelines

### README Generation Guidelines

#### Structure Requirements
- **Project Title**: Clear, descriptive project name
- **Description**: Concise overview of what the project does and why it exists
- **Installation**: Step-by-step setup instructions
- **Usage**: Basic usage examples with code snippets
- **Features**: Key functionality and capabilities
- **API Documentation**: If applicable, include endpoint descriptions
- **Contributing**: Guidelines for contribution
- **License**: License information
- **Changelog**: Version history and updates

#### Content Standards
- Use clear, concise language accessible to both technical and non-technical users
- Include practical code examples that users can copy and run
- Provide troubleshooting sections for common issues
- Add badges for build status, version, license, etc.
- Include screenshots or diagrams when helpful
- Ensure all links are working and properly formatted

#### Technical Writing Best Practices
- Use active voice and present tense
- Break up long sections with subheadings
- Include table of contents for longer documents
- Use consistent formatting throughout
- Provide context for technical terms
- Include prerequisites and dependencies

### llms.txt Generation Guidelines

#### Purpose and Format
The llms.txt file should serve as a comprehensive knowledge base for LLM consumption, following the llms.txt standard format.

#### Structure Requirements
```
# Project Name

## Overview
[Comprehensive project description optimized for LLM understanding]

## Architecture
[Technical architecture and design patterns]

## API Reference
[Complete API documentation with examples]

## Code Examples
[Practical code snippets and usage patterns]

## Configuration
[Configuration options and environment setup]

## Troubleshooting
[Common issues and solutions]

## Development
[Development workflow and contribution guidelines]
```

#### Content Optimization for LLMs
- Use structured headings and consistent formatting
- Include comprehensive context for all concepts
- Provide complete code examples with explanations
- Use clear, unambiguous language
- Include error handling and edge cases
- Add metadata about dependencies and versions
- Structure information hierarchically

### Quality Assurance Checklist

Before finalizing documentation:
- [ ] All code examples are tested and functional
- [ ] Links are verified and working
- [ ] Spelling and grammar are correct
- [ ] Formatting is consistent throughout
- [ ] Information is up-to-date and accurate
- [ ] Documentation serves both human and LLM readers effectively

### Input Processing Instructions

When given a project or codebase:
1. **Analyze**: Understand the project's purpose, architecture, and key features
2. **Structure**: Organize information logically and hierarchically
3. **Generate**: Create both README.md and llms.txt files
4. **Optimize**: Ensure content is optimized for both human readability and LLM consumption
5. **Validate**: Review for completeness and accuracy

### Output Format

Always provide:
1. **README.md**: Human-friendly documentation following markdown standards
2. **llms.txt**: LLM-optimized documentation following the llms.txt format
3. **Summary**: Brief explanation of the generated documentation structure

### Additional Considerations

- Adapt tone and technical depth based on the project's target audience
- Include relevant badges and shields for professional appearance
- Ensure accessibility compliance (alt text for images, proper heading hierarchy)
- Consider internationalization if the project has global reach
- Include performance benchmarks or metrics when relevant
- Add security considerations and best practices when applicable

Remember: Great documentation is not just about what you include, but how you present it. Prioritize clarity, completeness, and usability for both human developers and AI systems.
"""


In [23]:
from langgraph.prebuilt import create_react_agent
from langchain_openai import ChatOpenAI


# Agent 생성
model = ChatOpenAI(model="gpt-4.1-mini")

agent = create_react_agent(model, tools)
agent.invoke({
    "messages": [
         {"role": "system", "content": system_prompt},
        {"role": "user", "content": "'/Users/1110444/edu/etf-bot' 프로젝트의 파일을 확인해서 README.md 파일과 llms.txt 파일을 생성하세요."}
    ]
})

{'messages': [SystemMessage(content="\n# LLM System Prompt for README & llms.txt Generation\n\n## System Prompt\n\nYou are an expert technical writer specializing in creating comprehensive documentation for software projects. Your primary task is to generate high-quality README files and llms.txt files that serve as the foundation for project understanding and LLM integration.\n\n### Core Responsibilities\n\n1. **README Generation**: Create clear, comprehensive, and well-structured README files that follow industry best practices\n2. **llms.txt Creation**: Generate structured documentation files optimized for LLM consumption and understanding\n3. **Documentation Standards**: Ensure all generated content follows markdown conventions and accessibility guidelines\n\n### README Generation Guidelines\n\n#### Structure Requirements\n- **Project Title**: Clear, descriptive project name\n- **Description**: Concise overview of what the project does and why it exists\n- **Installation**: Step-by

In [4]:

def read_all_files_relative(project_path: str) -> dict:
    """
    Recursively read all files under project_path and return a dict {relative_path: content}.
    relative_path is relative to project_path.
    """
    result = {}
    for root, _, files in os.walk(project_path):
        for file in files:
            abs_path = os.path.join(root, file)
            rel_path = os.path.relpath(abs_path, project_path)
            try:
                with open(abs_path, "r", encoding="utf-8") as f:
                    result[rel_path] = f.read()
            except Exception:
                # 바이너리/읽기불가 파일은 건너뜀
                continue
    return result

files = read_all_files_relative('/Users/1110444/edu/etf-bot')

In [None]:
llm = HuggingFaceBackend(model_name="microsoft/Phi-3-mini-4k-instruct", device="cpu")

In [None]:
summaries = {}
    for rel_path, code in files.items():
        prompt = f"Summarize the following code file ({rel_path}):\n\n{code}"
        summary = llm.generate(prompt)
        summaries[rel_path] = summary
        print(f"===== {rel_path} =====\n{summary}\n")
    
    print(summaries)