diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..171bc45 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,93 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is a Python-based tool that converts Google Devsite documentation (from bazel.build/docs) into Hugo/Docsy format for easier navigation and modification. The converter transforms Devsite frontmatter, directory layout, and styling to be compatible with the Hugo static site generator and Docsy theme. + +## Core Commands + +### Running the Converter +```bash +# Basic conversion +python cli.py convert --source /path/to/devsite/source --output /path/to/hugo/output + +# With dry run validation +python cli.py convert --source /path/to/devsite/source --output /path/to/hugo/output --dry-run + +# Incremental conversion (only changed files) +python cli.py convert --source /path/to/devsite/source --output /path/to/hugo/output --incremental + +# View converter info +python cli.py info +``` + +### Environment Setup +```bash +# Install dependencies +pip install -r requirements.txt + +# Or using the project setup +pip install -e . +``` + +### Docker Usage +```bash +# Run the Docker container +docker run -it -p 1313:1313 alan707/bazel-docs:latest bash + +# Inside container: convert docs +python /app/cli.py convert --source /app/work/bazel-source/site/en/ --output /app/docs/ + +# Inside container: setup Hugo modules and run server +cd /app/docs +hugo mod init github.com/alan707/bazel-docs && \ + hugo mod get github.com/google/docsy@v0.12.0 && \ + hugo mod tidy +hugo server --bind 0.0.0.0 --baseURL "http://localhost:1313" +``` + +## Architecture + +### Core Components + +1. **CLI Interface (`cli.py`)**: Click-based command line interface with convert and info commands +2. **Main Converter (`devsite_to_hugo_converter.py`)**: Orchestrates the conversion process using parser and generator +3. **Devsite Parser (`utils/devsite_parser.py`)**: Parses Google Devsite structure, including `_book.yaml` and `_index.yaml` files +4. **Hugo Generator (`utils/hugo_generator.py`)**: Generates Hugo site structure and configuration using Jinja2 templates + +### Configuration System + +The `config.yaml` file controls all aspects of the conversion: + +- **Content Mapping**: Maps Devsite sections to Hugo categories (tutorials, how-to-guides, explanations, reference) +- **External Links**: Handles redirects to legacy Bazel API documentation +- **Code Language Detection**: Automatic language detection for code blocks using pattern matching +- **CSS Conversion**: Transforms CSS/SCSS for Docsy theme compatibility +- **File Patterns**: Controls which files are included/excluded during conversion + +### Template System + +Uses Jinja2 templates in the `templates/` directory: +- `hugo_config.yaml.jinja2`: Generates Hugo site configuration +- `section_index.jinja2`: Creates section index pages + +### Content Organization + +The converter maps Devsite sections to Hugo content types: +- Tutorials → tutorials category (weight 1-3) +- Install/Configure/Build guides → how-to-guides category +- Concepts/Extending → explanations category +- Reference materials → reference category + +## Development Notes + +### Code Language Detection +The system automatically detects programming languages for code blocks without explicit language identifiers using pattern matching defined in `config.yaml`. Supports Starlark (Bazel), Bash, Python, C++, Java, JavaScript, TypeScript, and more. + +### Link Conversion +The converter handles both internal link conversion within the Hugo site and external link redirection to maintain compatibility with existing Bazel API documentation. + +### CSS/SCSS Processing +PostCSS and Autoprefixer are used for CSS processing (see package.json dependencies), though the main conversion logic is in Python. \ No newline at end of file diff --git a/config.yaml b/config.yaml index c21dbe3..026b095 100644 --- a/config.yaml +++ b/config.yaml @@ -13,6 +13,16 @@ hugo: baseURL: "https://bazel-docs-68tmf.ondigitalocean.app/" languageCode: "en-us" theme: "docsy" + +# External links configuration +external_links: + # Base URL for legacy Bazel API documentation + bazel_api_base: "https://bazel.build" + # Paths that should be redirected to external API docs + external_paths: + - "/rules/" + - "/reference/" + - "/docs/build-ref" content_mapping: # set 'enable_category_indices' to true to generate _index.md files for categories diff --git a/devsite_to_hugo_converter.py b/devsite_to_hugo_converter.py index 18eead4..f5a5504 100644 --- a/devsite_to_hugo_converter.py +++ b/devsite_to_hugo_converter.py @@ -233,7 +233,7 @@ def _convert_single_file(self, source_file: Path, output_file: Path, dry_run: bo hugo_frontmatter['title'] = title_from_h1 # Convert body content - hugo_body = self._convert_body_content(body) + hugo_body = self._convert_body_content(body, source_file) # Remove duplicate H1 title if it matches frontmatter title if 'title' in hugo_frontmatter: @@ -311,7 +311,7 @@ def _convert_frontmatter(self, frontmatter: Dict) -> Dict: return hugo_frontmatter - def _convert_body_content(self, body: str) -> str: + def _convert_body_content(self, body: str, source_file: Path) -> str: """Convert Devsite-specific content to Hugo format""" # Remove [TOC] directive (let Docsy handle TOC automatically) body = re.sub(r'\[TOC\]', '', body) @@ -342,7 +342,7 @@ def _convert_body_content(self, body: str) -> str: body = body.strip() # Remove trailing whitespace # Convert internal links - body = self._convert_internal_links(body) + body = self._convert_internal_links(body, source_file) # Fix directory structure formatting body = self._fix_directory_structures(body) @@ -383,7 +383,7 @@ def _remove_duplicate_h1_title(self, body: str, return body - def _convert_internal_links(self, content: str) -> str: + def _convert_internal_links(self, content: str, source_file: Path) -> str: """Convert internal links to Hugo format""" # Pattern for markdown links - handle multi-line links link_pattern = r'\[([^\]]+)\]\(([^)]+)\)' @@ -404,19 +404,68 @@ def replace_link(match): if link_url.startswith('#'): return match.group(0) + # Handle external API links (absolute paths to external documentation) + if link_url.startswith('/') and self._should_redirect_to_external(link_url): + external_base = self.config.get('external_links', {}).get('bazel_api_base', 'https://bazel.build') + return f'[{link_text}]({external_base}{link_url})' + # Handle relative links to .md files - if link_url.endswith('.md'): + if link_url.endswith('.md') or '.md#' in link_url: + # Split URL and anchor + if '#' in link_url: + url_part, anchor_part = link_url.split('#', 1) + anchor = f'#{anchor_part}' + else: + url_part = link_url + anchor = '' + # Normalize the path - normalized_path = link_url.replace('.md', '') - # Remove leading './' if present - if normalized_path.startswith('./'): - normalized_path = normalized_path[2:] + normalized_path = url_part.replace('.md', '') + + # Handle relative paths by resolving against source file location + if not normalized_path.startswith('/'): + # Get the source file's directory relative to the source root + source_dir = source_file.parent + # Find the source root (work/bazel-source/site/en) + source_root = None + for parent in source_file.parents: + if parent.name == 'en' and parent.parent.name == 'site': + source_root = parent + break + + if source_root: + # Get relative directory from source root + rel_source_dir = source_dir.relative_to(source_root) + + # Remove leading './' if present + if normalized_path.startswith('./'): + normalized_path = normalized_path[2:] + + # Resolve relative path + if str(rel_source_dir) == '.': + # File is in root, just use the filename + full_path = normalized_path + else: + # Combine source directory with relative path + full_path = str(rel_source_dir / normalized_path) + + # Get category mapping for this path + path_parts = full_path.split('/') + if path_parts: + section_name = path_parts[0] + if section_name in self.config.get('content_mapping', {}): + mapping = self.config['content_mapping'][section_name] + category_type = mapping['type'] + return f'[{link_text}](/{category_type}/{full_path}/{anchor})' + else: + return f'[{link_text}]({full_path}/{anchor})' + # Remove leading '/' if present (absolute paths within site) if normalized_path.startswith('/'): normalized_path = normalized_path[1:] # Use simple relative links to avoid shortcode issues - return f'[{link_text}](/{normalized_path}/)' + return f'[{link_text}](/{normalized_path}/{anchor})' # Handle relative links to directories (assume they have index pages) if '/' in link_url and not '.' in link_url.split('/')[-1]: @@ -434,6 +483,11 @@ def replace_link(match): return re.sub(link_pattern, replace_link, content, flags=re.DOTALL) + def _should_redirect_to_external(self, link_url: str) -> bool: + """Check if a link should be redirected to external Bazel API docs""" + external_paths = self.config.get('external_links', {}).get('external_paths', []) + return any(link_url.startswith(path) for path in external_paths) + def _fix_directory_structures(self, content: str) -> str: """Fix directory structure formatting to use proper code blocks""" # Pattern to match directory structures with Unicode tree characters