diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..b5ccfe4 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,83 @@ +# AGENTS.md + +## Project Overview + +This is a simple Git mirror server written in Go that creates and serves read-only mirrors of Git repositories. The server uses Git's smart HTTP protocol via `git http-backend` to provide efficient repository access. + +### Key Features +- Creates read-only mirrors of Git repositories +- Automatically updates mirrors at configurable intervals +- Serves mirrors over HTTP using Git's smart protocol +- Supports both HTTPS and SSH origin repositories +- Docker support for easy deployment +- Counter-based multi-pack and bitmap index generation for improved performance +- Per-repository configuration for fetch intervals and index refresh frequencies + +### Architecture +The application consists of three main Go files: +1. `main.go` - Entry point, HTTP server setup, and background update processes +2. `config.go` - Configuration parsing from TOML files +3. `mirror.go` - Git mirror operations (clone, update, bitmap generation) + +## Contribution Guide for AI Agents + +### Code Style +- Follow existing Go conventions and formatting +- Use lowercase for package-private functions (only capitalize when needed across packages) +- Error messages should be clear and include relevant context +- Logging should be informative but not excessive + +### Testing +- Run `go test` to ensure existing tests pass +- Run `go vet` to check for code issues +- Run `gofmt` to ensure proper formatting +- Test Docker image build process + +### Common Commands +- `go build` - Build the application +- `go run . config.toml` - Run with a config file +- `go test` - Run tests +- `go vet ./...` - Check for potential issues +- `gofmt -s -l .` - Check code formatting + +### Git Workflow +1. Create a feature branch from main +2. Make focused changes for a single feature +3. Ensure code compiles and runs correctly +4. 
Update documentation as needed +5. Commit with clear, descriptive messages +6. Push and create a pull request + +### Docker Image Management +- Docker images are published to espressif/git-mirror on DockerHub +- Images are tagged with version numbers (e.g., v1.2.3) +- Major.minor and major version tags are also created (e.g., v1.2, v1) +- The latest tag is updated only on official releases +- Pre-release versions (with suffixes) only get the exact version tag + +### Configuration Changes +- Add new configuration options to the `config` struct in `config.go` +- Set appropriate defaults in the `parseConfig` function +- Update `example-config.toml` with documentation for new options +- Ensure backward compatibility with existing configurations + +### Error Handling +- Always check and handle errors appropriately +- Log errors with sufficient context for debugging +- Don't ignore errors from important operations like bitmap index generation +- Use `fmt.Errorf` with context when returning errors + +### Background Processes +- Mirror updates run in goroutines with configurable intervals +- Multi-pack index and bitmap index operations are counter-based, running after a specific number of fetches +- Each repo maintains its own fetch counter to trigger index refresh operations +- Multi-pack index refreshes are lightweight and run more frequently (default: 0, disabled) +- Bitmap index rebuilds involve full repacks and run less frequently (default: 0, disabled) +- Both operations can be disabled per-repo by setting their interval to 0 +- Ensure proper synchronization when accessing shared resources (counters use mutexes) +- Use semaphores or other synchronization primitives when limiting concurrent operations + +### Documentation Updates +- Update README.md when adding significant features +- Keep example configuration files up to date +- Document new command-line options or behaviors diff --git a/README.md b/README.md index c6d2499..88f631b 100644 --- a/README.md +++ 
b/README.md @@ -17,7 +17,7 @@ Create `config.toml` similar to: Origin = "https://github.com/espressif/git-mirror-server.git" ``` -By default it will update the mirror every **1 minute** and will serve the mirror over HTTP using port **8080**. You can specify as many repos as you want by having multiple `[[repo]]` sections. +By default it will update the mirror every **15 minutes** and will serve the mirror over HTTP using port **8080**. You can specify as many repos as you want by having multiple `[[repo]]` sections. Run `git-mirror` with the path to the config file: @@ -62,9 +62,39 @@ services: restart: always ``` -## Advanced configuration +## Configuration Options -See [the example config](example-config.toml) for more advanced configurations. +### Global Settings + +- **`ListenAddr`** (string, default: `:8080`) - The address and port the web server listens on for serving mirrors +- **`Interval`** (duration, default: `15m`) - Default interval for updating mirrors; can be overridden per repository +- **`BasePath`** (string, default: `.`) - Base path for storing mirror data; can be absolute or relative +- **`MaxConcurrentConnections`** (int, default: `32`) - Limits the number of concurrent HTTP connections to prevent overload +- **`MultiPackIndexInterval`** (int, default: `0`) - Number of fetches after which to refresh the multi-pack index; disabled by default (`0`) +- **`BitmapIndexInterval`** (int, default: `0`) - Number of fetches after which to rebuild the bitmap index with a full repack; disabled by default (`0`) + +### Repository Settings + +Each `[[Repo]]` section supports: + +- **`Origin`** (string, required) - The URL of the repository to mirror (supports HTTPS and SSH) +- **`Name`** (string, optional) - Custom name for accessing the mirror; auto-generated from Origin if not specified +- **`Interval`** (duration, optional) - Override the global update interval for this specific repository +- **`MultiPackIndexInterval`** (int, optional) - Override the global 
multi-pack index refresh interval for this repository +- **`BitmapIndexInterval`** (int, optional) - Override the global bitmap index rebuild interval for this repository + +### Performance Optimization + +The server uses two index refresh strategies: + +1. **Multi-pack index** - Lightweight operation that improves fetch performance without repacking. Runs more frequently (default: 0, disabled). +2. **Bitmap index** - Full repack with bitmap generation for maximum performance. More resource-intensive, runs less frequently (default: 0, disabled). + +Both can be disabled per repository by setting their interval to `0`, or customized based on repository size and usage patterns. + +### Example Configuration + +See [the example config](example-config.toml) for a complete configuration example. ## Authentication and authorization diff --git a/config.go b/config.go index 832f1aa..d20f58a 100644 --- a/config.go +++ b/config.go @@ -18,16 +18,19 @@ type duration struct { type config struct { ListenAddr string Interval duration - BitmapInterval duration + MultiPackIndexInterval int + BitmapIndexInterval int BasePath string MaxConcurrentConnections int Repo []repo } type repo struct { - Name string - Origin string - Interval duration + Name string + Origin string + Interval duration + MultiPackIndexInterval int + BitmapIndexInterval int } func (d *duration) UnmarshalText(text []byte) (err error) { @@ -52,10 +55,13 @@ func parseConfig(filename string) (cfg config, repos map[string]repo, err error) cfg.ListenAddr = ":8080" } if cfg.Interval.Duration == 0 { - cfg.Interval.Duration = time.Minute + cfg.Interval.Duration = 15 * time.Minute } - if cfg.BitmapInterval.Duration == 0 { - cfg.BitmapInterval.Duration = 10 * time.Hour + if cfg.MultiPackIndexInterval < 0 { + cfg.MultiPackIndexInterval = 0 + } + if cfg.BitmapIndexInterval < 0 { + cfg.BitmapIndexInterval = 0 } if cfg.BasePath == "" { cfg.BasePath = "." 
@@ -103,6 +109,12 @@ func parseConfig(filename string) (cfg config, repos map[string]repo, err error) if r.Interval.Duration == 0 { r.Interval.Duration = cfg.Interval.Duration } + if r.MultiPackIndexInterval < 0 { + r.MultiPackIndexInterval = cfg.MultiPackIndexInterval + } + if r.BitmapIndexInterval < 0 { + r.BitmapIndexInterval = cfg.BitmapIndexInterval + } repos[r.Name] = r } return diff --git a/example-config.toml b/example-config.toml index 5aa41a4..ad501ce 100644 --- a/example-config.toml +++ b/example-config.toml @@ -1,12 +1,19 @@ # ListenAddr is the address the web server listens on for serving the mirrors. # Defaults to :8080 ListenAddr = ":8080" + # Interval is the default interval for updating mirrors, can be overridden per -# repo. Defaults to 15 seconds. +# repo. Defaults to 15 minutes. Interval = "15m" -# BitmapInterval is the default interval for rebuilding git bitmaps. -# Defaults to 10 hours. It is a global setting only. -BitmapInterval = "10h" + +# MultiPackIndexInterval is the number of fetches after which to refresh the +# multi-pack index. Disabled by default (0). Can be overridden per repo. +MultiPackIndexInterval = 10 + +# BitmapIndexInterval is the number of fetches after which to rebuild the +# bitmap index with full repack. Disabled by default (0). Can be overridden per repo. +BitmapIndexInterval = 50 + # Base path for storing mirrors, absolute or relative. Defaults to "." BasePath = "/opt/git-mirror/data" @@ -21,9 +28,12 @@ MaxConcurrentConnections = 32 [[Repo]] Origin = "https://github.com/espressif/git-mirror-server.git" -# It is also possible to set custom names for accessing the repos. -# +# It is also possible to set custom names and custom intervals for accessing the repos. 
# Will be mirrored at http://localhost:8080/custom-name [[Repo]] Name = "custom-name" Origin = "git@github.com:toml-lang/toml.git" +# Optional: Override default intervals for this specific repo +# Interval = "10m" +# MultiPackIndexInterval = 5 +# BitmapIndexInterval = 25 diff --git a/main.go b/main.go index aedd77a..b970b99 100644 --- a/main.go +++ b/main.go @@ -25,6 +25,8 @@ func main() { } // Run background threads to keep mirrors up to date. + // Multi-pack index and bitmap index refreshes are now handled + // automatically within the mirror function based on fetch counts. for _, r := range repos { go func(r repo) { for { @@ -44,21 +46,6 @@ func main() { }(r) } - // Run full repack with bitmap generation once in a while - go func() { - for { - time.Sleep(cfg.BitmapInterval.Duration) - for _, r := range repos { - log.Printf("updating bitmap for %s", r.Name) - if err := refreshBitmapIndex(cfg, r); err != nil { - log.Printf("error updating bitmap for %s: %s", r.Name, err) - } else { - log.Printf("bitmap updated for %s", r.Name) - } - } - } - }() - // Set up git http-backend CGI handler gitBackend := &cgi.Handler{ Path: "/usr/bin/git", diff --git a/mirror.go b/mirror.go index db660e3..68228df 100644 --- a/mirror.go +++ b/mirror.go @@ -6,11 +6,30 @@ import ( "os" "os/exec" "path" + "sync" ) +// Counter tracks fetch counts for each repo +type repoCounter struct { + mu sync.Mutex + fetchCount uint64 +} + +var repoCounters = make(map[string]*repoCounter) +var repoCountersMu sync.Mutex + func mirror(cfg config, r repo) (string, error) { repoPath := path.Join(cfg.BasePath, r.Name) outStr := "" + + // Initialize counter for this repo if it doesn't exist + repoCountersMu.Lock() + if repoCounters[r.Name] == nil { + repoCounters[r.Name] = &repoCounter{} + } + counter := repoCounters[r.Name] + repoCountersMu.Unlock() + if _, err := os.Stat(repoPath); err == nil { // Directory exists, update. 
cmd := exec.Command("git", "remote", "update", "--prune") @@ -20,11 +39,7 @@ func mirror(cfg config, r repo) (string, error) { if err != nil { return "", fmt.Errorf("failed to update remote in %s: %w", repoPath, err) } - if err := refreshMultiPackIndex(cfg, r); err != nil { - log.Printf("error refreshing multi-pack index for %s: %s", r.Name, err) - } else { - log.Printf("successfully refreshed multi-pack index for %s", r.Name) - } + } else if os.IsNotExist(err) { // Clone parent := path.Dir(repoPath) @@ -37,15 +52,34 @@ func mirror(cfg config, r repo) (string, error) { if err != nil { return "", fmt.Errorf("failed to clone %s: %w", r.Origin, err) } + return string(out), err + } else { + return "", fmt.Errorf("failed to stat %s, %s", repoPath, err) + } + + // Check if we need to run multi-pack index + if r.MultiPackIndexInterval > 0 && counter.fetchCount%uint64(r.MultiPackIndexInterval) == 0 { + if err := refreshMultiPackIndex(cfg, r); err != nil { + log.Printf("error refreshing multi-pack index for %s: %s", r.Name, err) + } else { + log.Printf("successfully refreshed multi-pack index for %s (fetch #%d)", r.Name, counter.fetchCount) + } + } + + // Check if we need to run bitmap index + if r.BitmapIndexInterval > 0 && counter.fetchCount%uint64(r.BitmapIndexInterval) == 0 { if err := refreshBitmapIndex(cfg, r); err != nil { log.Printf("error refreshing bitmap index for %s: %s", r.Name, err) } else { - log.Printf("successfully refreshed bitmap index for %s", r.Name) + log.Printf("successfully refreshed bitmap index for %s (fetch #%d)", r.Name, counter.fetchCount) } - return string(out), err - } else { - return "", fmt.Errorf("failed to stat %s, %s", repoPath, err) } + + // Increment fetch counter (only on successful fetch) + counter.mu.Lock() + counter.fetchCount++ + counter.mu.Unlock() + return outStr, nil }