diff --git a/README.md b/README.md index 3ed57bb..54455de 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,39 @@ Calibrated on Windows Vista development data showing 4% weekly code churn. A PR **Reference**: Nagappan, N., et al. (2008). Organizational Structure and Software Quality. *ICSE '08*. +### 6. PR Tracking Overhead: Empirical Organizational Studies + +Models the cost of managing and triaging open PR backlogs. This captures planning and coordination overhead, **excluding actual code review time** (counted separately in future review costs). Based on research showing developers spend significant time on PR discovery, triage, and project management activities beyond reviewing code. + +**Formula**: +``` +tracking_hours_per_day = openPRs × log₂(activeContributors + 1) × 0.005 +``` + +**Components**: +- **Linear with PR count**: More open PRs require more organizational scanning/triage overhead +- **Logarithmic with team size**: Larger teams develop specialization, tooling, and distributed ownership that reduce per-capita burden +- **Constant (0.005)**: Calibrated to ~20 seconds per PR per day of planning/coordination time, excluding actual review + +**Validation Examples**: +- 20 PRs, 5 contributors: ~15 min/day total (3 min/person/day) +- 200 PRs, 50 contributors: ~6 hours/day total (7 min/person/day) +- 1000 PRs, 100 contributors: ~33 hours/day total (20 min/person/day) + +**Activities Captured** (non-review overhead only): +- Sprint/milestone planning discussions about open PRs +- Daily standup mentions and status coordination +- Searching for duplicate work before starting new PRs +- Identifying related PRs that may conflict or depend on each other +- Quarterly/monthly mass triage of stale PRs +- Project/product management tracking of feature delivery +- Estimating and re-prioritizing work based on open PR backlog + +**References**: +- Bacchelli, A., & Bird, C. (2013). Expectations, Outcomes, and Challenges of Modern Code Review. *ICSE '13*. 
+- Rigby, P. C., & Bird, C. (2013). Convergent Contemporary Software Peer Review Practices. *FSE '13*. +- Uwano, H., et al. (2006). Analyzing Individual Performance of Source Code Review Using Reviewers' Eye Movement. *ETRA '06*. + ## Model Limitations **Individual Estimates**: High variance (CV > 1.0) due to developer and task heterogeneity. diff --git a/cmd/prcost/main.go b/cmd/prcost/main.go index ab4bb3f..553e6a9 100644 --- a/cmd/prcost/main.go +++ b/cmd/prcost/main.go @@ -530,6 +530,23 @@ func mergeVelocityGrade(avgOpenDays float64) (grade, message string) { } } +// mergeRateGrade returns a grade based on merge success rate percentage. +// A: >90%, B: >80%, C: >70%, D: >60%, F: ≤60%. +func mergeRateGrade(mergeRatePct float64) (grade, message string) { + switch { + case mergeRatePct > 90: + return "A", "Excellent" + case mergeRatePct > 80: + return "B", "Good" + case mergeRatePct > 70: + return "C", "Acceptable" + case mergeRatePct > 60: + return "D", "Low" + default: + return "F", "Poor" + } +} + // printMergeTimeModelingCallout prints a callout showing potential savings from reduced merge time. 
func printMergeTimeModelingCallout(breakdown *cost.Breakdown, cfg cost.Config) { targetHours := cfg.TargetMergeTimeHours @@ -595,12 +612,12 @@ func printMergeTimeModelingCallout(breakdown *cost.Breakdown, cfg cost.Config) { fmt.Println(" ┌─────────────────────────────────────────────────────────────┐") fmt.Printf(" │ %-60s│\n", "MERGE TIME MODELING") fmt.Println(" └─────────────────────────────────────────────────────────────┘") - fmt.Printf(" If you lowered your average merge time to %s, you would save\n", formatTimeUnit(targetHours)) - fmt.Printf(" ~$%s/yr in engineering overhead", formatWithCommas(annualSavings)) if efficiencyDelta > 0 { - fmt.Printf(" (+%.1f%% throughput).\n", efficiencyDelta) + fmt.Printf(" Reduce merge time to %s to boost team throughput by %.1f%%\n", formatTimeUnit(targetHours), efficiencyDelta) + fmt.Printf(" and save ~$%s/yr in engineering overhead.\n", formatWithCommas(annualSavings)) } else { - fmt.Println(".") + fmt.Printf(" If you lowered your average merge time to %s, you would save\n", formatTimeUnit(targetHours)) + fmt.Printf(" ~$%s/yr in engineering overhead.\n", formatWithCommas(annualSavings)) } fmt.Println() } diff --git a/cmd/prcost/repository.go b/cmd/prcost/repository.go index 522aa42..9cb8fa0 100644 --- a/cmd/prcost/repository.go +++ b/cmd/prcost/repository.go @@ -99,8 +99,17 @@ func analyzeRepository(ctx context.Context, owner, repo string, sampleSize, days openPRCount = 0 } + // Convert PRSummary to PRMergeStatus for merge rate calculation + prStatuses := make([]cost.PRMergeStatus, len(prs)) + for i, pr := range prs { + prStatuses[i] = cost.PRMergeStatus{ + Merged: pr.Merged, + State: pr.State, + } + } + // Extrapolate costs from samples using library function - extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, openPRCount, actualDays, cfg) + extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, openPRCount, actualDays, cfg, prStatuses) // Display results in itemized 
format printExtrapolatedResults(fmt.Sprintf("%s/%s", owner, repo), actualDays, &extrapolated, cfg) @@ -199,8 +208,17 @@ func analyzeOrganization(ctx context.Context, org string, sampleSize, days int, } slog.Info("Counted total open PRs across organization", "org", org, "open_prs", totalOpenPRs) + // Convert PRSummary to PRMergeStatus for merge rate calculation + prStatuses := make([]cost.PRMergeStatus, len(prs)) + for i, pr := range prs { + prStatuses[i] = cost.PRMergeStatus{ + Merged: pr.Merged, + State: pr.State, + } + } + // Extrapolate costs from samples using library function - extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, totalOpenPRs, actualDays, cfg) + extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, totalOpenPRs, actualDays, cfg, prStatuses) // Display results in itemized format printExtrapolatedResults(fmt.Sprintf("%s (organization)", org), actualDays, &extrapolated, cfg) @@ -656,6 +674,18 @@ func printExtrapolatedEfficiency(ext *cost.ExtrapolatedBreakdown, days int, cfg fmt.Printf(" │ %-60s│\n", velocityHeader) fmt.Println(" └─────────────────────────────────────────────────────────────┘") + // Merge Rate box (if data available) + if ext.MergedPRs+ext.UnmergedPRs > 0 { + mergeRateGradeStr, mergeRateMessage := mergeRateGrade(ext.MergeRate) + fmt.Println(" ┌─────────────────────────────────────────────────────────────┐") + mergeRateHeader := fmt.Sprintf("MERGE RATE: %s (%.1f%%) - %s", mergeRateGradeStr, ext.MergeRate, mergeRateMessage) + if len(mergeRateHeader) > innerWidth { + mergeRateHeader = mergeRateHeader[:innerWidth] + } + fmt.Printf(" │ %-60s│\n", mergeRateHeader) + fmt.Println(" └─────────────────────────────────────────────────────────────┘") + } + // Weekly waste per PR author if ext.WasteHoursPerAuthorPerWeek > 0 && ext.TotalAuthors > 0 { fmt.Printf(" Weekly waste per PR author: $%14s %s (%d authors)\n", @@ -738,12 +768,12 @@ func printExtrapolatedMergeTimeModelingCallout(ext 
*cost.ExtrapolatedBreakdown, fmt.Println(" ┌─────────────────────────────────────────────────────────────┐") fmt.Printf(" │ %-60s│\n", "MERGE TIME MODELING") fmt.Println(" └─────────────────────────────────────────────────────────────┘") - fmt.Printf(" If you lowered your average merge time to %s, you would save\n", formatTimeUnit(targetHours)) - fmt.Printf(" ~$%s/yr in engineering overhead", formatWithCommas(annualSavings)) if efficiencyDelta > 0 { - fmt.Printf(" (+%.1f%% throughput).\n", efficiencyDelta) + fmt.Printf(" Reduce merge time to %s to boost team throughput by %.1f%%\n", formatTimeUnit(targetHours), efficiencyDelta) + fmt.Printf(" and save ~$%s/yr in engineering overhead.\n", formatWithCommas(annualSavings)) } else { - fmt.Println(".") + fmt.Printf(" If you lowered your average merge time to %s, you would save\n", formatTimeUnit(targetHours)) + fmt.Printf(" ~$%s/yr in engineering overhead.\n", formatWithCommas(annualSavings)) } fmt.Println() } diff --git a/internal/server/server.go b/internal/server/server.go index 49947e8..c7a7841 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -1659,8 +1659,17 @@ func (s *Server) processRepoSample(ctx context.Context, req *RepoSampleRequest, openPRCount = 0 } + // Convert PRSummary to PRMergeStatus for merge rate calculation + prStatuses := make([]cost.PRMergeStatus, len(prs)) + for i, pr := range prs { + prStatuses[i] = cost.PRMergeStatus{ + Merged: pr.Merged, + State: pr.State, + } + } + // Extrapolate costs from samples - extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, openPRCount, actualDays, cfg) + extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, openPRCount, actualDays, cfg, prStatuses) // Only include seconds_in_state if we have data (turnserver only) var secondsInState map[string]int @@ -1779,8 +1788,17 @@ func (s *Server) processOrgSample(ctx context.Context, req *OrgSampleRequest, to } s.logger.InfoContext(ctx, "Counted 
total open PRs across organization", "org", req.Org, "open_prs", totalOpenPRs) + // Convert PRSummary to PRMergeStatus for merge rate calculation + prStatuses := make([]cost.PRMergeStatus, len(prs)) + for i, pr := range prs { + prStatuses[i] = cost.PRMergeStatus{ + Merged: pr.Merged, + State: pr.State, + } + } + // Extrapolate costs from samples - extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, totalOpenPRs, actualDays, cfg) + extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, totalOpenPRs, actualDays, cfg, prStatuses) // Only include seconds_in_state if we have data (turnserver only) var secondsInState map[string]int @@ -2176,8 +2194,17 @@ func (s *Server) processRepoSampleWithProgress(ctx context.Context, req *RepoSam openPRCount = 0 } + // Convert PRSummary to PRMergeStatus for merge rate calculation + prStatuses := make([]cost.PRMergeStatus, len(prs)) + for i, pr := range prs { + prStatuses[i] = cost.PRMergeStatus{ + Merged: pr.Merged, + State: pr.State, + } + } + // Extrapolate costs from samples - extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, openPRCount, actualDays, cfg) + extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, openPRCount, actualDays, cfg, prStatuses) // Only include seconds_in_state if we have data (turnserver only) var secondsInState map[string]int @@ -2326,8 +2353,17 @@ func (s *Server) processOrgSampleWithProgress(ctx context.Context, req *OrgSampl } s.logger.InfoContext(ctx, "Counted total open PRs across organization", "open_prs", totalOpenPRs, "org", req.Org) + // Convert PRSummary to PRMergeStatus for merge rate calculation + prStatuses := make([]cost.PRMergeStatus, len(prs)) + for i, pr := range prs { + prStatuses[i] = cost.PRMergeStatus{ + Merged: pr.Merged, + State: pr.State, + } + } + // Extrapolate costs from samples - extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, totalOpenPRs, 
actualDays, cfg) + extrapolated := cost.ExtrapolateFromSamples(breakdowns, len(prs), totalAuthors, totalOpenPRs, actualDays, cfg, prStatuses) // Only include seconds_in_state if we have data (turnserver only) var secondsInState map[string]int diff --git a/internal/server/static/formatR2RCallout.js b/internal/server/static/formatR2RCallout.js index 76ad6db..4e7f6a7 100644 --- a/internal/server/static/formatR2RCallout.js +++ b/internal/server/static/formatR2RCallout.js @@ -16,16 +16,12 @@ function formatR2RCallout(avgOpenHours, r2rSavings, currentEfficiency, modeledEf } const efficiencyDelta = modeledEfficiency - currentEfficiency; - let throughputText = ''; - if (efficiencyDelta > 0) { - throughputText = ' (+' + efficiencyDelta.toFixed(1) + '% throughput)'; - } // Format target merge time let targetText = targetMergeHours.toFixed(1) + 'h'; - let html = '
'; - html += 'Pro-Tip: Save ' + savingsText + '/yr in lost development effort by reducing merge times to <' + targetText + ' with '; + let html = '
'; + html += '\uD83D\uDCA1 Pro-Tip: Boost team throughput by ' + efficiencyDelta.toFixed(1) + '% and save ' + savingsText + '/yr by reducing merge times to <' + targetText + ' with '; html += 'Ready to Review. '; html += 'Free for open-source repositories, $6/user/org for private repos.'; html += '
'; diff --git a/internal/server/static/formatR2RCallout.test.js b/internal/server/static/formatR2RCallout.test.js index 7df0034..4d08bee 100644 --- a/internal/server/static/formatR2RCallout.test.js +++ b/internal/server/static/formatR2RCallout.test.js @@ -32,10 +32,13 @@ test('Renders callout for PRs with avgOpenHours > 1.5 (default)', () => { assert(result.length > 0, 'Should return non-empty HTML'); }); -// Test 3: Should contain "Pro-Tip:" text -test('Contains "Pro-Tip:" text', () => { +// Test 3: Should contain "Pro-Tip:" text and throughput boost +test('Contains "Pro-Tip:" text and throughput boost', () => { const result = formatR2RCallout(10, 50000, 60, 70); + assert(result.includes('💡'), 'Should contain lightbulb emoji'); assert(result.includes('Pro-Tip:'), 'Should contain "Pro-Tip:"'); + assert(result.includes('Boost team throughput by'), 'Should contain throughput boost message'); + assert(result.includes('10.0%'), 'Should show efficiency delta of 10% (70 - 60)'); }); // Test 4: Should contain "Ready to Review" link diff --git a/internal/server/static/index.html b/internal/server/static/index.html index 0e9dd30..1b73733 100644 --- a/internal/server/static/index.html +++ b/internal/server/static/index.html @@ -1410,7 +1410,21 @@

Why calculate PR costs?

} } - function formatEfficiencyHTML(efficiencyPct, grade, message, preventableCost, preventableHours, totalCost, totalHours, avgOpenHours, isAnnual = false, annualWasteCost = 0, annualWasteHours = 0, wasteHoursPerWeek = 0, wasteCostPerWeek = 0, wasteHoursPerAuthorPerWeek = 0, wasteCostPerAuthorPerWeek = 0, totalAuthors = 0, salary = 250000, benefitsMultiplier = 1.2, analysisType = 'project', sourceName = '') { + function mergeRateGrade(mergeRatePct) { + if (mergeRatePct > 90) { + return { grade: 'A', message: 'Excellent success rate' }; + } else if (mergeRatePct > 80) { + return { grade: 'B', message: 'Good success rate' }; + } else if (mergeRatePct > 70) { + return { grade: 'C', message: 'Acceptable success rate' }; + } else if (mergeRatePct > 60) { + return { grade: 'D', message: 'Low success rate' }; + } else { + return { grade: 'F', message: 'Poor success rate' }; + } + } + + function formatEfficiencyHTML(efficiencyPct, grade, message, preventableCost, preventableHours, totalCost, totalHours, avgOpenHours, isAnnual = false, annualWasteCost = 0, annualWasteHours = 0, wasteHoursPerWeek = 0, wasteCostPerWeek = 0, wasteHoursPerAuthorPerWeek = 0, wasteCostPerAuthorPerWeek = 0, totalAuthors = 0, salary = 250000, benefitsMultiplier = 1.2, analysisType = 'project', sourceName = '', mergeRate = 0, mergedPRs = 0, unmergedPRs = 0) { let html = '
'; // Development Efficiency box @@ -1435,6 +1449,20 @@

Why calculate PR costs?

html += `
${velocityGradeObj.message}
`; html += '
'; // Close efficiency-box + // Merge Rate box (if data available) + if (mergedPRs + unmergedPRs > 0) { + const mergeRateGradeObj = mergeRateGrade(mergeRate); + html += '
'; + html += '

Merge Rate

'; + html += '
'; + html += `${mergeRateGradeObj.grade}`; + html += `${mergeRate.toFixed(1)}%`; + html += '
'; + html += `
${mergeRateGradeObj.message}
`; + html += '
Recently modified PRs successfully merged
'; + html += '
'; // Close efficiency-box + } + // Annual Impact box (only if annual) if (isAnnual && annualWasteCost > 0) { html += '
'; @@ -1469,16 +1497,12 @@

Why calculate PR costs?

} const efficiencyDelta = modeledEfficiency - currentEfficiency; - let throughputText = ''; - if (efficiencyDelta > 0) { - throughputText = ' (+' + efficiencyDelta.toFixed(1) + '% throughput)'; - } // Format target merge time let targetText = targetMergeHours.toFixed(1) + 'h'; - let html = '
'; - html += 'Pro-Tip: Save ' + savingsText + '/yr in lost development effort by reducing merge times to <' + targetText + ' with '; + let html = '
'; + html += '\uD83D\uDCA1 Pro-Tip: Boost team throughput by ' + efficiencyDelta.toFixed(1) + '% and save ' + savingsText + '/yr by reducing merge times to <' + targetText + ' with '; html += 'Ready to Review. '; html += 'Free for open-source repositories, $6/user/org for private repos.'; html += '
'; @@ -2374,7 +2398,10 @@

Why calculate PR costs?

const wasteCostPerAuthorPerWeek = e.waste_cost_per_author_per_week || 0; const totalAuthors = e.total_authors || 0; const avgPRDurationHours = e.avg_pr_duration_hours || 0; - html += formatEfficiencyHTML(extEfficiencyPct, extEfficiency.grade, extEfficiency.message, extPreventableCost, extPreventableHours, e.total_cost, e.total_hours, avgPRDurationHours, true, annualWasteCost, annualWasteHours, wasteHoursPerWeek, wasteCostPerWeek, wasteHoursPerAuthorPerWeek, wasteCostPerAuthorPerWeek, totalAuthors, salary, benefitsMultiplier, analysisType, sourceName); + const mergeRate = e.merge_rate || 0; + const mergedPRs = e.merged_prs || 0; + const unmergedPRs = e.unmerged_prs || 0; + html += formatEfficiencyHTML(extEfficiencyPct, extEfficiency.grade, extEfficiency.message, extPreventableCost, extPreventableHours, e.total_cost, e.total_hours, avgPRDurationHours, true, annualWasteCost, annualWasteHours, wasteHoursPerWeek, wasteCostPerWeek, wasteHoursPerAuthorPerWeek, wasteCostPerAuthorPerWeek, totalAuthors, salary, benefitsMultiplier, analysisType, sourceName, mergeRate, mergedPRs, unmergedPRs); // Add R2R callout if enabled, otherwise generic merge time callout // Calculate modeled efficiency (with 1.5h merge time) diff --git a/pkg/cost/cost.go b/pkg/cost/cost.go index 292c654..4426faa 100644 --- a/pkg/cost/cost.go +++ b/pkg/cost/cost.go @@ -158,6 +158,8 @@ type PRData struct { LinesAdded int LinesDeleted int AuthorBot bool + Merged bool // Whether the PR was merged + State string // PR state: "open", "closed" } // AuthorCostDetail breaks down the author's costs. 
diff --git a/pkg/cost/cost_test.go b/pkg/cost/cost_test.go index 377dc47..501e022 100644 --- a/pkg/cost/cost_test.go +++ b/pkg/cost/cost_test.go @@ -1261,7 +1261,7 @@ func TestAnalyzePRsContextCancellation(t *testing.T) { func TestExtrapolateFromSamplesEmpty(t *testing.T) { cfg := DefaultConfig() - result := ExtrapolateFromSamples([]Breakdown{}, 100, 10, 5, 30, cfg) + result := ExtrapolateFromSamples([]Breakdown{}, 100, 10, 5, 30, cfg, []PRMergeStatus{}) if result.TotalPRs != 100 { t.Errorf("Expected TotalPRs=100, got %d", result.TotalPRs) @@ -1297,7 +1297,13 @@ func TestExtrapolateFromSamplesSingle(t *testing.T) { }, cfg) // Extrapolate from 1 sample to 10 total PRs - result := ExtrapolateFromSamples([]Breakdown{breakdown}, 10, 2, 0, 7, cfg) + // Create merge status for 10 PRs: 9 merged, 1 open + prStatuses := make([]PRMergeStatus, 10) + for i := 0; i < 9; i++ { + prStatuses[i] = PRMergeStatus{Merged: true, State: "MERGED"} + } + prStatuses[9] = PRMergeStatus{Merged: false, State: "OPEN"} + result := ExtrapolateFromSamples([]Breakdown{breakdown}, 10, 2, 0, 7, cfg, prStatuses) if result.TotalPRs != 10 { t.Errorf("Expected TotalPRs=10, got %d", result.TotalPRs) @@ -1361,7 +1367,15 @@ func TestExtrapolateFromSamplesMultiple(t *testing.T) { } // Extrapolate from 2 samples to 20 total PRs over 14 days - result := ExtrapolateFromSamples(breakdowns, 20, 5, 3, 14, cfg) + // Create merge status for 20 PRs: 17 merged, 3 open + prStatuses := make([]PRMergeStatus, 20) + for i := 0; i < 17; i++ { + prStatuses[i] = PRMergeStatus{Merged: true, State: "MERGED"} + } + for i := 17; i < 20; i++ { + prStatuses[i] = PRMergeStatus{Merged: false, State: "OPEN"} + } + result := ExtrapolateFromSamples(breakdowns, 20, 5, 3, 14, cfg, prStatuses) if result.TotalPRs != 20 { t.Errorf("Expected TotalPRs=20, got %d", result.TotalPRs) @@ -1431,7 +1445,12 @@ func TestExtrapolateFromSamplesBotVsHuman(t *testing.T) { }, } - result := ExtrapolateFromSamples(breakdowns, 10, 5, 0, 7, cfg) + // Create 
merge status for 10 PRs: all merged + prStatuses := make([]PRMergeStatus, 10) + for i := 0; i < 10; i++ { + prStatuses[i] = PRMergeStatus{Merged: true, State: "MERGED"} + } + result := ExtrapolateFromSamples(breakdowns, 10, 5, 0, 7, cfg, prStatuses) // Should have both human and bot PR counts if result.HumanPRs <= 0 { @@ -1482,7 +1501,12 @@ func TestExtrapolateFromSamplesWasteCalculation(t *testing.T) { }, cfg) // Extrapolate over 7 days - result := ExtrapolateFromSamples([]Breakdown{breakdown}, 10, 3, 0, 7, cfg) + // Create merge status for 10 PRs: all merged + prStatuses := make([]PRMergeStatus, 10) + for i := 0; i < 10; i++ { + prStatuses[i] = PRMergeStatus{Merged: true, State: "MERGED"} + } + result := ExtrapolateFromSamples([]Breakdown{breakdown}, 10, 3, 0, 7, cfg, prStatuses) // Waste per week should be calculated if result.WasteHoursPerWeek <= 0 { @@ -1527,7 +1551,15 @@ func TestExtrapolateFromSamplesR2RSavings(t *testing.T) { }, cfg), } - result := ExtrapolateFromSamples(breakdowns, 100, 10, 5, 30, cfg) + // Create merge status for 100 PRs: 95 merged, 5 open + prStatuses := make([]PRMergeStatus, 100) + for i := 0; i < 95; i++ { + prStatuses[i] = PRMergeStatus{Merged: true, State: "MERGED"} + } + for i := 95; i < 100; i++ { + prStatuses[i] = PRMergeStatus{Merged: false, State: "OPEN"} + } + result := ExtrapolateFromSamples(breakdowns, 100, 10, 5, 30, cfg, prStatuses) // R2R savings should be calculated // Savings formula: baseline waste - remodeled waste - subscription cost @@ -1564,7 +1596,15 @@ func TestExtrapolateFromSamplesOpenPRTracking(t *testing.T) { // Test with actual open PRs actualOpenPRs := 15 - result := ExtrapolateFromSamples([]Breakdown{breakdown}, 100, 5, actualOpenPRs, 30, cfg) + // Create merge status for 100 PRs: 85 merged, 15 open + prStatuses := make([]PRMergeStatus, 100) + for i := 0; i < 85; i++ { + prStatuses[i] = PRMergeStatus{Merged: true, State: "MERGED"} + } + for i := 85; i < 100; i++ { + prStatuses[i] = PRMergeStatus{Merged: 
false, State: "OPEN"} + } + result := ExtrapolateFromSamples([]Breakdown{breakdown}, 100, 5, actualOpenPRs, 30, cfg, prStatuses) // Open PRs should match actual count (not extrapolated) if result.OpenPRs != actualOpenPRs { @@ -1600,7 +1640,12 @@ func TestExtrapolateFromSamplesParticipants(t *testing.T) { ClosedAt: now, }, cfg) - result := ExtrapolateFromSamples([]Breakdown{breakdown}, 10, 5, 0, 7, cfg) + // Create merge status for 10 PRs: all merged + prStatuses := make([]PRMergeStatus, 10) + for i := 0; i < 10; i++ { + prStatuses[i] = PRMergeStatus{Merged: true, State: "MERGED"} + } + result := ExtrapolateFromSamples([]Breakdown{breakdown}, 10, 5, 0, 7, cfg, prStatuses) // Participant costs should be extrapolated if result.ParticipantReviewCost <= 0 { diff --git a/pkg/cost/extrapolate.go b/pkg/cost/extrapolate.go index 69f6e31..019d79d 100644 --- a/pkg/cost/extrapolate.go +++ b/pkg/cost/extrapolate.go @@ -1,6 +1,15 @@ package cost -import "log/slog" +import ( + "log/slog" + "math" +) + +// PRMergeStatus represents merge status information for a PR (for calculating merge rate). +type PRMergeStatus struct { + State string // "OPEN", "CLOSED", "MERGED" + Merged bool +} // ExtrapolatedBreakdown represents cost estimates extrapolated from a sample // of PRs to estimate total costs across a larger population. 
@@ -94,6 +103,12 @@ type ExtrapolatedBreakdown struct { TotalCost float64 `json:"total_cost"` TotalHours float64 `json:"total_hours"` + // Merge rate statistics + MergedPRs int `json:"merged_prs"` // Number of successfully merged PRs + UnmergedPRs int `json:"unmerged_prs"` // Number of PRs not merged (closed or still open) + MergeRate float64 `json:"merge_rate"` // Percentage of PRs successfully merged (0-100) + MergeRateNote string `json:"merge_rate_note"` // Explanation of what counts as merged/unmerged + // R2R cost savings calculation UniqueNonBotUsers int `json:"unique_non_bot_users"` // Count of unique non-bot users (authors + participants) R2RSavings float64 `json:"r2r_savings"` // Annual savings if R2R cuts PR time to target merge time @@ -106,8 +121,10 @@ type ExtrapolatedBreakdown struct { // - breakdowns: Slice of Breakdown structs from successfully processed samples // - totalPRs: Total number of PRs in the population // - totalAuthors: Total number of unique authors across all PRs (not just samples) +// - actualOpenPRs: Count of actually open PRs (for tracking overhead) // - daysInPeriod: Number of days the sample covers (for per-week calculations) // - cfg: Configuration for hourly rate and hours per week calculation +// - prStatuses: Merge status for all PRs (for merge rate calculation) // // Returns: // - ExtrapolatedBreakdown with averaged costs scaled to total population @@ -116,12 +133,28 @@ type ExtrapolatedBreakdown struct { // by the total PR count to estimate population-wide costs. 
// //nolint:revive,maintidx // Complex calculation function benefits from cohesion -func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actualOpenPRs int, daysInPeriod int, cfg Config) ExtrapolatedBreakdown { +func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actualOpenPRs int, daysInPeriod int, cfg Config, prStatuses []PRMergeStatus) ExtrapolatedBreakdown { if len(breakdowns) == 0 { + // Calculate merge rate even with no successful samples + mergedCount := 0 + for _, status := range prStatuses { + if status.Merged { + mergedCount++ + } + } + mergeRate := 0.0 + if len(prStatuses) > 0 { + mergeRate = 100.0 * float64(mergedCount) / float64(len(prStatuses)) + } + return ExtrapolatedBreakdown{ TotalPRs: totalPRs, SampledPRs: 0, SuccessfulSamples: 0, + MergedPRs: mergedCount, + UnmergedPRs: len(prStatuses) - mergedCount, + MergeRate: mergeRate, + MergeRateNote: "Recently modified PRs successfully merged", } } @@ -292,11 +325,21 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu extCodeChurnCost := sumCodeChurnCost / samples * multiplier extAutomatedUpdatesCost := sumAutomatedUpdatesCost / samples * multiplier // Calculate Open PR Tracking cost based on actual open PRs (not from samples) - // Formula: actualOpenPRs × uniqueUsers × (tracking_minutes_per_day_per_person / 60) × daysInPeriod × hourlyRate - // This scales with team size: larger teams spend more total time tracking open PRs + // Formula: openPRs × log2(activeContributors + 1) × 0.005 × daysInPeriod × hourlyRate + // This represents planning/coordination overhead ONLY (excludes actual code review) + // - Linear with PR count: more PRs = more planning/triage overhead + // - Logarithmic with team size: larger teams have specialization/better processes + // - Constant 0.005: calibrated to ~20 seconds per PR per day of planning/coordination time + // (excludes actual review time, which is counted separately in FutureReviewCost) 
hourlyRate := cfg.AnnualSalary * cfg.BenefitsMultiplier / cfg.HoursPerYear uniqueUserCount := len(uniqueNonBotUsers) - extPRTrackingHours := float64(actualOpenPRs) * float64(uniqueUserCount) * (cfg.PRTrackingMinutesPerDay / 60.0) * float64(daysInPeriod) + var extPRTrackingHours float64 + if uniqueUserCount > 0 && actualOpenPRs > 0 { + // log2(n+1) to handle log(0) and provide smooth scaling + teamScaleFactor := math.Log2(float64(uniqueUserCount) + 1) + // 0.005 hours = 0.30 minutes per PR per day (organizational average for planning/coordination only) + extPRTrackingHours = float64(actualOpenPRs) * teamScaleFactor * 0.005 * float64(daysInPeriod) + } extPRTrackingCost := extPRTrackingHours * hourlyRate extFutureReviewCost := sumFutureReviewCost / samples * multiplier extFutureMergeCost := sumFutureMergeCost / samples * multiplier @@ -439,6 +482,28 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu r2rSavings = 0 // Don't show negative savings } + // Calculate merge rate from all PRs (not just samples) + mergedCount := 0 + unmergedCount := 0 + for _, status := range prStatuses { + if status.Merged { + mergedCount++ + } else { + unmergedCount++ + } + } + + mergeRate := 0.0 + if len(prStatuses) > 0 { + mergeRate = 100.0 * float64(mergedCount) / float64(len(prStatuses)) + } + + slog.Info("Calculated merge rate from all PRs", + "total_prs", len(prStatuses), + "merged", mergedCount, + "unmerged", unmergedCount, + "merge_rate_pct", mergeRate) + return ExtrapolatedBreakdown{ TotalPRs: totalPRs, HumanPRs: extHumanPRs, @@ -517,6 +582,11 @@ func ExtrapolateFromSamples(breakdowns []Breakdown, totalPRs, totalAuthors, actu TotalCost: extTotalCost, TotalHours: extTotalHours, + MergedPRs: mergedCount, + UnmergedPRs: unmergedCount, + MergeRate: mergeRate, + MergeRateNote: "Recently modified PRs successfully merged", + UniqueNonBotUsers: uniqueUserCount, R2RSavings: r2rSavings, } diff --git a/pkg/github/fetch.go b/pkg/github/fetch.go index 
d860bf8..cde29da 100644 --- a/pkg/github/fetch.go +++ b/pkg/github/fetch.go @@ -39,7 +39,7 @@ func PRDataFromPRX(prData *prx.PullRequestData) cost.PRData { // Fallback bot detection: if prx didn't mark it as a bot, check common bot names authorBot := pr.AuthorBot if !authorBot { - authorBot = isCommonBot(pr.Author) + authorBot = IsBot(pr.Author) if authorBot { slog.Info("Bot detected by name pattern (prx missed it)", "author", pr.Author, @@ -55,6 +55,8 @@ func PRDataFromPRX(prData *prx.PullRequestData) cost.PRData { Events: events, CreatedAt: pr.CreatedAt, ClosedAt: closedAt, + Merged: pr.Merged, + State: pr.State, } slog.Debug("Converted PRX data to cost.PRData", @@ -62,50 +64,13 @@ func PRDataFromPRX(prData *prx.PullRequestData) cost.PRData { "author_bot", authorBot, "prx_author_bot", pr.AuthorBot, "additions", pr.Additions, - "deletions", pr.Deletions) + "deletions", pr.Deletions, + "merged", pr.Merged, + "state", pr.State) return data } -// isCommonBot checks if a username matches common bot patterns. -// This is a fallback in case prx doesn't correctly mark the AuthorBot field. -func isCommonBot(username string) bool { - lowerName := strings.ToLower(username) - - // Common bot account names - botPatterns := []string{ - "dependabot", - "renovate", - "github-actions", - "codecov", - "greenkeeper", - "snyk-bot", - "allcontributors", - "imgbot", - "stalebot", - "mergify", - "netlify", - "vercel", - "codefactor-io", - "deepsource-autofix", - "pre-commit-ci", - "ready-to-review", - } - - for _, pattern := range botPatterns { - if strings.Contains(lowerName, pattern) { - return true - } - } - - // Check for [bot] suffix - if strings.HasSuffix(lowerName, "[bot]") { - return true - } - - return false -} - // FetchPRData retrieves pull request information from GitHub and converts it // to the format needed for cost calculation. 
// @@ -133,7 +98,7 @@ func FetchPRData(ctx context.Context, prURL string, token string, updatedAt time slog.Debug("Parsed PR URL", "owner", owner, "repo", repo, "number", number) // Get cache directory from user's cache directory - cacheDir, err := getCacheDir() + userCacheDir, err := os.UserCacheDir() if err != nil { slog.Warn("Failed to get cache directory, using non-cached client", "error", err) // Fallback to non-cached client @@ -147,6 +112,20 @@ func FetchPRData(ctx context.Context, prURL string, token string, updatedAt time return result, nil } + cacheDir := filepath.Join(userCacheDir, "prcost") + if err := os.MkdirAll(cacheDir, 0o700); err != nil { + slog.Warn("Failed to create cache directory, using non-cached client", "error", err) + // Fallback to non-cached client + client := prx.NewClient(token) + prData, err := client.PullRequest(ctx, owner, repo, number) + if err != nil { + slog.Error("GitHub API call failed", "owner", owner, "repo", repo, "pr", number, "error", err) + return cost.PRData{}, fmt.Errorf("failed to fetch PR data: %w", err) + } + result := PRDataFromPRX(prData) + return result, nil + } + // Create prx cache client for disk-based caching client, err := prx.NewCacheClient(token, cacheDir) if err != nil { @@ -225,9 +204,9 @@ func extractParticipantEvents(events []prx.Event) []cost.ParticipantEvent { } // Skip bots: check both prx's Bot field and common bot patterns - isBot := event.Bot || event.Actor == "github" || isCommonBot(event.Actor) - if isBot { - if !event.Bot && isCommonBot(event.Actor) { + isBotEvent := event.Bot || event.Actor == "github" || IsBot(event.Actor) + if isBotEvent { + if !event.Bot && IsBot(event.Actor) { slog.Debug("Bot event detected by name pattern (prx missed it)", "actor", event.Actor, "kind", event.Kind, @@ -246,21 +225,3 @@ func extractParticipantEvents(events []prx.Event) []cost.ParticipantEvent { return participantEvents } - -// getCacheDir returns the cache directory for prx client. 
-// Uses OS-specific user cache directory with prcost subdirectory. -func getCacheDir() (string, error) { - userCacheDir, err := os.UserCacheDir() - if err != nil { - return "", fmt.Errorf("get user cache dir: %w", err) - } - - cacheDir := filepath.Join(userCacheDir, "prcost") - - // Ensure cache directory exists - if err := os.MkdirAll(cacheDir, 0o700); err != nil { - return "", fmt.Errorf("create cache dir: %w", err) - } - - return cacheDir, nil -} diff --git a/pkg/github/fetch_test.go b/pkg/github/fetch_test.go index b28001b..d2f0d30 100644 --- a/pkg/github/fetch_test.go +++ b/pkg/github/fetch_test.go @@ -3,7 +3,6 @@ package github import ( "encoding/json" "os" - "strings" "testing" "time" @@ -277,77 +276,6 @@ func TestPRDataFromPRXWithRealData(t *testing.T) { t.Logf("PR 1891: %d human events out of %d total events", len(costData.Events), len(prxData.Events)) } -func TestGetCacheDir(t *testing.T) { - dir, err := getCacheDir() - if err != nil { - t.Fatalf("getCacheDir() error = %v", err) - } - if dir == "" { - t.Error("getCacheDir() returned empty string") - } - - // Should contain prcost in the path - if !strings.Contains(dir, "prcost") { - t.Errorf("getCacheDir() = %q, expected to contain 'prcost'", dir) - } -} - -func TestIsCommonBot(t *testing.T) { - tests := []struct { - name string - username string - want bool - }{ - {"dependabot", "dependabot[bot]", true}, - {"renovate", "renovate-bot", true}, - {"github-actions", "github-actions", true}, - {"codecov", "codecov-commenter", true}, - {"greenkeeper", "greenkeeper[bot]", true}, - {"snyk", "snyk-bot", true}, - {"allcontributors", "allcontributors[bot]", true}, - {"imgbot", "ImgBot", true}, // Case insensitive - {"stalebot", "stalebot", true}, - {"mergify", "mergify[bot]", true}, - {"netlify", "netlify[bot]", true}, - {"vercel", "vercel[bot]", true}, - {"codefactor", "codefactor-io", true}, - {"deepsource", "deepsource-autofix[bot]", true}, - {"pre-commit", "pre-commit-ci[bot]", true}, - {"regular user", 
"john-doe", false}, - {"bot in middle", "robot-person", false}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := isCommonBot(tt.username) - if got != tt.want { - t.Errorf("isCommonBot(%q) = %v, want %v", tt.username, got, tt.want) - } - }) - } -} - -func TestIsCommonBotCaseSensitivity(t *testing.T) { - tests := []struct { - name string - username string - want bool - }{ - {"uppercase BOT", "DEPENDABOT[bot]", true}, - {"mixed case", "DePeNdAbOt[bot]", true}, - {"lowercase all", "dependabot[bot]", true}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := isCommonBot(tt.username) - if got != tt.want { - t.Errorf("isCommonBot(%q) = %v, want %v", tt.username, got, tt.want) - } - }) - } -} - func TestExtractParticipantEventsEdgeCases(t *testing.T) { now := time.Now() @@ -438,73 +366,3 @@ func TestPRDataFromPRXWithRealSprinklerData(t *testing.T) { t.Logf("Sprinkler PR 37: %d human events out of %d total events", len(costData.Events), len(prxData.Events)) } - -func TestGetCacheDirCreatesDirectory(t *testing.T) { - // This test actually calls getCacheDir to improve coverage - dir, err := getCacheDir() - if err != nil { - t.Fatalf("getCacheDir() error = %v", err) - } - if dir == "" { - t.Error("getCacheDir() returned empty string") - } - - // Verify directory was created - info, err := os.Stat(dir) - if err != nil { - t.Errorf("Cache directory was not created: %v", err) - } - if !info.IsDir() { - t.Error("Cache path is not a directory") - } -} - -func TestIsCommonBotVariations(t *testing.T) { - tests := []struct { - username string - want bool - }{ - {"dependabot", true}, - {"dependabot[bot]", true}, - {"renovate", true}, - {"renovate-bot", true}, - {"github-actions", true}, - {"github-actions[bot]", true}, - {"codecov", true}, - {"codecov-commenter", true}, - {"greenkeeper", true}, - {"greenkeeper[bot]", true}, - {"snyk-bot", true}, - {"allcontributors", true}, - {"allcontributors[bot]", true}, - {"imgbot", 
true}, - {"ImgBot", true}, // case insensitive - {"stalebot", true}, - {"mergify", true}, - {"mergify[bot]", true}, - {"netlify", true}, - {"netlify[bot]", true}, - {"vercel", true}, - {"vercel[bot]", true}, - {"codefactor-io", true}, - {"deepsource-autofix", true}, - {"deepsource-autofix[bot]", true}, - {"pre-commit-ci", true}, - {"pre-commit-ci[bot]", true}, - {"ready-to-review", true}, - {"ready-to-review[bot]", true}, - {"regular-user", false}, - {"robot", false}, - {"botman", false}, - {"john-doe", false}, - } - - for _, tt := range tests { - t.Run(tt.username, func(t *testing.T) { - got := isCommonBot(tt.username) - if got != tt.want { - t.Errorf("isCommonBot(%q) = %v, want %v", tt.username, got, tt.want) - } - }) - } -} diff --git a/pkg/github/query.go b/pkg/github/query.go index c516ca3..b8c8861 100644 --- a/pkg/github/query.go +++ b/pkg/github/query.go @@ -18,7 +18,9 @@ type PRSummary struct { Owner string Repo string Author string + State string // "OPEN", "CLOSED", "MERGED" Number int + Merged bool // Whether the PR was merged } // ProgressCallback is called during PR fetching to report progress. @@ -143,6 +145,8 @@ func fetchPRsFromRepoWithSort(ctx context.Context, params repoSortParams) ([]PRS nodes { number updatedAt + state + merged author { login } @@ -214,6 +218,8 @@ func fetchPRsFromRepoWithSort(ctx context.Context, params repoSortParams) ([]PRS Nodes []struct { Number int UpdatedAt time.Time + State string + Merged bool Author struct{ Login string } } TotalCount int @@ -266,6 +272,8 @@ func fetchPRsFromRepoWithSort(ctx context.Context, params repoSortParams) ([]PRS Number: node.Number, Author: node.Author.Login, UpdatedAt: node.UpdatedAt, + State: node.State, + Merged: node.Merged, }) // Check if we've hit the maxPRs limit @@ -440,6 +448,8 @@ func fetchPRsFromOrgWithSort(ctx context.Context, params orgSortParams) ([]PRSum ... 
on PullRequest { number updatedAt + state + merged author { login } @@ -515,6 +525,8 @@ func fetchPRsFromOrgWithSort(ctx context.Context, params orgSortParams) ([]PRSum Nodes []struct { Number int UpdatedAt time.Time + State string + Merged bool Author struct{ Login string } Repository struct { Owner struct{ Login string } @@ -557,6 +569,8 @@ func fetchPRsFromOrgWithSort(ctx context.Context, params orgSortParams) ([]PRSum Number: node.Number, Author: node.Author.Login, UpdatedAt: node.UpdatedAt, + State: node.State, + Merged: node.Merged, }) // Check if we've hit the maxPRs limit @@ -613,27 +627,35 @@ func deduplicatePRsByOwnerRepoNumber(prs []PRSummary) []PRSummary { // IsBot returns true if the author name indicates a bot account. func IsBot(author string) bool { - // Check for common bot name patterns - if strings.HasSuffix(author, "[bot]") || strings.Contains(author, "-bot-") { + lowerAuthor := strings.ToLower(author) + + // Check for [bot] suffix + if strings.HasSuffix(lowerAuthor, "[bot]") { return true } - // Check for specific known bot usernames (case-insensitive) - lowerAuthor := strings.ToLower(author) - knownBots := []string{ - "renovate", + // Common bot account name patterns + botPatterns := []string{ "dependabot", + "renovate", "github-actions", "codecov", - "snyk", "greenkeeper", + "snyk", + "allcontributors", "imgbot", - "renovate-bot", - "dependabot-preview", - } - - for _, botName := range knownBots { - if lowerAuthor == botName { + "stalebot", + "mergify", + "netlify", + "vercel", + "codefactor-io", + "deepsource-autofix", + "pre-commit-ci", + "ready-to-review", + } + + for _, pattern := range botPatterns { + if strings.Contains(lowerAuthor, pattern) { return true } }