From 16019c356bdb501c9d5735b6b68cdc8a766adba5 Mon Sep 17 00:00:00 2001 From: Ansel Robateau Date: Fri, 3 Oct 2025 13:52:15 -0500 Subject: [PATCH 1/3] =?UTF-8?q?chore:=20=F0=9F=A4=96=20update=20package=20?= =?UTF-8?q?version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 112c78d..24e1063 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "github-actions", - "version": "2.24.0", + "version": "2.25.0", "description": "Used to store GitHub actions for use across the enterprise", "scripts": { "test": "./tooling/scripts/run_tests.sh", From cc9bcd24d3501a7ea9d99f1363a50c845cf8de19 Mon Sep 17 00:00:00 2001 From: Ansel Robateau Date: Sun, 2 Nov 2025 12:22:55 -0600 Subject: [PATCH 2/3] fix: update gemini model (CORE-2834) (#121) * fix: update gemini model * fix: rate limit requests to gemini * fix: limit tokens to 100,000 * fix: change calculation for chunking * fix: simplify large diff pr creation * fix: enhance split functionality. * chore: add debugging by printing the number of estimated tokens being sent. * fix: split even the first line * chore: add debug logging * bypass gemini call for debugging purposes * log the pr body * chore: send debug logs to console.error * chore: add more debugging * chore: remove debug logging * feat: activate gemini calls --- .../actions/auto-pr-description/action.yml | 2 + .../generate_pr_description.js | 103 +++++++++++------- .../generate_pr_description.js | 2 +- 3 files changed, 65 insertions(+), 42 deletions(-) diff --git a/.github/actions/auto-pr-description/action.yml b/.github/actions/auto-pr-description/action.yml index cd0c490..365d981 100644 --- a/.github/actions/auto-pr-description/action.yml +++ b/.github/actions/auto-pr-description/action.yml @@ -80,6 +80,8 @@ runs: printf '%s\n' "$DESCRIPTION" > pr_body.md fi + cat pr_body.md + # Add JIRA ticket link if found PR_TITLE=$(gh pr view ${{ inputs.pr-number }} --json title --jq '.title') TICKET_ID=$(echo "$PR_TITLE" | grep -oE '[A-Z]+-[0-9]+' || true) diff --git a/.github/actions/auto-pr-description/generate_pr_description.js b/.github/actions/auto-pr-description/generate_pr_description.js index 9ca4472..7eac3cd 100644 --- a/.github/actions/auto-pr-description/generate_pr_description.js +++ b/.github/actions/auto-pr-description/generate_pr_description.js @@ -4,10 +4,10 @@ const fs = require('fs'); const path = require('path'); // Configuration constants -const MAX_TOKENS_PER_REQUEST = 100000; // Conservative limit for Gemini 2.5 Flash +const MAX_TOKENS_PER_REQUEST = 80000; // Conservative limit for Gemini 2.5 Flash const CHARS_PER_TOKEN = 4; // Rough estimation -const MAX_CHARS_PER_CHUNK = MAX_TOKENS_PER_REQUEST * CHARS_PER_TOKEN; -const MAX_CHUNKS = 10; // Limit to prevent excessive API calls +//const MAX_CHARS_PER_CHUNK = MAX_TOKENS_PER_REQUEST * CHARS_PER_TOKEN; +const MAX_CHUNKS = 3; // Limit to prevent excessive API calls /** * Estimate token count for text (rough approximation) @@ -16,35 +16,80 @@ function estimateTokens(text) { return Math.ceil(text.length / CHARS_PER_TOKEN); } + +function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + + +function splitStringByTokens(str, maxTokens) { + console.error('splitStringByTokens'); + const words = str.split(' '); + const result = []; + let currentLine = ''; + + for (const word of words) { + if (estimateTokens(currentLine + word) <= maxTokens) { + currentLine += (currentLine ? ' ' : '') + word; + } else { + if (currentLine) result.push(currentLine); + currentLine = word; + } + } + + if (currentLine) result.push(currentLine); + + return result; +} + + /** * Split diff into chunks by file boundaries */ function chunkDiffByFiles(diffContent) { + console.error('chunkDiffByFiles'); const fileChunks = []; const lines = diffContent.split('\n'); let currentChunk = ''; let currentFile = ''; + let tokenCount = 0; for (const line of lines) { // Check if this is a new file header + //console.error(`Line is estimated at ${estimateTokens(line)} tokens`); + tokenCount += estimateTokens(line); + //console.error(`Total tokens for this chunk is ${tokenCount}`); if (line.startsWith('diff --git') || line.startsWith('+++') || line.startsWith('---')) { // If we have content and it's getting large, save current chunk - if (currentChunk && estimateTokens(currentChunk) > MAX_CHARS_PER_CHUNK / 2) { + if (currentChunk && tokenCount > MAX_TOKENS_PER_REQUEST) { fileChunks.push({ content: currentChunk.trim(), file: currentFile, type: 'file-chunk' }); currentChunk = ''; + tokenCount = 0; } // Start new chunk currentChunk = line + '\n'; + // Extract filename for reference if (line.startsWith('+++')) { currentFile = line.replace('+++ b/', '').replace('+++ a/', ''); } + if(tokenCount > MAX_TOKENS_PER_REQUEST){ + const split_chunk = splitStringByTokens(currentChunk, MAX_TOKENS_PER_REQUEST); + currentChunk = split_chunk[split_chunk.length-1]; + for(let i = 0; i < split_chunk.length -1;i++){ + fileChunks.push({ + content: split_chunk[i].trim(), + file: currentFile, + type: 'file-chunk' + }); + } + } } else { currentChunk += line + '\n'; } @@ -107,7 +152,8 @@ ${diffContent}`; * Call Gemini API with the given prompt */ async function callGeminiAPI(prompt, apiKey) { - const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=${apiKey}`, { + console.error(`Sending prompt with an estimated ${estimateTokens(prompt)} tokens`); + const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${apiKey}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ @@ -147,6 +193,7 @@ async function callGeminiAPI(prompt, apiKey) { * Process diff chunks and combine results */ async function processChunks(chunks, apiKey) { + console.error('processchunks'); if (chunks.length === 1) { // Single chunk, process normally return await callGeminiAPI(createPRPrompt(chunks[0].content), apiKey); @@ -157,6 +204,10 @@ async function processChunks(chunks, apiKey) { for (let i = 0; i < Math.min(chunks.length, MAX_CHUNKS); i++) { const chunk = chunks[i]; + if (i > 0) { + // sleep for 3 seconds + sleep(5 * 1000); + } console.error(`Processing chunk ${i + 1}/${Math.min(chunks.length, MAX_CHUNKS)} (${chunk.file || 'unknown file'})`); try { @@ -174,7 +225,7 @@ async function processChunks(chunks, apiKey) { if (chunkResults.length === 0) { throw new Error('Failed to process any chunks'); } - + sleep(5*1000); // Combine results from multiple chunks const combinedPrompt = `Combine these pull request descriptions into a single, coherent PR description. Use the same format: @@ -215,42 +266,12 @@ Create a unified description that captures the overall changes across all files. if (estimatedTokens > MAX_TOKENS_PER_REQUEST) { console.error('Large diff detected, using chunking strategy...'); - // For extremely large diffs, first try to summarize - if (estimatedTokens > MAX_TOKENS_PER_REQUEST * 5) { - console.error('Extremely large diff detected, using summary approach...'); - const summaryPrompt = createSummaryPrompt(diffContent); - result = await callGeminiAPI(summaryPrompt, apiKey); - - // Create a simplified PR description based on the summary - const prPrompt = `Based on this summary of changes, create a pull request description using this format: - -## Description -Brief summary of changes (1-2 sentences max). - -## Changes -- [ ] Key change 1 -- [ ] Key change 2 -- [ ] Key change 3 (max 5 items) - -## Verification -- [ ] Test step 1 -- [ ] Test step 2 -- [ ] Test step 3 (max 3 items) - -Summary: ${result}`; - - result = await callGeminiAPI(prPrompt, apiKey); - } else { - // Chunk the diff and process - const chunks = chunkDiffByFiles(diffContent); - console.error(`Split diff into ${chunks.length} chunks`); - - if (chunks.length > MAX_CHUNKS) { - console.error(`Warning: Too many chunks (${chunks.length}), processing first ${MAX_CHUNKS} chunks only`); - } - - result = await processChunks(chunks, apiKey); + const chunks = chunkDiffByFiles(diffContent); + console.error(`Split diff into ${chunks.length} chunks`); + if (chunks.length > MAX_CHUNKS) { + console.error(`Warning: Too many chunks (${chunks.length}), processing first ${MAX_CHUNKS} chunks only`); } + result = await processChunks(chunks, apiKey); } else { // Small diff, process normally result = await callGeminiAPI(createPRPrompt(diffContent), apiKey); diff --git a/.github/actions/auto-release-description/generate_pr_description.js b/.github/actions/auto-release-description/generate_pr_description.js index 60c3753..1f03180 100644 --- a/.github/actions/auto-release-description/generate_pr_description.js +++ b/.github/actions/auto-release-description/generate_pr_description.js @@ -43,7 +43,7 @@ Keep it concise and focused on the most important changes.`; const combinedPrompt = `${promptTemplate}\n\nHere is the git diff:\n\n${diffContent}`; try { - const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent?key=${apiKey}`, { + const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=${apiKey}`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ From a7aa4b798a89abe2b21e42f6a563bd874987fe2c Mon Sep 17 00:00:00 2001 From: Ansel Robateau Date: Sun, 2 Nov 2025 12:27:24 -0600 Subject: [PATCH 3/3] =?UTF-8?q?chore:=20=F0=9F=A4=96=20update=20package=20?= =?UTF-8?q?version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ✅ Closes: CORE-2834 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 24e1063..33c54e7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "github-actions", - "version": "2.25.0", + "version": "2.25.1", "description": "Used to store GitHub actions for use across the enterprise", "scripts": { "test": "./tooling/scripts/run_tests.sh",