Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 84 additions & 1 deletion src/engine/engine.gh-pages-behavior.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function createMockChildProcess(): Partial<ChildProcess> {
* This is intentional - we want to know about any new git operations.
*/
const EXPECTED_GIT_COMMANDS = [
'clone', 'clean', 'fetch', 'checkout', 'ls-remote', 'reset',
'clone', 'clean', 'fetch', 'checkout', 'ls-remote', 'ls-files', 'reset',
'rm', 'add', 'config', 'diff-index', 'commit', 'tag', 'push', 'update-ref'
];

Expand Down Expand Up @@ -1066,4 +1066,87 @@ describe('gh-pages v6.3.0 - behavioral snapshot', () => {
expect(spawnCalls[0]?.args[0]).toBe('clone');
});
});

/**
* End-to-end regression for issue #204: gh-pages@6.3.0's "Removing files" step
* skips dotfiles and submodule gitlinks. Our beforeAdd hook should catch them
* via `git ls-files` and `git rm` the leftovers while leaving dist files alone.
*/
describe('submodule / dotfile cleanup regression (issue #204)', () => {
const engine = require('./engine');
const { cleanupMonkeypatch } = require('./engine.prepare-options-helpers');
const { logging } = require('@angular-devkit/core');

beforeEach(() => {
cleanupMonkeypatch();
});

it('engine.run() should git rm leftover gh-pages branch files not present in dist', async () => {
const repo = 'https://github.com/owner/leftover-test.git';
currentTestContext.repo = repo;

const leftoverFiles = [
'.github/workflows/deploy.yml',
'.gitignore',
'.gitmodules',
'build' // submodule gitlink
];

// Reconfigure the default mockSpawn so `git ls-files` returns our leftover set.
// Other git commands keep the outer describe's default behavior.
mockSpawn.mockImplementation((cmd: string, args: string[] | undefined, opts: unknown) => {
const capturedArgs = args || [];
spawnCalls.push({ cmd, args: capturedArgs, options: opts });

if (cmd === 'git' && capturedArgs[0] && !EXPECTED_GIT_COMMANDS.includes(capturedArgs[0])) {
throw new Error(`Unexpected git command: ${capturedArgs[0]}. Add to whitelist if intentional.`);
}

const child = createMockChildProcess();
setImmediate(() => {
let output = '';
if (cmd === 'git' && capturedArgs[0] === 'ls-files') {
output = leftoverFiles.join('\0') + '\0';
} else if (cmd === 'git' && capturedArgs[0] === 'config' &&
capturedArgs[1] === '--get' && capturedArgs[2]?.startsWith('remote.')) {
output = repo;
} else if (cmd === 'git' && capturedArgs[0] === 'ls-remote') {
output = 'refs/heads/gh-pages';
} else if (cmd === 'git' && capturedArgs[0] === 'diff-index') {
child.emit!('close', 1);
return;
}
child.stdout!.emit('data', Buffer.from(output));
child.emit!('close', 0);
});
return child;
});

const options = {
repo,
branch: 'gh-pages',
dotfiles: true,
notfound: false,
nojekyll: false
};

await engine.run(basePath, options, new logging.NullLogger());

// Our hook should have issued `git ls-files -z`.
const lsFilesCall = spawnCalls.find(c => c.cmd === 'git' && c.args[0] === 'ls-files');
expect(lsFilesCall).toBeDefined();
expect(lsFilesCall!.args).toContain('-z');

// And a subsequent `git rm` that targets exactly the leftovers.
const rmCalls = spawnCalls.filter(c => c.cmd === 'git' && c.args[0] === 'rm');
const cleanupCall = rmCalls.find(c => leftoverFiles.every(f => c.args.includes(f)));
expect(cleanupCall).toBeDefined();

// Dist files (from the outer beforeAll: index.html, main.js, styles.css, .htaccess)
// must NOT be in the cleanup call — those are files we just copied.
for (const distFile of ['index.html', 'main.js', 'styles.css', '.htaccess']) {
expect(cleanupCall!.args).not.toContain(distFile);
}
});
});
});
190 changes: 190 additions & 0 deletions src/engine/engine.gh-pages-cleanup.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
/**
* End-to-end integration test for issue #204 — REAL git, REAL gh-pages, REAL filesystem.
*
* No mocks. This test proves the `beforeAdd` cleanup hook actually produces a clean
* gh-pages commit when the branch previously contained dotfiles and submodule gitlinks.
*
* Flow:
* 1. Create a local bare git repo (serves as "remote").
* 2. Seed a gh-pages branch containing:
* - .github/workflows/deploy.yml (dot-directory, nested)
* - .gitignore (dotfile)
* - .gitmodules (dotfile)
* - a submodule gitlink `build` (mode 160000)
* - stale.html (regular file gh-pages' own remove would catch)
* 3. Run `engine.run()` against the bare repo with a dist containing only index.html.
* 4. `git ls-tree -r gh-pages` on the bare repo → assert ONLY `index.html` landed.
*
* A companion test demonstrates the upstream bug itself by calling `gh-pages.publish()`
* directly (bypassing our hook) and observing the leftovers leak into the commit. That
* test will start failing once tschaub/gh-pages ships PR #612 in a release — which is
* fine; it's the signal that our workaround can be removed.
*/

import * as path from 'path';
import * as fs from 'fs/promises';
import * as os from 'os';
import { execSync } from 'child_process';

import { logging } from '@angular-devkit/core';

import * as engine from './engine';
import { cleanupMonkeypatch } from './engine.prepare-options-helpers';

// NO MOCKS — we want real gh-pages behavior end-to-end
const ghPages = require('gh-pages');

interface GitOptions {
readonly cwd: string;
}

function git(args: string, options: GitOptions): string {
return execSync(`git -C "${options.cwd}" ${args}`, { stdio: ['pipe', 'pipe', 'pipe'] })
.toString()
.trim();
}

async function seedGhPagesBranch(workDir: string, bareRepoPath: string): Promise<void> {
await fs.mkdir(workDir, { recursive: true });
execSync(`git init "${workDir}"`, { stdio: 'pipe' });
git('config user.email "seed@test.com"', { cwd: workDir });
git('config user.name "Seed"', { cwd: workDir });
git('checkout -b gh-pages', { cwd: workDir });

// Dotfiles and dot-directory (what gh-pages' broken remove step misses)
await fs.mkdir(path.join(workDir, '.github', 'workflows'), { recursive: true });
await fs.writeFile(path.join(workDir, '.github', 'workflows', 'deploy.yml'), 'name: deploy\n');
await fs.writeFile(path.join(workDir, '.gitignore'), 'node_modules\n');
await fs.writeFile(
path.join(workDir, '.gitmodules'),
'[submodule "build"]\n\tpath = build\n\turl = https://example.invalid/build.git\n'
);
// A regular non-dot file — gh-pages' own remove step WILL catch this one.
await fs.writeFile(path.join(workDir, 'stale.html'), '<html>stale</html>\n');

git('add .github .gitignore .gitmodules stale.html', { cwd: workDir });
git('commit -m "seed dotfiles and stale content"', { cwd: workDir });

// Add a submodule gitlink without needing a real subrepo.
// update-index --cacheinfo creates the 160000 entry directly in the index.
const seedSha = git('rev-parse HEAD', { cwd: workDir });
git(`update-index --add --cacheinfo 160000,${seedSha},build`, { cwd: workDir });
git('commit -m "add submodule gitlink"', { cwd: workDir });

git(`remote add origin "${bareRepoPath}"`, { cwd: workDir });
git('push origin gh-pages', { cwd: workDir });
}

describe('end-to-end cleanup regression (issue #204, real git)', () => {
let tempDir: string;
let distDir: string;
let bareRepoPath: string;
let originalEnv: NodeJS.ProcessEnv;

// Unique suffix so parallel-ish reruns never collide in /tmp.
const testRunId = `${Date.now()}-${Math.random().toString(36).substring(7)}`;

beforeEach(async () => {
cleanupMonkeypatch();

// Isolate from ambient CI envs and tokens so engine.prepareOptions doesn't
// rewrite the repo URL or append CI metadata during tests.
originalEnv = { ...process.env };
delete process.env.TRAVIS;
delete process.env.CIRCLECI;
delete process.env.GITHUB_ACTIONS;
delete process.env.GH_TOKEN;
delete process.env.PERSONAL_TOKEN;
delete process.env.GITHUB_TOKEN;

tempDir = await fs.mkdtemp(path.join(os.tmpdir(), `ghp-204-${testRunId}-`));

distDir = path.join(tempDir, 'dist');
await fs.mkdir(distDir, { recursive: true });
await fs.writeFile(path.join(distDir, 'index.html'), '<html>fresh dist</html>\n');

bareRepoPath = path.join(tempDir, 'bare.git');
execSync(`git init --bare "${bareRepoPath}"`, { stdio: 'pipe' });

await seedGhPagesBranch(path.join(tempDir, 'seed'), bareRepoPath);

// Clear gh-pages' cache so each test starts from a clean clone.
ghPages.clean();
});

afterEach(async () => {
ghPages.clean();
cleanupMonkeypatch();
process.env = originalEnv;
await fs.rm(tempDir, { recursive: true, force: true });
});

it('engine.run() produces a gh-pages commit containing ONLY dist files (no dotfiles, no submodule)', async () => {
await engine.run(
distDir,
{
repo: bareRepoPath,
branch: 'gh-pages',
dotfiles: true,
// Keep assertions simple: don't let our own 404.html / .nojekyll machinery
// add files to the expected set.
notfound: false,
nojekyll: false,
name: 'Test',
email: 'test@test.com',
message: 'test deploy'
},
new logging.NullLogger()
);

// Inspect what actually landed on gh-pages in the bare repo.
const tree = git('ls-tree -r gh-pages --name-only', { cwd: bareRepoPath })
.split('\n')
.filter(Boolean)
.sort();

expect(tree).toEqual(['index.html']);

// Explicit negative checks — makes regressions very readable on failure.
expect(tree).not.toContain('.gitignore');
expect(tree).not.toContain('.gitmodules');
expect(tree).not.toContain('.github/workflows/deploy.yml');
expect(tree).not.toContain('build');
expect(tree).not.toContain('stale.html');
}, 30_000);

it('baseline: without our hook, gh-pages alone leaks dotfiles and submodule gitlinks (demonstrates the upstream bug)', async () => {
// Call gh-pages.publish() directly — no engine.run(), no beforeAdd hook.
// This is exactly what angular-cli-ghpages v3 did before our fix.
await new Promise<void>((resolve, reject) => {
ghPages.publish(
distDir,
{
repo: bareRepoPath,
branch: 'gh-pages',
dotfiles: true,
user: { name: 'Test', email: 'test@test.com' },
message: 'baseline: unhooked gh-pages publish'
},
(err: Error | null) => {
if (err) reject(err);
else resolve();
}
);
});

const tree = git('ls-tree -r gh-pages --name-only', { cwd: bareRepoPath })
.split('\n')
.filter(Boolean);

// gh-pages' broken remove step missed all of these:
expect(tree).toContain('.gitignore');
expect(tree).toContain('.gitmodules');
expect(tree).toContain('.github/workflows/deploy.yml');
expect(tree).toContain('build');
// Our dist file did land:
expect(tree).toContain('index.html');
// And the non-dot stale file DID get removed by gh-pages' (partial) remove step:
expect(tree).not.toContain('stale.html');
}, 30_000);
});
82 changes: 82 additions & 0 deletions src/engine/engine.prepare-options-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
*/

import { logging } from '@angular-devkit/core';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as util from 'util';

import { Schema } from '../deploy/schema';
Expand Down Expand Up @@ -225,6 +227,86 @@ export async function injectTokenIntoRepoUrl(options: PreparedOptions): Promise<
}
}

/**
* Minimal subset of the gh-pages internal Git class that our cleanup hook relies on.
* See src/node_modules/gh-pages/lib/git.js — `Git.prototype.exec`, `Git.prototype.rm`, `cwd`, `output`.
*/
export interface GhPagesGit {
cwd: string;
output: string;
exec(...args: string[]): Promise<GhPagesGit>;
rm(files: string[]): Promise<GhPagesGit>;
}

/**
* Recursively walk a directory and return the set of relative file paths,
* using POSIX separators so the output matches what `git ls-files` prints.
* Honors the same dotfile-inclusion semantics as gh-pages' `options.dotfiles`.
*/
export async function collectDistFiles(
baseDir: string,
includeDotfiles: boolean
): Promise<Set<string>> {
const files = new Set<string>();

async function walk(relDir: string): Promise<void> {
const fullDir = relDir ? path.join(baseDir, relDir) : baseDir;
const entries = await fs.readdir(fullDir, { withFileTypes: true });
for (const entry of entries) {
if (!includeDotfiles && entry.name.startsWith('.')) {
continue;
}
const relPath = relDir ? `${relDir}/${entry.name}` : entry.name;
if (entry.isDirectory()) {
await walk(relPath);
} else if (entry.isFile()) {
files.add(relPath);
}
}
}

await walk('');
return files;
}

/**
* Create a `beforeAdd` hook that removes leftover files from the gh-pages branch
* before gh-pages stages our dist for commit.
*
* Why this exists (issue #204):
* gh-pages@6.3.0's "Removing files" step calls globby without `dot: true`, so
* dotfiles (.gitignore, .gitmodules, .github/…) and submodule gitlinks from the
* gh-pages branch are NOT removed before our dist is copied on top. They then
* get re-committed and leak into the deploy.
*
* Fix: after gh-pages' broken remove + our file copy, ask git what it still has
* indexed (`git ls-files -z`), diff against the set of files in our dist, and
* `git rm` the leftovers. `git rm` correctly handles submodule gitlinks too.
*
* Upstream fix: tschaub/gh-pages#612 (merged 2025-08-09, unreleased as of
* gh-pages@6.3.0). When a release containing that PR lands, this hook becomes
* redundant and can be removed.
*/
export function createCleanupBeforeAddHook(
distDir: string,
dotfiles: boolean,
logger: logging.LoggerApi
): (git: GhPagesGit) => Promise<void> {
return async (git) => {
const distFiles = await collectDistFiles(distDir, dotfiles);
await git.exec('ls-files', '-z');
const tracked = (git.output || '').split('\0').filter(Boolean);
const toRemove = tracked.filter((f) => !distFiles.has(f));
if (toRemove.length === 0) {
return;
}
logger.info(
`Removing ${toRemove.length} leftover file(s) from gh-pages branch not in dist: ${toRemove.join(', ')}`
);
await git.rm(toRemove);
};
}

/**
* Get the remote URL from the git repository
*
Expand Down
Loading