Skip to content

Commit

Permalink
Feature: cache file signature detection (#1238)
Browse files Browse the repository at this point in the history
  • Loading branch information
emmercm committed Jul 26, 2024
1 parent 5d27f2b commit 54d4b73
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 105 deletions.
1 change: 1 addition & 0 deletions src/console/progressBar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export const ProgressBarSymbol = {
// Candidates
GENERATING: chalk.cyan('Σ'),
FILTERING: chalk.cyan('∆'),
EXTENSION_CORRECTION: chalk.cyan('.'),
HASHING: chalk.cyan('#'),
VALIDATING: chalk.cyan(process.platform === 'win32' ? '?' : '≟'),
COMBINING_ALL: chalk.cyan(process.platform === 'win32' ? 'U' : '∪'),
Expand Down
15 changes: 5 additions & 10 deletions src/modules/candidateExtensionCorrector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import DAT from '../types/dats/dat.js';
import Parent from '../types/dats/parent.js';
import ROM from '../types/dats/rom.js';
import ArchiveEntry from '../types/files/archives/archiveEntry.js';
import FileSignature from '../types/files/fileSignature.js';
import FileCache from '../types/files/fileCache.js';
import Options, { FixExtension } from '../types/options.js';
import OutputFactory from '../types/outputFactory.js';
import ReleaseCandidate from '../types/releaseCandidate.js';
Expand Down Expand Up @@ -52,7 +52,7 @@ export default class CandidateExtensionCorrector extends Module {
.filter((romWithFiles) => this.romNeedsCorrecting(romWithFiles))
.length;
this.progressBar.logTrace(`${dat.getNameShort()}: correcting ${romsThatNeedCorrecting.toLocaleString()} output file extension${romsThatNeedCorrecting !== 1 ? 's' : ''}`);
await this.progressBar.setSymbol(ProgressBarSymbol.HASHING);
await this.progressBar.setSymbol(ProgressBarSymbol.EXTENSION_CORRECTION);
await this.progressBar.reset(romsThatNeedCorrecting);

const correctedParentsToCandidates = await this.correctExtensions(dat, parentsToCandidates);
Expand Down Expand Up @@ -148,21 +148,16 @@ export default class CandidateExtensionCorrector extends Module {
this.progressBar.logTrace(`${dat.getNameShort()}: ${parent.getName()}: correcting extension for: ${romWithFiles.getInputFile()
.toString()}`);

await romWithFiles.getInputFile().createReadStream(async (stream) => {
const romSignature = await FileSignature.signatureFromFileStream(stream);
if (!romSignature) {
// No signature was found, so we can't perform any correction
return;
}

const romSignature = await FileCache.getOrComputeFileSignature(romWithFiles.getInputFile());
if (romSignature) {
// ROM file signature found, use the appropriate extension
const { dir, name } = path.parse(correctedRom.getName());
const correctedRomName = path.format({
dir,
name: name + romSignature.getExtension(),
});
correctedRom = correctedRom.withName(correctedRomName);
});
}

this.progressBar.removeWaitingMessage(waitingMessage);
await this.progressBar.incrementDone();
Expand Down
13 changes: 7 additions & 6 deletions src/types/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,17 @@ export default class Cache<V> {
* Delete a key in the cache.
*/
public async delete(key: string | RegExp): Promise<void> {
let keys: string[];
let keysToDelete: string[];
if (key instanceof RegExp) {
keys = [...this.keys().keys()].filter((k) => k.match(key));
keysToDelete = [...this.keys().keys()].filter((k) => k.match(key));
} else {
keys = [key];
keysToDelete = [key];
}

await Promise.all(keys.map(async (k) => {
await this.lockKey(k, () => this.deleteUnsafe(k));
}));
// Note: avoiding lockKey() because it could get expensive with many keys to delete
await this.keyMutexesMutex.runExclusive(() => {
keysToDelete.forEach((k) => this.deleteUnsafe(k));
});
}

private deleteUnsafe(key: string): void {
Expand Down
61 changes: 51 additions & 10 deletions src/types/files/fileCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Archive from './archives/archive.js';
import ArchiveEntry, { ArchiveEntryProps } from './archives/archiveEntry.js';
import File, { FileProps } from './file.js';
import { ChecksumBitmask } from './fileChecksums.js';
import FileSignature from './fileSignature.js';
import ROMHeader from './romHeader.js';

interface CacheValue {
Expand All @@ -14,11 +15,15 @@ interface CacheValue {
value: FileProps | ArchiveEntryProps<Archive>[] | string | undefined,
}

enum ValueType {
FILE_CHECKSUMS = 'F',
ARCHIVE_CHECKSUMS = 'A',
FILE_HEADER = 'H',
}
// Markers placed at the end of every cache key to identify what kind of value the entry holds.
const ValueType = {
  FILE_CHECKSUMS: 'F',
  ARCHIVE_CHECKSUMS: 'A',
  // "No ROM header found" and "no file signature found" are valid results, and they get cached
  // like any other. Once the list of known headers or signatures changes, a lookup that used to
  // come back undefined might succeed, so the known counts are baked into these markers in order
  // to bust the old cache entries.
  ROM_HEADER: 'H' + ROMHeader.getKnownHeaderCount(),
  FILE_SIGNATURE: 'S' + FileSignature.getKnownSignatureCount(),
};

export default class FileCache {
private static readonly VERSION = 3;
Expand All @@ -45,7 +50,8 @@ export default class FileCache {
const keyRegex = new RegExp(`^V${prevVersion}\\|`);
return this.cache.delete(keyRegex);
}));
// await this.cache.delete(new RegExp(`\\|[^${Object.values(ValueType).join()}]$`));
// Delete keys from old value types
await this.cache.delete(new RegExp(`\\|(?!(${Object.values(ValueType).join('|')}))[^|]+$`));

// Delete keys for deleted files
const disks = FsPoly.disksSync();
Expand Down Expand Up @@ -198,7 +204,7 @@ export default class FileCache {
static async getOrComputeFileHeader(file: File): Promise<ROMHeader | undefined> {
// NOTE(cemmer): we're explicitly not catching ENOENT errors here, we want it to bubble up
const stats = await FsPoly.stat(file.getFilePath());
const cacheKey = this.getCacheKey(file.toString(), ValueType.FILE_HEADER);
const cacheKey = this.getCacheKey(file.toString(), ValueType.ROM_HEADER);

const cachedValue = await this.cache.getOrCompute(
cacheKey,
Expand All @@ -214,10 +220,11 @@ export default class FileCache {
},
(cached) => {
if (cached.fileSize !== stats.size || cached.modifiedTimeMillis !== stats.mtimeMs) {
// File has changed since being cached
// Recompute if the file has changed since being cached
return true;
}
return false;
// Recompute if the cached value isn't known
return typeof cached.value === 'string' && !ROMHeader.headerFromName(cached.value);
},
);

Expand All @@ -228,7 +235,41 @@ export default class FileCache {
return ROMHeader.headerFromName(cachedHeaderName);
}

private static getCacheKey(filePath: string, valueType: ValueType): string {
/**
 * Resolve the {@link FileSignature} of a file by going through the cache.
 *
 * The cached entry stores the file's size and modified time alongside the signature's name. The
 * entry is flagged for recomputation when either of those no longer matches the file on disk, or
 * when the cached name doesn't resolve to a known signature anymore. A cached "no signature"
 * result (an undefined value) for an unchanged file is not flagged.
 *
 * @param file the file to detect a signature for
 * @returns the detected signature, or undefined if none was detected
 */
static async getOrComputeFileSignature(file: File): Promise<FileSignature | undefined> {
  // NOTE(cemmer): ENOENT errors are deliberately not caught here, they should bubble up
  const fileStats = await FsPoly.stat(file.getFilePath());

  const cacheEntry = await this.cache.getOrCompute(
    this.getCacheKey(file.toString(), ValueType.FILE_SIGNATURE),
    async () => {
      // Nothing usable is cached, detect the signature from the file's contents
      const detected = await file.createReadStream(
        async (readable) => FileSignature.signatureFromFileStream(readable),
      );
      return {
        fileSize: fileStats.size,
        modifiedTimeMillis: fileStats.mtimeMs,
        value: detected?.getName(),
      };
    },
    (cached) => {
      const fileChanged = cached.fileSize !== fileStats.size
        || cached.modifiedTimeMillis !== fileStats.mtimeMs;
      // Recompute for a changed file, or for a cached signature name that isn't known
      return fileChanged
        || (typeof cached.value === 'string' && !FileSignature.signatureFromName(cached.value));
    },
  );

  const signatureName = cacheEntry.value as string | undefined;
  return signatureName ? FileSignature.signatureFromName(signatureName) : undefined;
}

/**
 * Build the key that a value is stored under: the cache version, the file's path, and the value
 * type marker, separated by pipes.
 */
private static getCacheKey(filePath: string, valueType: string): string {
  const versionPrefix = 'V' + FileCache.VERSION;
  return versionPrefix + '|' + filePath + '|' + valueType;
}
}
4 changes: 1 addition & 3 deletions src/types/files/fileFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,7 @@ export default class FileFactory {
let signature: FileSignature | undefined;
try {
const file = await File.fileOf({ filePath });
signature = await file.createReadStream(
async (stream) => FileSignature.signatureFromFileStream(stream),
);
signature = await FileCache.getOrComputeFileSignature(file);
} catch {
// Fail silently on assumed I/O errors
return undefined;
Expand Down
Loading

0 comments on commit 54d4b73

Please sign in to comment.