+
Corpus Types
+
HAR: Automated network captures of initial page loads
+
HTML: Manual SingleFile captures of post-interaction states
+
+
+
+ `)
+ } catch (_error) {
+ res.status(500).send('Error listing corpus files')
+ }
+})
+
+// Serve the main page of the corpus entry identified by :key, in either `clean` or `gitcasso` mode
+app.get('/corpus/:key/:mode(clean|gitcasso)', async (req, res) => {
+ try {
+ // biome-ignore lint/complexity/useLiteralKeys: type comes from path string
+ const key = req.params['key']
+ // biome-ignore lint/complexity/useLiteralKeys: type comes from path string
+ const mode = req.params['mode'] as 'clean' | 'gitcasso'
+
+ if (!key || !(key in CORPUS)) {
+ return res.status(400).send('Invalid key - not found in CORPUS')
+ }
+
+ const entry = CORPUS[key]!
+
+ if (entry.type === 'har') {
+ // Handle HAR corpus
+ const harData = await loadHar(key)
+ const originalUrl = entry.url
+ const mainEntry =
+ harData.log.entries.find(
+ (entry) =>
+ entry.request.url === originalUrl &&
+ entry.response.content.mimeType?.includes('text/html') &&
+ entry.response.content.text,
+ ) ||
+ harData.log.entries.find(
+ (entry) =>
+ entry.response.status === 200 &&
+ entry.response.content.mimeType?.includes('text/html') &&
+ entry.response.content.text,
+ )
+ if (!mainEntry) {
+ return res.status(404).send('No HTML content found in HAR file')
+ }
+
+ // Extract all domains from HAR entries for dynamic replacement
+ const domains = new Set