Skip to content

Commit b8cacfb

Browse files
committed
refactor: remove duplicate code
1 parent 0a90b35 commit b8cacfb

File tree

1 file changed

+3
-8
lines changed

1 file changed

+3
-8
lines changed

src/crawlers.ts

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -78,25 +78,20 @@ export async function createAndStartSearchCrawler(
7878
log.info(`Search-crawler requestHandler: Processing URL: ${request.url}`);
7979
const organicResults = scrapeOrganicResults($);
8080

81-
// filter organic results to get only results with URL
82-
let results = organicResults.filter((result) => result.url !== undefined);
83-
// remove results with URL starting with '/search?q=' (google return empty search results for images)
84-
results = results.filter((result) => !result.url!.startsWith('/search?q='));
85-
8681
// Destructure userData for easier access (pagination fields are initialized in createSearchRequest)
8782
const { collectedResults, currentPage, totalPages, maxResults } = request.userData;
8883

8984
// Merge with previously collected results and deduplicate
90-
const allResults = [...collectedResults, ...results];
85+
const allResults = [...collectedResults, ...organicResults];
9186
const deduplicated = deduplicateResults(allResults);
9287

93-
log.info(`Page ${currentPage + 1}/${totalPages}: Extracted ${results.length} results, Total unique: ${deduplicated.length}/${maxResults}`);
88+
log.info(`Page ${currentPage + 1}/${totalPages}: Extracted ${organicResults.length} results, Total unique: ${deduplicated.length}/${maxResults}`);
9489

9590
// Decide whether to fetch the next page
9691
// Continue fetching if: (1) we haven't reached maxResults AND (2) we haven't exceeded totalPages AND (3) Google returned results
9792
const shouldFetchNextPage = deduplicated.length < maxResults
9893
&& currentPage + 1 < totalPages
99-
&& results.length > 0; // Stop if Google returned 0 results (empty page)
94+
&& organicResults.length > 0; // Stop if Google returned 0 results (empty page)
10095

10196
if (shouldFetchNextPage) {
10297
// Queue the next page

0 commit comments

Comments
 (0)