Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Handle and Retry Failed Request
  • Loading branch information
Jacob Goh committed Nov 10, 2018
1 parent 6aaed6d commit 3098b48
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions index.js
@@ -1,5 +1,13 @@
const { BehaviorSubject, from } = require('rxjs');
const { map, distinct, filter, mergeMap, share } = require('rxjs/operators');
const { BehaviorSubject, from, of } = require('rxjs');
const {
map,
distinct,
filter,
mergeMap,
retry,
catchError,
share
} = require('rxjs/operators');
const rp = require('request-promise-native');
const normalizeUrl = require('normalize-url');
const cheerio = require('cheerio');
Expand All @@ -8,6 +16,7 @@ const fs = require('fs');

// Starting URL; used as the initial value of allUrl$ below.
const baseUrl = `https://imdb.com`;
// NOTE(review): presumably the cap on simultaneous requests — its use is not
// visible in this excerpt; confirm against the mergeMap call.
const maxConcurrentReq = 10;
// Count handed to retry() for each page request; once exhausted, the error is
// logged and the failed result is filtered out of the stream.
const maxRetries = 5;

// Subject seeded with baseUrl, so its first emitted value is the starting URL.
// NOTE(review): presumably newly discovered links are pushed here — confirm.
const allUrl$ = new BehaviorSubject(baseUrl);

Expand All @@ -24,6 +33,15 @@ const urlAndDOM$ = uniqueUrl$.pipe(
mergeMap(
url => {
return from(rp(url)).pipe(
retry(maxRetries),
catchError(error => {
const { uri } = error.options;
console.log(`Error requesting ${uri} after ${maxRetries} retries.`);
// return null on error
return of(null);
}),
// filter out errors
filter(v => v),
// get the cheerio function $
map(html => cheerio.load(html)),
// add URL to the result. It will be used later for crawling
Expand Down

0 comments on commit 3098b48

Please sign in to comment.