more stable request handling
digizeph committed Nov 19, 2023
1 parent aaa4ef8 commit abc6493
Showing 3 changed files with 17 additions and 4 deletions.
9 changes: 9 additions & 0 deletions src/crawler/common.rs
@@ -83,6 +83,15 @@ pub fn extract_link_size(body: &str) -> Vec<(String, i64)> {
     res
 }
 
+pub(crate) async fn fetch_body(url: &str) -> Result<String, BrokerError> {
+    let client = reqwest::Client::builder()
+        .user_agent("bgpkit-broker/3")
+        .pool_max_idle_per_host(0)
+        .build()?;
+    let body = client.get(url).send().await?.text().await?;
+    Ok(body)
+}
+
 /// Remove trailing slash from a string.
 ///
 /// # Arguments
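A minimal usage sketch (not part of this commit; the caller name and URL are placeholders): every call to fetch_body builds a fresh reqwest client with pool_max_idle_per_host(0), so no idle connection is kept around to be reused after the remote archive server may have closed it, and each request identifies itself with the bgpkit-broker/3 user agent. Trading a little per-request connection setup for never touching a stale pooled connection appears to be the "more stable request handling" the commit title refers to.

use crate::crawler::common::fetch_body;
use crate::BrokerError;

// Hypothetical caller for illustration only; the URL is a placeholder.
async fn crawl_example() -> Result<usize, BrokerError> {
    // fetch_body opens a fresh connection (no idle pooling) and returns the
    // response body as a String, converting reqwest errors into BrokerError.
    let body = fetch_body("https://archive.example.org/2023.11/").await?;
    Ok(body.len())
}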
6 changes: 4 additions & 2 deletions src/crawler/riperis.rs
@@ -1,4 +1,6 @@
-use crate::crawler::common::{crawl_months_list, extract_link_size, remove_trailing_slash};
+use crate::crawler::common::{
+    crawl_months_list, extract_link_size, fetch_body, remove_trailing_slash,
+};
 use crate::crawler::Collector;
 use crate::{BrokerError, BrokerItem};
 use chrono::{NaiveDate, NaiveDateTime};
@@ -44,7 +46,7 @@ pub async fn crawl_ripe_ris(
 async fn crawl_month(url: String, collector_id: String) -> Result<Vec<BrokerItem>, BrokerError> {
     let url = remove_trailing_slash(url.as_str());
     debug!("crawling data for {} ...", url.as_str());
-    let body = reqwest::get(url.as_str()).await?.text().await?;
+    let body = fetch_body(url.as_str()).await?;
     debug!(" download for {} finished ", url.as_str());
 
     let new_url = url.to_string();
6 changes: 4 additions & 2 deletions src/crawler/routeviews.rs
@@ -1,4 +1,6 @@
-use crate::crawler::common::{crawl_months_list, extract_link_size, remove_trailing_slash};
+use crate::crawler::common::{
+    crawl_months_list, extract_link_size, fetch_body, remove_trailing_slash,
+};
 use crate::crawler::Collector;
 use crate::{BrokerError, BrokerItem};
 use chrono::{NaiveDate, NaiveDateTime};
@@ -51,7 +53,7 @@ async fn crawl_month(url: String, collector_id: String) -> Result<Vec<BrokerItem
     // RIBS
     for subdir in ["RIBS", "UPDATES"] {
         let url = format!("{}/{}", &root_url, subdir);
-        let body = reqwest::get(url.as_str()).await?.text().await?;
+        let body = fetch_body(url.as_str()).await?;
         let collector_id_clone = collector_id.clone();
         let data_items: Vec<BrokerItem> = tokio::task::spawn_blocking(move || {
             let items = extract_link_size(body.as_str());
