Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only download pages matching the user's languages #345

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 61 additions & 19 deletions src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::{

use anyhow::{ensure, Context, Result};
use log::debug;
use reqwest::{blocking::Client, Proxy};
use reqwest::{blocking::Client, Proxy, StatusCode};
use walkdir::{DirEntry, WalkDir};
use zip::ZipArchive;

Expand Down Expand Up @@ -134,7 +134,7 @@ impl Cache {
}

/// Download the archive from the specified URL.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe document the return type?

Suggested change
/// Download the archive from the specified URL.
/// Download the archive from the specified URL.
/// Return `None` if the URL results in a HTTP 404.

fn download(archive_url: &str) -> Result<Vec<u8>> {
fn download(archive_url: &str) -> Result<Option<Vec<u8>>> {
let mut builder = Client::builder();
if let Ok(ref host) = env::var("HTTP_PROXY") {
if let Ok(proxy) = Proxy::http(host) {
Expand All @@ -149,29 +149,60 @@ impl Cache {
let client = builder
.build()
.context("Could not instantiate HTTP client")?;
let mut resp = client
.get(archive_url)
.send()?
debug!("Trying to download {archive_url}");
let response = client.get(archive_url).send()?;
if response.status() == StatusCode::NOT_FOUND {
return Ok(None);
}
let mut response = response
.error_for_status()
.with_context(|| format!("Could not download tldr pages from {archive_url}"))?;
.with_context(|| format!("Unexpected HTTP error downloading {archive_url}"))?;
let mut buf: Vec<u8> = vec![];
let bytes_downloaded = resp.copy_to(&mut buf)?;
let bytes_downloaded = response.copy_to(&mut buf)?;
debug!("{} bytes downloaded", bytes_downloaded);
Ok(buf)
Ok(Some(buf))
}

/// Update the pages cache from the specified URL.
pub fn update(&self, archive_url: &str) -> Result<()> {
pub fn update(&self, archives_url: &str, languages: &[String]) -> Result<()> {
self.ensure_cache_dir_exists()?;

// First, download the compressed data
let bytes: Vec<u8> = Self::download(archive_url)?;
debug!("Updating with languages: {}", languages.join(", "));

// Download all archives before starting to unpack them so that we leave the cache intact
// if any unexpected errors happen.
let downloaded_archives = languages
.iter()
.flat_map(|language| {
let url = if language == "en" {
format!("{archives_url}/tldr-pages.zip")
} else {
format!("{archives_url}/tldr-pages.{language}.zip")
};

let result = Self::download(&url).transpose().map(|bytes| {
ZipArchive::new(Cursor::new(bytes?))
.map(|archive| (language, archive))
.with_context(|| {
format!(
"Could not open downloaded ZIP archive for language \"{language}\""
)
})
});

if result.is_none() {
debug!("No archive for language \"{language}\" found on server, ignoring.");
}

// Decompress the response body into an `Archive`
let mut archive = ZipArchive::new(Cursor::new(bytes))
.context("Could not decompress downloaded ZIP archive")?;
result
})
.collect::<Result<Vec<_>>>()?;

ensure!(
!downloaded_archives.is_empty(),
"No archives were downloaded, please check the list of specified languages and make sure it contains at least one valid language."
);

// Clear cache directory
// Note: This is not the best solution. Ideally we would download the
// archive to a temporary directory and then swap the two directories.
// But renaming a directory doesn't work across filesystems and Rust
Expand All @@ -180,10 +211,21 @@ impl Cache {
self.clear()
.context("Could not clear the cache directory")?;

// Extract archive into pages dir
archive
.extract(&self.pages_dir())
.context("Could not unpack compressed data")?;
let pages_dir = self.pages_dir();

for (language, mut archive) in downloaded_archives {
let subdirectory_name = if language == "en" {
"pages".to_string()
} else {
format!("pages.{language}")
};

archive
.extract(pages_dir.join(subdirectory_name))
.with_context(|| {
format!("Could not unpack compressed data for language \"{language}\"")
})?;
}

Ok(())
}
Expand Down
18 changes: 9 additions & 9 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ const APP_INFO: AppInfo = AppInfo {
name: NAME,
author: NAME,
};
const ARCHIVE_URL: &str = "https://tldr.sh/assets/tldr.zip";
const ARCHIVES_URL: &str = "https://tldr.sh/assets/";
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this should be called ARCHIVE_URL_PREFIX instead? (If yes, the cache update method parameter needs a rename as well.)


/// The cache should be updated if it was explicitly requested,
/// or if an automatic update is due and allowed.
Expand Down Expand Up @@ -133,8 +133,8 @@ fn clear_cache(cache: &Cache, quietly: bool, enable_styles: bool) {
}

/// Update the cache
fn update_cache(cache: &Cache, quietly: bool, enable_styles: bool) {
cache.update(ARCHIVE_URL).unwrap_or_else(|e| {
fn update_cache(cache: &Cache, languages: &[String], quietly: bool, enable_styles: bool) {
cache.update(ARCHIVES_URL, languages).unwrap_or_else(|e| {
print_error(enable_styles, &e.context("Could not update cache"));
process::exit(1);
});
Expand Down Expand Up @@ -308,9 +308,14 @@ fn main() {
clear_cache(&cache, args.quiet, enable_styles);
}

let languages = match args.language.as_ref() {
Some(language) => vec![language.to_owned()],
None => get_languages_from_env(),
};

// Cache update, pass through
let cache_updated = if should_update_cache(&cache, &args, &config) {
update_cache(&cache, args.quiet, enable_styles);
update_cache(&cache, &languages, args.quiet, enable_styles);
true
} else {
false
Expand Down Expand Up @@ -345,11 +350,6 @@ fn main() {
// https://github.com/tldr-pages/tldr/blob/main/CLIENT-SPECIFICATION.md#page-names
let command = args.command.join("-").to_lowercase();

// Collect languages
let languages = args
.language
.map_or_else(get_languages_from_env, |lang| vec![lang]);

// Search for command in cache
if let Some(lookup_result) = cache.find_page(
&command,
Expand Down