Skip to content

Commit

Permalink
Add sitemaps
Browse files Browse the repository at this point in the history
  • Loading branch information
bouzuya committed Jan 9, 2023
1 parent b2dc693 commit 8cacb70
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 0 deletions.
1 change: 1 addition & 0 deletions sitemaps/.gitignore
@@ -0,0 +1 @@
/target/
139 changes: 139 additions & 0 deletions sitemaps/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions sitemaps/Cargo.toml
@@ -0,0 +1,13 @@
[package]
name = "sitemaps"
version = "0.0.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
thiserror = "1.0.38"
url = "2.3.1"

[dev-dependencies]
anyhow = "1.0.68"
3 changes: 3 additions & 0 deletions sitemaps/README.md
@@ -0,0 +1,3 @@
# sitemaps

<https://www.sitemaps.org/>
93 changes: 93 additions & 0 deletions sitemaps/src/lib.rs
@@ -0,0 +1,93 @@
/// Errors produced while building sitemap values in this crate.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum Error {
/// A string could not be accepted as a [`Loc`]: it failed to parse as a
/// URL, was not already in the parser's serialized form, or was too long.
#[error("invalid loc")]
InvalidLoc,
/// More URL entries were supplied to [`Sitemaps::new`] than it allows.
#[error("too many urls")]
TooManyUrls,
}

/// A collection of [`Url`] entries.
///
/// The field is private; values are built through [`Sitemaps::new`], which
/// enforces the entry-count limit.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Sitemaps {
// Entries in the order given to `Sitemaps::new`.
urls: Vec<Url>,
}

impl Sitemaps {
pub fn new(urls: Vec<Url>) -> Result<Self, Error> {
if urls.len() > 50_000 {
return Err(Error::TooManyUrls);
}
// TODO: check <= 52,428,800 bytes
Ok(Self { urls })
}
}

/// A single entry of a [`Sitemaps`].
///
/// Only the location is modelled for now; the commented-out fields below are
/// placeholders that have not been implemented yet.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Url {
/// Location of the entry.
pub loc: Loc,
// pub lastmod: Option<Lastmod>,
// pub changefreq: Option<Changefreq>,
// pub priority: Option<Priority>,
}

/// A validated location wrapping a [`url::Url`].
///
/// The inner value is private; a `Loc` is obtained by parsing a string through
/// its `FromStr` or `TryFrom<&str>` implementations, which perform validation.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Loc(url::Url);

impl std::fmt::Display for Loc {
    /// Formats the location exactly as the wrapped [`url::Url`] displays itself.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully-qualified call: method-call syntax (`self.0.fmt(f)`) only
        // resolves when a `std::fmt` trait is imported into scope, and becomes
        // ambiguous if more than one (e.g. `Debug` and `Display`) is imported.
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl std::str::FromStr for Loc {
type Err = Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let u = url::Url::parse(s).map_err(|_| Error::InvalidLoc)?;
if u.as_str() != s {
return Err(Error::InvalidLoc);
}
if u.as_str().chars().count() >= 2048 {
return Err(Error::InvalidLoc);
}
Ok(Self(u))
}
}

impl TryFrom<&str> for Loc {
    type Error = Error;

    /// Converts a string slice into a [`Loc`] by delegating to its `FromStr`
    /// implementation, so both entry points apply the same validation.
    ///
    /// # Errors
    ///
    /// Returns whatever error the `FromStr` implementation returns for `value`.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        value.parse::<Self>()
    }
}

#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use super::*;

    /// Covers `Loc` parsing: agreement between `FromStr` and `TryFrom<&str>`,
    /// `Display` round-tripping, rejection of rewritten input, and the
    /// length boundary.
    #[test]
    fn test_loc() -> anyhow::Result<()> {
        // Both parsing entry points agree, and the value displays unchanged.
        let root = "https://example.com/";
        let parsed_root = Loc::from_str(root)?;
        assert_eq!(parsed_root, Loc::try_from(root)?);
        assert_eq!(parsed_root.to_string(), root);

        // Input that does not match its own serialized form is rejected.
        let no_trailing_slash = "https://example.com";
        assert!(Loc::from_str(no_trailing_slash).is_err());

        // Exactly 2048 characters: rejected.
        let at_limit = format!("https://example.com/{}", "a".repeat(2028));
        assert_eq!(at_limit.len(), 2048);
        assert!(Loc::from_str(at_limit.as_str()).is_err());

        // One character under the limit: accepted and round-trips.
        let under_limit = format!("https://example.com/{}", "a".repeat(2027));
        assert_eq!(under_limit.len(), 2047);
        assert_eq!(Loc::from_str(under_limit.as_str())?.to_string(), under_limit);

        // A URL with a non-empty path round-trips as well.
        let with_path = "https://example.com/path";
        let parsed_path = Loc::from_str(with_path)?;
        assert_eq!(parsed_path.to_string(), with_path);

        Ok(())
    }
}

0 comments on commit 8cacb70

Please sign in to comment.