Numerous fixes, try pulling hashnode tag IDs from tag pages
- Fix wrong clap env args
- Setup platform features
- For missing hashnode tags, try getting tag page (fixes #1)
- Fix clippy lints, some additional tests
- Add tests for: medium/hashnode html parsing, and GitHub pages
- Conditionally compile platforms based on feature flags
jeikabu committed Jun 27, 2021
1 parent 04b3f77 commit 2a8f41a
Showing 10 changed files with 4,420 additions and 45 deletions.
8 changes: 5 additions & 3 deletions Cargo.toml
@@ -9,11 +9,13 @@ repository = "https://github.com/jeikabu/cargo_bullhorn"
description = "CLI tool to publish articles and update them."

[features]
default = ["github_pages", "hashnode", "medium"]
default = ["devto", "github_pages", "hashnode", "medium"]
devto = []
github_pages = ["git"]
hashnode = ["graphql_client", "slug"]
hashnode = ["graphql_client", "quick-xml"]
medium = ["rss", "quick-xml"]


[dependencies]
anyhow = "1.0"
clap = "3.0.0-beta"
@@ -29,7 +31,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_yaml = "0.8"
shellexpand = "2.1"
slug = { version = "0.1", optional = true }
slug = { version = "0.1" }
thiserror = "1.0"
tokio = { version = "1.5.0", features = ["full"] }
tracing = "0.1"
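These feature flags pair with conditional compilation in the source changes below: each platform module is gated behind its feature, and main.rs only builds the matching publish step when that feature is enabled. A minimal sketch of the pattern (a hypothetical module, not the crate's actual layout):

#[cfg(feature = "devto")]
mod devto {
    // Compiled only when the `devto` Cargo feature is enabled.
    pub fn publish() {
        println!("publishing to dev.to");
    }
}

fn main() {
    // The call site is gated the same way, so the crate still builds when the
    // feature is disabled.
    #[cfg(feature = "devto")]
    devto::publish();
}

With the features split out, a build like `cargo build --no-default-features --features hashnode,medium` compiles only the selected backends and their optional dependencies.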
17 changes: 15 additions & 2 deletions src/main.rs
@@ -44,6 +44,12 @@ async fn start() -> Result<()> {
.init();

let mut opts = Opts::parse();
if opts.platforms.is_empty() || opts.platforms.iter().any(|i| *i == Platforms::All) {
opts.platforms.clear();
opts.platforms.push(Platforms::Devto);
opts.platforms.push(Platforms::Hashnode);
opts.platforms.push(Platforms::Medium);
}

if let Ok(config) = shellexpand::env(&opts.settings.config) {
use std::io::prelude::*;
@@ -66,10 +72,15 @@ async fn start() -> Result<()> {
post.apply(&opts.settings);

// Post "original" represented by canonical URL
let git = github_pages::GithubPages::new(&post, opts.settings.clone())?;
git.publish(&mut post)?;
#[cfg(feature = "github_pages")]
{
let git = github_pages::GithubPages::new(&post, opts.settings.clone())?;
git.publish(&mut post)?;
}

let mut futures: Vec<futures::future::LocalBoxFuture<()>> = vec![];

#[cfg(feature = "devto")]
if let (Some(_), Some(api_token)) = (opts.platforms.iter().find(|p| **p == Platforms::Devto), &opts.devto_api_token) {
let settings = opts.settings.clone();
let post = post.clone();
@@ -79,6 +90,7 @@
}));
}

#[cfg(feature = "hashnode")]
if let (Some(_), Some(api_token), Some(username)) = (opts.platforms.iter().find(|p| **p == Platforms::Hashnode), &opts.hashnode_api_token, &opts.hashnode_username) {
let settings = opts.settings.clone();
let post = post.clone();
@@ -88,6 +100,7 @@
}));
}

#[cfg(feature = "medium")]
if let (Some(_), Some(api_token)) = (opts.platforms.iter().find(|p| **p == Platforms::Medium), &opts.medium_api_token) {
let settings = opts.settings.clone();
let pub_id = opts.medium_publication_id.clone();
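The block added at the top of start() makes an empty platform list, or an explicit `all`, expand to every supported platform. A standalone sketch of that behavior, using a simplified stand-in for the crate's Platforms enum (the real type is defined elsewhere in the crate):

// Illustration only; not the crate's actual enum definition.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Platforms { All, Devto, Hashnode, Medium }

fn expand_defaults(mut platforms: Vec<Platforms>) -> Vec<Platforms> {
    // Mirrors the new check in start(): nothing given, or `All` present,
    // means publish everywhere.
    if platforms.is_empty() || platforms.iter().any(|p| *p == Platforms::All) {
        platforms.clear();
        platforms.push(Platforms::Devto);
        platforms.push(Platforms::Hashnode);
        platforms.push(Platforms::Medium);
    }
    platforms
}

fn main() {
    let expanded = expand_defaults(vec![Platforms::All]);
    assert_eq!(expanded, vec![Platforms::Devto, Platforms::Hashnode, Platforms::Medium]);
}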
4 changes: 3 additions & 1 deletion src/platforms/devto.rs
@@ -1,3 +1,5 @@
#![cfg(feature = "devto")]

use crate::{*, post::Post};

type ArticleResponse = serde_json::Map<String, serde_json::Value>;
@@ -27,7 +29,7 @@ impl From<Post> for Body {
body_markdown: item.body,
published: item.front_matter.is_published(),
canonical_url: item.front_matter.canonical_url,
tags: item.front_matter.tags.unwrap_or(vec![]),
tags: item.front_matter.tags.unwrap_or_default(),
series: item.front_matter.series,
date: item.front_matter.date,
};
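Besides the feature gate, the one code change here is a clippy cleanup: `unwrap_or(vec![])` evaluates its argument even when the Option is Some, while `unwrap_or_default()` builds the empty Vec only when needed. A minimal sketch of the equivalence:

fn main() {
    let tags: Option<Vec<String>> = None;
    // Both yield an empty Vec; clippy prefers the second form.
    assert_eq!(tags.clone().unwrap_or(vec![]), Vec::<String>::new());
    assert_eq!(tags.unwrap_or_default(), Vec::<String>::new());
}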
61 changes: 52 additions & 9 deletions src/platforms/github_pages.rs
@@ -1,3 +1,5 @@
#![cfg(feature = "github_pages")]

use crate::*;

#[derive(serde::Serialize)]
@@ -7,6 +9,7 @@ struct Article {
tags: Vec<String>,
}

#[derive(Clone, Debug, PartialEq)]
struct FilenameParts {
pub year: u32,
pub month: u32,
@@ -30,7 +33,7 @@ impl GithubPages {
Ok(repo) => break (repo, path),
_ => repo_path = path.parent(),
},
None => return Err(anyhow!("Can't find git repository for: {:?}", &post.path)),
None => return Err(Error::NotFound { expected: format!("Git repository for: {:?}", &post.path) }.into()),
};
};
debug!("git: Found repository: {:?}", &repo_path);
@@ -46,7 +49,7 @@ impl GithubPages {
let parts = GithubPages::parse_filename(&post)?;

if post.front_matter.canonical_url.is_none() {
let url = self.get_canonical_url(&post, &parts)?;
let url = self.get_canonical_url(&parts)?;
debug!("Setting canonical URL: {} ({:?})", url, post.path);
post.front_matter.canonical_url = Some(url);
}
@@ -61,15 +64,15 @@
Ok(())
}

fn get_canonical_url(&self, _post: &Post, parts: &FilenameParts) -> Result<String> {
fn get_canonical_url(&self, parts: &FilenameParts) -> Result<String> {
// Obtain server from git remote. E.g.
// `origin github:repo/repo.github.io.git` -> `repo.github.io`
let origin = self.repo.find_remote(&self.settings.remote)?;
let origin_url = origin.url().expect("Bad remote");
let regex = regex::Regex::new(r".*/(?P<pages_url>.*)\.git")?;
let regex = regex::Regex::new(r".*/(?P<pages_url>.*\.github\.io)(\.git)?")?;
let remote_error = Error::NotFound { expected: "repo/repo.github.io.git".to_owned() };
let url = regex.captures(origin_url)
.ok_or(remote_error.clone())?
.ok_or_else(|| remote_error.clone())?
.name("pages_url").ok_or(remote_error)?;
trace!("git: Remote server: {}", url.as_str());

@@ -83,10 +86,10 @@
fn parse_filename(post: &Post) -> Result<FilenameParts> {
// Ignore extension and parse filename as `YYYY-MM-DD-name` (per https://jekyllrb.com/docs/structure/)
let file_error = Error::BadPath { expected: "YYYY-MM-DD-name.ext".to_owned(), found: post.path.to_owned() };
let file_stem = post.path.file_stem().ok_or(file_error.clone())?
.to_str().ok_or(file_error.clone())?;
let file_stem = post.path.file_stem().ok_or_else(|| file_error.clone())?
.to_str().ok_or_else(|| file_error.clone())?;
let regex = regex::Regex::new(r"(\d{4})-(\d{1,2})-(\d{1,2})-(.*)")?;
let captures = regex.captures(file_stem).expect("Bad filename");
let captures = regex.captures(file_stem).ok_or_else(|| file_error.clone())?;
Ok(FilenameParts {
year: captures.get(1).unwrap().as_str().parse::<u32>()?,
month: captures.get(2).unwrap().as_str().parse::<u32>()?,
@@ -99,9 +102,49 @@
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;

fn create_post(filename: &str) -> Post {
Post {
path: PathBuf::from(format!("{}/{}", env!("CARGO_MANIFEST_DIR"), filename)),
..Default::default()
}
}

#[test]
fn x() {
fn create() -> Result<()> {
let post = create_post("2021-7-1-test.md");
let settings: Settings = Default::default();
GithubPages::new(&post, settings)?;
Ok(())
}

#[test]
fn parse_filename() -> Result<()> {
let post = create_post("2021-7-1-test.md");
let parts = GithubPages::parse_filename(&post)?;
assert_eq!(parts, FilenameParts{ year: 2021, month: 7, day: 1, name: "test".to_owned() });

let post = create_post("2021-07-01-test.md");
let parts = GithubPages::parse_filename(&post)?;
assert_eq!(parts, FilenameParts{ year: 2021, month: 7, day: 1, name: "test".to_owned() });

let post = create_post("test.md");
let _ = GithubPages::parse_filename(&post).unwrap_err();
Ok(())
}

#[test]
fn get_canonical_url() -> Result<()> {
let post = create_post("2021-7-1-test.md");
let settings = Settings {
remote: "origin".to_owned(),
..Default::default()
};
let github_pages = GithubPages::new(&post, settings)?;
let parts = GithubPages::parse_filename(&post)?;
let _ = github_pages.get_canonical_url(&parts).unwrap_err();
//assert_eq!(canonical_url, "https://cargo_bullhorn.github.io/2021/07/01/test.html");
Ok(())
}
}
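The canonical-URL fix above changes the remote-matching regex from requiring a trailing `.git` to capturing any `*.github.io` remote with `.git` optional. A quick check of the new pattern against the remote formats mentioned in the code comment, assuming the `regex` crate the module already uses:

fn main() {
    // Same pattern as the updated get_canonical_url().
    let regex = regex::Regex::new(r".*/(?P<pages_url>.*\.github\.io)(\.git)?").unwrap();
    for remote in ["github:repo/repo.github.io.git", "https://github.com/repo/repo.github.io"] {
        let caps = regex.captures(remote).expect("remote should match");
        assert_eq!(&caps["pages_url"], "repo.github.io");
    }
}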
137 changes: 128 additions & 9 deletions src/platforms/hashnode.rs
@@ -1,3 +1,5 @@
#![cfg(feature = "hashnode")]

use crate::{*, post::Post};
use graphql_client::GraphQLQuery;

@@ -66,28 +68,46 @@ impl Hashnode {
.send()
.await?;
let categories: Vec<tags::TagsTagCategories> = {
// Response is `[Tags]`, but `[Tags!]!`
// Response is GraphQL type `[Tags]` (each item and array itself can be null).
// But `[Tags!]!` (nothing is null) is simpler in Rust
let categories: graphql_client::Response<tags::ResponseData> = resp.json().await?;
categories.data
.and_then(|d| d.tag_categories)
// Turn `Option<Vec<Option<TagsTagCategories>>>` into `Vec<TagsTagCategories>`
.unwrap_or_default()
.into_iter()
// Unwrap Some and remove None
.filter_map(|c| c)
.flatten()
.collect()
};

for tag in front_matter_tags {
// Find handnode tag that matches front-matter tag
// Find hashnode tag that matches front-matter tag
let slug = slug::slugify(&tag);
let tag = tag.to_lowercase();
if let Some(tag_match) = categories.iter().find(|category|
category.slug == slug || category.name.to_lowercase() == tag
category.slug == slug || category.name.to_lowercase() == tag.to_lowercase()
) {
debug!("Matched tag `{}`: {} ({})", tag, tag_match.name, tag_match.id);
tags.push(tag_match.id.clone());
}
} else {
// Not returned from tag query, try GETing the tag-specific page
// and extracting the ID from that.
let resp = self.client.get(format!("https://hashnode.com/n/{}", slug))
.send()
.await;
if let Ok(resp) = resp {
if let Ok(text) = resp.text().await {
match parse_tag_html(&text) {
Ok(meta) => {
debug!("Matched tag `{}`: {} ({})", tag, meta.name, meta.id);
tags.push(meta.id);
continue;
},
Err(e) => warn!("Failed to parse tag ({}): {}", slug, e),
}
}
}
trace!("Unable to match tag: {}", tag);
}
}
}
Ok(tags)
@@ -135,12 +155,12 @@ impl Hashnode {
cover_image_url: None,
is_republished,
is_part_of_publication,
tags: vec![],
tags,
sourced_from_github: None,
};
let body = UpdateStory::build_query(update_story::Variables {
input,
post_id,
input,
});
if self.settings.dry {
} else {
@@ -227,3 +247,102 @@ impl Hashnode {
Ok((pub_id, existing_id))
}
}

fn parse_tag_html(text: &str) -> Result<ExtraData> {
let mut reader = quick_xml::Reader::from_str(&text);
reader
.check_end_names(false)
.trim_text(true);
let mut buf = Vec::new();
let mut in_script = false;
loop {
use quick_xml::events::Event;
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"script" => {
trace!("Start {:?}", e);
let script = e.attributes()
.filter_map(|attr| attr.ok())
.find(|attr|
attr.key == b"id"
&& attr.unescape_and_decode_value(&reader)
.map_or(false, |val| val == "__NEXT_DATA__")
);
if let Some(script) = script {
trace!("Script: {:?}", script);
in_script = true;
}
},
Ok(Event::Text(ref e)) if in_script => {
if let Ok(text) = e.unescape_and_decode(&reader) {
let script: Script = serde_json::from_str(&text)
.map_err(|e| Error::BadString {
expected: e.to_string(),
found: text,
})?;
let tag_meta = script.props.page_props.extra_data;
trace!("Found tag {}: id={}",
tag_meta.name,
tag_meta.id);
return Ok(tag_meta);
}
},
Ok(Event::End(ref e)) if in_script && e.name() == b"script" => {
in_script = false;
},
Ok(Event::Eof) => break,
Err(e) => warn!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => {},
}
buf.clear();
}
Err(Error::NotFound{ expected: "".to_owned()}.into())
}

#[derive(serde::Deserialize)]
struct Script {
props: Props,
}

#[derive(serde::Deserialize)]
#[serde(rename_all = "camelCase")]
struct Props {
page_props: PageProps,
}

#[derive(serde::Deserialize)]
#[serde(rename_all = "camelCase")]
struct PageProps {
status_code: i32,
extra_data: ExtraData,
}

#[derive(serde::Deserialize)]
struct ExtraData {
#[serde(rename = "_id")]
id: String,
name: String,
slug: String,
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn parse_tag() -> Result<()> {
use std::io::prelude::*;
tracing_subscriber::fmt()
.with_max_level(tracing::Level::TRACE)
.with_test_writer()
.init();

let path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("tests").join("hashnode_tag.html");
let mut buffer = String::new();
let _size = std::fs::File::open(path)?
.read_to_string(&mut buffer)?;
let info = parse_tag_html(&buffer)?;
assert_eq!(info.name, "dotnet");
Ok(())
}
}
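parse_tag_html works by locating the `__NEXT_DATA__` script element on a tag page and deserializing its JSON body through the Script/Props/PageProps/ExtraData structs above. A sketch of the expected JSON nesting, with placeholder values, written as a test that could sit next to parse_tag in the tests module (so Script and Result are in scope):

#[test]
fn next_data_shape() -> Result<()> {
    // Placeholder values; only the field names and nesting matter here.
    let json = r#"{
        "props": {
            "pageProps": {
                "statusCode": 200,
                "extraData": { "_id": "tag-id-placeholder", "name": "dotnet", "slug": "dotnet" }
            }
        }
    }"#;
    let script: Script = serde_json::from_str(json)?;
    assert_eq!(script.props.page_props.extra_data.id, "tag-id-placeholder");
    assert_eq!(script.props.page_props.extra_data.slug, "dotnet");
    Ok(())
}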