-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Switch from shell script to Emacs-native code
Now the shell script is an optional utility. Capturing from the browser doesn't require it, nor does it require the extra protocol-handler.
- Loading branch information
Showing
3 changed files
with
132 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
;;; org-protocol-capture-html.el --- Capture HTML with org-protocol | ||
|
||
;;; Commentary: | ||
;; This makes it possible to capture HTML into Org-mode with | ||
;; org-protocol by passing it through Pandoc to convert the HTML into | ||
;; Org syntax. You can use a JavaScript function like the ones found | ||
;; here[0] to get the HTML from the browser's selection, or here's one | ||
;; that seems to work: | ||
;; | ||
;; function () {var html = ""; if (typeof window.getSelection != "undefined") {var sel = window.getSelection(); if (sel.rangeCount) {var container = document.createElement("div"); for (var i = 0, len = sel.rangeCount; i < len; ++i) {container.appendChild(sel.getRangeAt(i).cloneContents());} html = container.innerHTML;}} else if (typeof document.selection != "undefined") {if (document.selection.type == "Text") {html = document.selection.createRange().htmlText;}} return html;}(); | ||
;; | ||
;; [0] http://stackoverflow.com/a/6668159/712624 | ||
|
||
;;; Code: | ||
(defun org-protocol-capture-html-with-pandoc (data)
  "Process an org-protocol://capture-html:// URL.

DATA is the string handed to this handler.  It is split with
`org-protocol-split-data' on `org-protocol-data-separator' into an
optional template key, the URL, the title and the HTML content.

This function is basically a copy of `org-protocol-do-capture', but
it passes the captured content (not the URL or title) through
Pandoc, converting HTML to Org-mode.

If Pandoc exits with a non-zero status, its output is reported with
`message' and no capture is started.  Always return nil."
  ;; It would be nice to not basically duplicate
  ;; `org-protocol-do-capture', but passing the data back to that
  ;; function would require re-encoding the data into a URL string
  ;; with Emacs after Pandoc converts it.  Since we've already split
  ;; it up, we might as well go ahead and run the capture directly.
  (let* ((parts (org-protocol-split-data data t org-protocol-data-separator))
         ;; A leading part of at most two characters is consumed as the
         ;; capture template key; otherwise the default key is used.
         (template (or (and (>= 2 (length (car parts))) (pop parts))
                       org-protocol-default-template-key))
         (url (org-protocol-sanitize-uri (car parts)))
         ;; URL scheme (e.g. "http"), or nil when URL has none.
         (type (if (string-match "^\\([a-z]+\\):" url)
                   (match-string 1 url)))
         (title (or (cadr parts) ""))
         (content (or (caddr parts) ""))
         ;; Fall back to the URL as link text when the title is blank.
         (orglink (org-make-link-string
                   url (if (string-match "[^[:space:]]" title) title url)))
         ;; Bound as in `org-protocol-do-capture', but not passed to
         ;; `org-store-link-props' below.
         (query (or (org-protocol-convert-query-to-plist (cadddr parts)) ""))
         (org-capture-link-is-already-stored t)) ;; avoid call to org-store-link

    (setq org-stored-links
          (cons (list url title) org-stored-links))
    (kill-new orglink)

    (with-temp-buffer
      (insert content)
      ;; Replace the buffer contents with Pandoc's output (stdout and
      ;; stderr both land in this buffer).
      ;; NOTE(review): newer Pandoc releases spell "--no-wrap" as
      ;; "--wrap=none" -- confirm against the Pandoc version in use.
      (if (not (= 0 (call-process-region
                     (point-min) (point-max)
                     "pandoc" t t nil "--no-wrap" "-f" "html" "-t" "org")))
          ;; The format string needs a %s, otherwise Pandoc's output
          ;; is silently dropped from the message.
          (message "Pandoc failed: %s" (buffer-string))
        (progn
          ;; Pandoc succeeded
          (org-store-link-props :type type
                                :link url
                                :description title
                                :orglink orglink
                                :initial (buffer-string))
          (raise-frame)
          (funcall 'org-capture nil template))))
    nil))
|
||
;; `add-to-list' signals `void-variable' if
;; `org-protocol-protocol-alist' is not yet bound, so make sure
;; org-protocol is loaded before registering the handler.
(require 'org-protocol)

;; Register the "capture-html" sub-protocol so that
;; org-protocol://capture-html:// URLs are dispatched to
;; `org-protocol-capture-html-with-pandoc'.
(add-to-list 'org-protocol-protocol-alist
             '("capture-html"
               :protocol "capture-html"
               :function org-protocol-capture-html-with-pandoc
               :kill-client t))
|
||
(provide 'org-protocol-capture-html) | ||
;;; org-protocol-capture-html.el ends here |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,69 @@ | ||
#!/bin/bash | ||
|
||
# ** Defaults
# Values used when the matching command-line option is not given.
url='http://example.com'   # link target for the heading
template='w'               # org-capture template key
heading='Heading'          # text of the Org heading
|
||
# ** Functions | ||
# Percent-encode all arguments (joined with single spaces) and print the
# result followed by a newline.  No character is treated as "safe", so
# "/" is encoded too.  Works with both Python 2 and Python 3; returns 1
# if no Python interpreter is found on PATH.
function urlencode {
    local py
    # Keep using "python" when it exists; fall back to "python3" on
    # systems that no longer ship an unversioned interpreter.
    py=$(command -v python || command -v python3) || {
        echo "urlencode: no Python interpreter found." >&2
        return 1
    }
    # urllib.quote moved to urllib.parse.quote in Python 3, and the
    # print statement is Python 2 only, so avoid both.
    "$py" -c '
import sys
try:
    from urllib.parse import quote
except ImportError:
    from urllib import quote
sys.stdout.write(quote(" ".join(sys.argv[1:]), safe="") + "\n")
' "$@"
}
# Print the command-line help text to stdout.
# The synopsis and defaults mirror the options handled by the getopts
# loop (-h, -t, -u) and the default values set at the top of the script.
function usage {
    cat <<EOF
org-protocol-capture-html [-h HEADING] [-t TEMPLATE] [-u URL] [HTML]

Send HTML to Emacs through org-protocol, passing it through Pandoc to
convert HTML to Org-mode.  HTML may be passed as an argument or
through STDIN.

Options:
    -h HEADING     Use HEADING as the Org heading (default: Heading)
    -t TEMPLATE    Use the org-capture template with TEMPLATE key (default: w)
    -u URL         Use URL for the heading link (default: http://example.com)
EOF
}
|
||
# ** Args
# Parse -h HEADING, -t TEMPLATE and -u URL; anything left over after the
# options is treated as the HTML to capture.
while getopts "h:t:u:" opt
do
    case $opt in
        h) heading=$OPTARG ;;
        t) template=$OPTARG ;;
        u) url=$OPTARG ;;
        # Unknown option or missing argument: this is an error, so send
        # the help text to stderr and exit with a non-zero status.
        *) usage >&2; exit 1 ;;
    esac
done
# Drop the parsed options so that "$@" holds only the HTML arguments.
shift $(( OPTIND - 1 ))
|
||
# ** Get HTML | ||
if [[ $@ ]] | ||
then | ||
# Get data from args | ||
# Get from args | ||
data="$@" | ||
else | ||
# Get data from STDIN | ||
# Get from STDIN | ||
data=$(cat) | ||
fi | ||
|
||
if [[ -z $data ]] | ||
if ! [[ $data ]] | ||
then | ||
# No data; quit | ||
echo "No data passed via args or STDIN." >&2 | ||
exit 1 | ||
fi | ||
|
||
# Fix protocol | ||
data=$(sed 's|^org-protocol-html://|org-protocol://|' <<<"$data") | ||
|
||
# Split data | ||
readarray -t data <<<"$(sed -r 's|^(org-protocol://capture://w/[^/]+/[^/]+/)(.*)|\1\n\2|' <<<"$data")" | ||
|
||
start="${data[0]}" | ||
end="${data[1]}" | ||
|
||
# Decode URL-encoded/quoted data | ||
end=$(python -c "import sys, urllib; print urllib.unquote(' '.join(sys.argv[1:]))" "$end") | ||
|
||
# Convert with Pandoc | ||
end=$(pandoc --no-wrap -f html -t org <<<"$end") | ||
# ** Check template length | ||
if [[ ${#template} -gt 1 ]] | ||
then | ||
echo "Template key should be one letter." >&2 | ||
exit 1 | ||
fi | ||
|
||
# Reencode data | ||
end=$(python -c "import sys, urllib; print urllib.quote(' '.join(sys.argv[1:]), safe='')" "$end") | ||
# ** URL-encode data | ||
heading=$(urlencode "$heading") | ||
url=$(urlencode "$url") | ||
data=$(urlencode "$data") | ||
|
||
# Send to Emacs | ||
emacsclient "${start}${end}" | ||
# ** Send to Emacs | ||
emacsclient "org-protocol://capture-html://$template/$url/$heading/$data" |