Skip to content

Commit

Permalink
upload: improve ergonomics so tab-completion works as expected and on…
Browse files Browse the repository at this point in the history
…e can upload multiple files
  • Loading branch information
gwern committed Oct 16, 2023
1 parent 5627659 commit 399642b
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 121 deletions.
31 changes: 25 additions & 6 deletions build/bash.sh
Expand Up @@ -2,7 +2,7 @@

# Author: Gwern Branwen
# Date: 2016-10-01
# When: Time-stamp: "2023-10-15 16:00:06 gwern"
# When: Time-stamp: "2023-10-16 11:51:32 gwern"
# License: CC-0
#
# Bash helper functions for Gwern.net wiki use.
Expand All @@ -12,6 +12,8 @@
#
# See also: /static/build/{upload, gwa, crossref, compressJpg2}

source /usr/share/bash-completion/bash_completion # useful for better `upload` tab-completion

# Default parallelism:
export N="29"

Expand Down Expand Up @@ -91,7 +93,7 @@ e () { FILE=""
exiftool -m -overwrite_original "$FILE" "$@";
# getting very tired of these hyphen junk in my titles...
TITLE1="$(exiftool -printFormat '$Title' -Title "$FILE")"
TITLE2="$(echo "$TITLE1" | sed -e 's/‐/-/g' -e 's/^ +//g' -e 's/ +$//g' -e 's/\.$//' | tr '_' ':')" # WARNING: tr mangles Unicode, but sed doesn't
TITLE2="$(echo "$TITLE1" | sed -e 's/‐/-/g' -e 's/^ \+//g' -e 's/ \+$//g' -e 's/\.$//' | tr '_' ':')" # WARNING: tr mangles Unicode, but sed doesn't
if [[ "$TITLE1" != "$TITLE2" ]]; then exiftool -overwrite_original -Title="$TITLE2" "$FILE"; fi

else emacsclient "$FILE";
Expand Down Expand Up @@ -243,19 +245,36 @@ gwtag () { (
# echo "---" && grep -F -- "$1" ./metadata/*.yaml
); }

# eg. `"ai ai/anime ai/anime/danbooru ... ai/scaling ai/scaling/economics ... japan/poetry/shotetsu japan/poetry/teika ... technology/digital-antiquarian ... zeo/short-sleeper"`
GWERNNET_DIRS_FULL="$(cd ~/ && find wiki/doc/ -type d | grep -F -v -e 'doc/rotten.com' -e 'doc/www/' \
-e 2000-iapac-norvir -e mountimprobable.com -e personal/2011-gwern-yourmorals.org \
-e psychology/european-journal-of-parapsychology -e reinforcement-learning/armstrong-controlproblem \
-e statistics/order/beanmachine-multistage -e gwern.net-gitstats -e metadata/annotation | \
cut --delimiter='/' --fields=3- | sort)"
GWERNNET_DIRS_SHORT="$(echo $GWERNNET_DIRS_FULL | tr '/' '\n' | sort --unique)"
# eg. `"1 2 2010-crc 2013-cicadas 3 4 5 abandoned-footnotes ab-testing ... www zeami zeo"`
GWERNNET_DIRS_SHORT="$(echo $GWERNNET_DIRS_FULL | tr '/' '\n' | tr ' ' '\n' | sort --unique)"
# for completing tags which may need to be disambiguated, like 'gpt/nonfiction':
GWERNNET_DIRS_SUFFIXES="$(echo $GWERNNET_DIRS_FULL | tr ' ' '\n' | grep -E -e '[a-z0-9-]\+/[a-z0-9-]\+/[a-z0-9-]\+' | \
rev | cut --delimiter='/' --fields=1-2 | rev)"
complete -W "$GWERNNET_DIRS_FULL $GWERNNET_DIRS_SHORT $GWERNNET_DIRS_SUFFIXES" -f upload
# eg. `"1/lsd 2/2010-crc 3/fiction 3/nonfiction ... palm/2 personality/conscientiousness personality/psychopathy ... video/analysis video/generation vision/dream"`
GWERNNET_DIRS_SUFFIXES="$(echo $GWERNNET_DIRS_FULL | tr ' ' '\n' | grep -E -e '[a-z0-9-]+/[a-z0-9-]+/[a-z0-9-]+' | \
rev | cut --delimiter='/' --fields=1-2 | rev | sort --unique)"
complete -W "$GWERNNET_DIRS_FULL $GWERNNET_DIRS_SHORT $GWERNNET_DIRS_SUFFIXES" u gwtag gwt t

alias u="upload"
# 'upload' moved to ~/wiki/static/build/upload for easier calling from XMonad
## tab-complete the first argument as the local file, and the second argument as the remote directory:
## Programmable-completion handler for `upload`.
## Argument 1 is completed as a local file (via bash-completion's `_filedir`);
## argument 2 is completed against the known tag directories
## ($GWERNNET_DIRS_FULL, $GWERNNET_DIRS_SHORT, $GWERNNET_DIRS_SUFFIXES).
## Any later argument gets no completions.
## Globals: reads the three GWERNNET_DIRS_* word lists; writes COMPREPLY.
_upload() {
    ## `_init_completion` assigns all four of these in its caller's scope;
    ## declaring every one of them local keeps them from leaking into the interactive shell.
    local cur prev words cword
    _init_completion || return ## assumes `bash-completion` package is installed & sourced previously

    if [[ $cword -eq 1 ]]; then
        # File completion on first argument
        _filedir
    elif [[ $cword -eq 2 ]]; then
        # Directory completion on second argument.
        # `mapfile` stores one candidate per output line, with no word-splitting or globbing of the results.
        mapfile -t COMPREPLY < <(compgen -W "${GWERNNET_DIRS_FULL} ${GWERNNET_DIRS_SHORT} ${GWERNNET_DIRS_SUFFIXES}" -- "$cur")
    fi
}
complete -F _upload upload

# wait for a file to become quiescent because eg. Firefox is still downloading it:
is_downloading() {
Expand Down
248 changes: 133 additions & 115 deletions build/upload.sh
Expand Up @@ -3,7 +3,7 @@
# upload: convenience script for uploading PDFs, images, and other files to gwern.net. Handles naming & reformatting.
# Author: Gwern Branwen
# Date: 2021-01-01
# When: Time-stamp: "2023-10-14 22:56:04 gwern"
# When: Time-stamp: "2023-10-16 12:34:20 gwern"
# License: CC-0
#
# Upload files to Gwern.net conveniently, either temporary working files or permanent additions.
Expand All @@ -26,127 +26,145 @@ WWW_BROWSER="firefox"

if [ ! -f "$1" ]; then echo "l20: '$1' is not a file‽" && exit 1; fi

wait
# the fundamental function which does all the real work. Jump to the bottom for the actual argument-handling loop of `upload`.
_upload() {
wait

(locate "$1" &)
(locate "$1" &)

FILENAME="$1"
if [[ $FILENAME == *.jpeg ]]; then
FILENAME="${FILENAME%.jpeg}.jpg"
mv "$1" "$FILENAME"
fi

# Attempt to make filename globally unique, due to repetition of surnames.
#
# eg. I go to do `upload 2023-liu-2.pdf economics`, and it turns out `/doc/psychology/2023-liu-2.pdf` already exists...
# as do `/doc/biology/2023-liu-3.pdf` and `/doc/technology/2023-liu-4.pdf`. (Liu is an *extremely* common Asian surname.)
# So this function will try to loop over numeric suffixes 1–9 to rename it to the first workable filename, in this case, `2023-liu-5.pdf`.
function rename_file() {
local filename="$1"
local base_name extension new_filename new_file_path

base_name="${filename%.*}"
extension="${filename##*.}"

new_file_path=$(find ~/wiki/ -type f -name "$filename" -print -quit)

# if filename already exists, try to rename it
if [[ -n "$new_file_path" ]]; then
for ((i=2; i<=20; i++)); do
new_filename="${base_name}-${i}.${extension}"
new_file_path=$(find ~/wiki/ -type f -name "$new_filename" -print -quit)

if [[ -z "$new_file_path" ]]; then
mv "$filename" "$new_filename"
echo "File '$filename' has been renamed to '$new_filename'"
filename="$new_filename"
break
fi
done
FILENAME="$1"
if [[ $FILENAME == *.jpeg ]]; then
FILENAME="${FILENAME%.jpeg}.jpg"
mv "$1" "$FILENAME"
fi

# if filename after possible renaming does not exist, that means we're using a new filename
if [[ ! -e "$filename" ]]; then
echo "Error: File '$filename' could not be renamed. Please check for possible issues." >&2
return 1
fi
# Attempt to make the filename globally unique within ~/wiki/, due to repetition of surnames.
#
# eg. I go to do `upload 2023-liu.pdf economics`, and it turns out `/doc/psychology/2023-liu.pdf` already exists...
# as do `/doc/biology/2023-liu-2.pdf` and `/doc/technology/2023-liu-3.pdf`. (Liu is an *extremely* common Asian surname.)
# So this function appends the numeric suffixes 2–20 in turn and renames the file to the first name
# not yet used anywhere under ~/wiki/, in this case `2023-liu-4.pdf`.
# (The suffix is always appended to the existing name: a colliding `2023-liu-2.pdf` becomes `2023-liu-2-2.pdf`.)
#
# Arguments: $1 - file to check; may include a directory part, which is preserved on rename.
# Globals:   FILENAME (written on success with the final, possibly renamed, path).
# Outputs:   a notice on stdout when the file is renamed; errors on stderr.
# Returns:   0 on success; 1 if every suffix 2–20 is taken, or the resulting file does not exist.
function rename_file() {
    local filename="$1"
    local dir_prefix="" leaf stem extension="" new_filename new_file_path i

    # Split off the directory part: `find -name` only ever matches a bare file name, never a path.
    leaf="${filename##*/}"
    if [[ "$filename" == */* ]]; then dir_prefix="${filename%/*}/"; fi

    # Split stem & extension on the last dot of the bare name; a name with no dot
    # (or only a leading dot) is treated as having no extension.
    if [[ "$leaf" == ?*.* ]]; then
        stem="${leaf%.*}"
        extension=".${leaf##*.}"
    else
        stem="$leaf"
    fi

    new_file_path=$(find ~/wiki/ -type f -name "$leaf" -print -quit)

    # if filename already exists, try to rename it
    if [[ -n "$new_file_path" ]]; then
        for ((i=2; i<=20; i++)); do
            new_filename="${stem}-${i}${extension}"
            new_file_path=$(find ~/wiki/ -type f -name "$new_filename" -print -quit)

            if [[ -z "$new_file_path" ]]; then
                mv "$filename" "${dir_prefix}${new_filename}"
                echo "File '$filename' has been renamed to '${dir_prefix}${new_filename}'"
                filename="${dir_prefix}${new_filename}"
                break
            fi
        done

        # still non-empty here means even the last candidate collided: do not pretend the name is unique
        if [[ -n "$new_file_path" ]]; then
            echo "Error: no unused name found for '$filename' (suffixes -2 to -20 are all taken)." >&2
            return 1
        fi
    fi

    # if the file (after possible renaming) does not exist, the `mv` failed or the argument was bad
    if [[ ! -e "$filename" ]]; then
        echo "Error: File '$filename' could not be renamed. Please check for possible issues." >&2
        return 1
    fi

    FILENAME="$filename"
    return 0
}
rename_file "$FILENAME"

if [ $# -eq 1 ]; then
TARGET=$(basename "$FILENAME")
if [[ "$TARGET" =~ .*\.jpg || "$TARGET" =~ .*\.png ]]; then exiftool -overwrite_original -All="" "$TARGET"; fi # strip potentially dangerous metadata from scrap images
# format Markdown/text files for more readability
TEMPFILE=$(mktemp /tmp/text.XXXXX)
if [[ "$TARGET" =~ .*\.page || "$TARGET" =~ .*\.txt ]]; then fold --spaces --width=120 "$TARGET" >> "$TEMPFILE" && mv "$TEMPFILE" "$TARGET"; fi

mv "$TARGET" ~/wiki/doc/www/misc/
cd ~/wiki/ || exit
TARGET2="./doc/www/misc/$TARGET"
(rsync --chmod='a+r' -q "$TARGET2" gwern@176.9.41.242:"/home/gwern/gwern.net/doc/www/misc/" || \
rsync --chmod='a+r' -v "$TARGET2" gwern@176.9.41.242:"/home/gwern/gwern.net/doc/www/misc/"
URL="https://gwern.net/doc/www/misc/$TARGET"
echo "$URL" && $WWW_BROWSER "$URL") &

else
TARGET_DIR=""
TARGET_DIR=doc/"$2"

if [ ! -d ~/wiki/"$TARGET_DIR" ]; then
# try to guess a target:
GUESS=$(cd ~/wiki/ && ./static/build/guessTag "$2")
if [ ! -d ~/wiki/doc/"$GUESS"/ ]; then
# the guess failed too, so bail out entirely:
ls ~/wiki/"$TARGET_DIR" ~/wiki/doc/"$GUESS"/
echo "$FILENAME; Directory $TARGET_DIR $2 (and fallback guess $GUESS) does not exist?"
return 2
else
# restart with fixed directory
echo "Retry as \"upload $FILENAME $GUESS\""
upload "$FILENAME" "$GUESS"
fi
else
if [ -a "$FILENAME" ]; then
## automatically rename a file like 'benter1994.pdf' (Libgen) to '1994-benter.pdf' (gwern.net):
FILE="$FILENAME"
if [[ "$FILE" =~ ([a-zA-Z]+)([0-9][0-9][0-9][0-9])\.pdf ]];
then
SWAP="${BASH_REMATCH[2]}-${BASH_REMATCH[1]}.pdf"
SWAP=$(echo "$SWAP" | tr 'A-Z' 'a-z') ## eg '1979-Svorny.pdf' → '1979-svorny.pdf'

mv "$FILE" "$SWAP"
FILE="$SWAP"
fi
TARGET=$TARGET_DIR/$(basename "$FILE")
if [ ! -e ~/wiki/"$TARGET" ]; then
mv "$FILE" ~/wiki/"$TARGET"
cd ~/wiki/ || return
chmod a+r "$TARGET"
if [[ "$TARGET" =~ .*\.pdf ]]; then
METADATA=$(crossref "$TARGET") && echo "$METADATA" & # background for speed, but print it out mostly-atomically to avoid being mangled & impeding copy-paste of the annotation metadata
compressPdf "$TARGET";
chmod a+r "$TARGET";
fi
(git add "$TARGET" &)
# TODO: add back in `--mkpath`
(rsync --chmod='a+r' -q "$TARGET" gwern@176.9.41.242:"/home/gwern/gwern.net/$TARGET_DIR/" || \
rsync --chmod='a+r' -v "$TARGET" gwern@176.9.41.242:"/home/gwern/gwern.net/$TARGET_DIR/"
URL="https://gwern.net/$TARGET_DIR/$(basename "$FILE")"
cloudflare-expire "$TARGET_DIR/$(basename "$FILE")"
echo ""
echo "/$TARGET $URL"

$WWW_BROWSER "$URL") &

else echo ~/wiki/"$TARGET" " already exists"
fi
else echo "First argument $FILENAME is not a file?"
return 1
fi
fi
fi
}
rename_file "$FILENAME"

if [ $# -eq 1 ]; then
TARGET=$(basename "$FILENAME")
if [[ "$TARGET" =~ .*\.jpg || "$TARGET" =~ .*\.png ]]; then exiftool -overwrite_original -All="" "$TARGET"; fi # strip potentially dangerous metadata from scrap images
# format Markdown/text files for more readability
TEMPFILE=$(mktemp /tmp/text.XXXXX)
if [[ "$TARGET" =~ .*\.page || "$TARGET" =~ .*\.txt ]]; then fold --spaces --width=120 "$TARGET" >> "$TEMPFILE" && mv "$TEMPFILE" "$TARGET"; fi

mv "$TARGET" ~/wiki/doc/www/misc/
cd ~/wiki/ || exit
TARGET2="./doc/www/misc/$TARGET"
(rsync --chmod='a+r' -q "$TARGET2" gwern@176.9.41.242:"/home/gwern/gwern.net/doc/www/misc/" || \
rsync --chmod='a+r' -v "$TARGET2" gwern@176.9.41.242:"/home/gwern/gwern.net/doc/www/misc/"
URL="https://gwern.net/doc/www/misc/$TARGET"
echo "$URL" && $WWW_BROWSER "$URL") &

# `upload` main loop, calling `_upload` once per file:
## If the last argument is not a file, it's a directory, and we call `_upload` repeatedly with `_upload $file_n $directory`.
## Otherwise every argument is a file and each is passed to `_upload` alone.
## This keeps the logic simpler than trying to handle many variable-length arguments in `_upload`.
##
## Arguments: the script's own arguments (files, optionally followed by one target directory/tag).
## Outputs:   the working directory before and after the uploads, plus whatever `_upload` prints.
upload_main() {
    local dir="" file start_dir
    local -a files=("$@")

    if (( $# > 0 )) && [[ ! -f "${!#}" ]]; then
        dir="${!#}"
        files=("${@:1:$(($#-1))}")
    fi

    start_dir="$PWD"
    pwd
    for file in "${files[@]}"; do
        # `_upload` does `cd ~/wiki/`; go back so that the next relative filename still resolves.
        cd "$start_dir" || return 1
        if [[ -n "$dir" ]]; then
            _upload "$file" "$dir"
        else
            # no directory given: pass exactly one argument, so `_upload` takes its `$# -eq 1` branch
            _upload "$file"
        fi
    done

    wait; pwd
}
upload_main "$@"

0 comments on commit 399642b

Please sign in to comment.