Large diffs are not rendered by default.

@@ -134,34 +134,46 @@ fetch_detect() {
return $FETCH_RESULT
}

# AVOID CHANGING THIS FUNCTION IF AT ALL POSSIBLE.
# THINGS WILL BREAK IN EVERYTHING IF THIS ONE BREAKS.

fetch() {
if [ $_FETCHTOOL = 1 ]; then

_CMD="$FETCH_CMD \"$1\""
if [ "$2" = "" ]; then
_FILE="$(basename "$1")"
_CMD="$_CMD -O $(basename "$1")"
elif [ "$2" = "-" ]; then
_CMD="$_CMD -O -"
else
_FILE="$2"
_CMD="$_CMD -O \"$2\""
fi
_CMD="$_CMD -O $_FILE"

elif [ $_FETCHTOOL = 2 ]; then

_CMD="$FETCH_CMD $1"
if [ "$2" = "" ]; then
_FILE="$(basename "$1")"
_CMD="$_CMD > $(basename "$1")"
elif [ "$2" = "-" ]; then
_CMD="$_CMD"
else
_FILE="$2"
_CMD="$_CMD > \"$2\""
fi
_CMD="$_CMD > $_FILE"

elif [ $_FETCHTOOL = 3 ]; then

_CMD="$FETCH_CMD $1"
if [ "$2" = "" ]; then
_FILE="$(basename "$1")"
_CMD="$_CMD -o $(basename "$1")"
elif [ "$2" = "-" ]; then
_CMD="$_CMD -o -"
else
_FILE="$2"
_CMD="$_CMD -o \"$2\""
fi
_CMD="$_CMD -o $_FILE"
fi

eval "$_CMD" 2>/dev/null

# echo -e "\n$_CMD"
eval " $_CMD" 2>/dev/null
FETCH_RESULT=$?

if [ ! $FETCH_RESULT = 0 ]; then
@@ -258,7 +270,7 @@ entity_to_char() {
-e "s/|/-/g"
}

function reverse_lines {
reverse_lines() {
readarray -t LINES
for (( I = ${#LINES[@]}; I; )); do
echo "${LINES[--I]}"
@@ -310,12 +322,7 @@ dl_batoto() {
# The URLs are not preloaded like the former, so the fetch one page done thing won't work.
# Unfortunately, short of grabbing pages until an image 404's, there's no way of knowing when we're done.

data="$(fetch "$1" tmp)"
folder="$(cat tmp | grep -C0 "<title>" | sed -e "s/^[[:space:]]*<title>//" -e "s/ Page .*//" -e "s/^[[:space:]]*//" -e "s/[[:space:]]*$/\n/" | entity_to_char)"

rm tmp

echo "$folder"
folder="$(fetch "$1" "-" | grep -C0 "<title>" | sed -e "s/^[[:space:]]*<title>//" -e "s/ Page .*//" -e "s/^[[:space:]]*//" -e "s/[[:space:]]*$/\n/" | entity_to_char)"

mkdir -p "$folder"
cd "$folder"
@@ -324,6 +331,11 @@ dl_batoto() {
PAGES=0
RET=0

base="$1"
if [ ! "${base:${#base}-1}" = "/" ]; then
base="${base}/"
fi

echo -n "[Batoto] Downloading '$folder' "

while [ "$RET" = "0" ]; do
@@ -332,7 +344,8 @@ dl_batoto() {

# On batoto, two slashes is a syntax error as of Jun 13, 2015.

fetch "${1}${CUR}" "$CUR.htm"
# We also need to fetch to a file here unfortunately, because possible stupidity.
fetch "${base}${CUR}" "$CUR.htm"

# Batoto sometimes gives out gunzips. We need to account for that... =_=

@@ -345,8 +358,10 @@ dl_batoto() {

img="$(grep -C0 'img\.src = ' $CUR.htm | sed -e 's/^[[:space:]]*img\.src = \"//g' -e "s/\";[[:space:]]*$//g")"

ext="${img##*.}"

# If this 404's, fetch will return non-zero. Thus, loop breaks.
fetch "$img" "" "nowarn"
fetch "$img" "${CUR}_${folder}.${ext}"
RET=$?

rm $CUR.htm
@@ -474,18 +489,21 @@ dl_dynsc() {
# Now loop-de-loop. First, make a decent name. Dynasty always has
# a short-title at the end of the URL.

folder="`echo $1 | sed -re 's/^.+\///'`"
mkdir -p $folder
cd $folder
PAGEDATA="$(fetch "$1" "-")"

folder="$(echo "$PAGEDATA" | grep "<title>" | sed -e 's/<title>Dynasty Reader &raquo; //g' -e 's|</title>||g')"

mkdir -p "$folder"
cd "$folder"

PAGEDATA="$(fetch "$1" "-" | grep "var pages")"
PAGELIST="$(echo "$PAGEDATA" | grep "var pages")"

# This set of seds cuts up the pagelist in a manner
# that makes it identical to a bash array.
# So we're essentially modifying the webpage into a dl-script.
# Cool, eh?

PAGETMP="$(echo $PAGEDATA | sed -e "s/\"image\"\://g" -e "s/,\"name\"\:\"[[:alnum:]_-]*\"//g" -e "s/\}\]/\)/g" -e "s/{//g" -e "s/}//g" -e "s/;//g" -e "s/ //g" -e "s/varpages=\[/pages=\(/g" -e "s/,/ /g")"
PAGETMP="$(echo $PAGELIST | sed -e "s/\"image\"\://g" -e "s/,\"name\"\:\"[[:alnum:]_-]*\"//g" -e "s/\}\]/\)/g" -e "s/{//g" -e "s/}//g" -e "s/;//g" -e "s/ //g" -e "s/varpages=\[/pages=\(/g" -e "s/,/ /g")"

# One possible nasty. Spaces.
# sed -i "s/\%20/ /g" tmp.1
@@ -513,23 +531,14 @@ dl_dynsc() {
# Scrape the chapter list of a Dynasty Scans series page into batch.txt.
#
# Arguments: $1 - URL of the page that lists the chapters.
# Outputs:   progress text on stdout; one absolute chapter URL per line is
#            appended to ./batch.txt.
#
# The page is streamed through a single pipeline exactly once, so each
# chapter is recorded once and no temporary files are created.
scrape_dynsc() {
  echo -n "[DynastyScans] Scraping Chapters..."

  # Chapter links are the lines carrying class="name". Each line is reduced
  # to its href value, trimmed, and prefixed with the host because the URLs
  # on the page are site-relative. Dynasty lists chapters in ascending
  # order, so no reversal is needed before appending.
  fetch "$1" "-" |
    grep 'class="name"' |
    sed -e 's|^.*href="||g' \
      -e 's|" class=.*||g' \
      -e "s/^[[:space:]]*//" \
      -e "s/[[:space:]]*$//" \
      -e "s|^|http://dynasty-scans.com|g" >> batch.txt

  # The backspaces rewind over the progress text printed above.
  echo -e "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[DynastyScans] Scraped chapters to batch.txt. You can modify this, or pass it to autobatch."
}
@@ -631,7 +640,7 @@ dl_eh() {
}

# Scraper entry point for e-h. Scraping is not supported for this site, so
# the function only reports that fact.
#
# Outputs: a single newline-terminated notice on stdout.
scrape_eh() {
  echo -e "[e-h] This isn't supported, considering there's really zero categorization here."
}
#!/bin/bash
# Copyright (C) 2015 Jon Feldman/@chaoskagami
@@ -658,7 +667,7 @@ fakku_state=1
fakku_filt=0

auto_fakku() {
if [ -n "`echo $1 | grep 'fakku.net/' | sed -e 's/^ *//' -e 's/[[:space:]]*$//'`" ]; then
if [ -n "$(echo $1 | grep 'fakku.net/' | sed -e 's/^ *//' -e 's/[[:space:]]*$//')" ]; then
# Fakku
return 1
fi
@@ -667,7 +676,7 @@ auto_fakku() {
}

dl_fakku() {
PAGES="$(fetch "$1/read" "-" | grep "window.params.thumbs =")"
PAGES="$(fetch "$1/read" "-" | grep "window.params.thumbs =" )"

# First. Escape fixups. Nuke escaped forward slashes
# Next. Reformat decl.
@@ -791,20 +800,13 @@ scrape_foolsl() {

echo -n "[Foolslide] Scraping Chapters..."

# Stream the page through one pipeline, with every stage joined by '|':
# keep the chapter-title anchors, reduce each line to its href value and
# trim surrounding whitespace. The site lists chapters newest-first, so the
# lines are reversed before being appended to batch.txt.
fetch "$1" "-" |
  grep '<div class="title"><a href=' |
  sed -e 's|<div class="title"><a href="||g' \
    -e 's|" title=.*||g' \
    -e "s/^[[:space:]]*//" \
    -e "s/[[:space:]]*$//" |
  reverse_lines >> batch.txt

echo -e "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[Foolslide] Scraped chapters to batch.txt. You can modify this, or pass it to autobatch."
}
@@ -827,7 +829,7 @@ scrape_foolsl() {
mpark_longname="MangaPark"
mpark_url="http://mangapark.me/"
# Broken
mpark_state=0
mpark_state=1
# No filter
mpark_filt=0

@@ -853,23 +855,19 @@ dl_mpark() {
mkdir -p "$folder"
cd "$folder"

DATA=$(fetch "$sitepage" "-" | grep 'a target="_blank"' - | sed 's|<em><a target="_blank" href=||g')

echo "$DATA" > tmp.1
declare -a DATA
DATA=$(fetch "$sitepage" "-" | grep 'target="_blank"' - | sed -e '1d' -e 's|^[[:space:]]*<a.*target="_blank" href=||g' -e "s/ title=.*$//" -e "s/\"//g"| tr '\n' ' ')

sed -i "s/ title=.*//" tmp.1
eval "pages=(`cat tmp.1 | tr '\n' ' '`)"
# eval "pages = ( $DATA )"

echo -n "[Mangapark] Downloading '$folder' "

CUR=0
for image in "${pages[@]}"; do
for image in ${DATA[@]}; do
fetch "$image"
spinner "$CUR"
CUR=$(( CUR + 1 ))
done

rm tmp.1

done_spin

BIN -951 Bytes (86%) dist/scangrab.shbz2
Binary file not shown.
BIN -89 Bytes (98%) dist/scangrab.shgz
Binary file not shown.
BIN +44 Bytes (100%) dist/scangrab.shxz
Binary file not shown.
@@ -43,12 +43,7 @@ dl_batoto() {
# The URLs are not preloaded like the former, so the fetch one page done thing won't work.
# Unfortunately, short of grabbing pages until an image 404's, there's no way of knowing when we're done.

data="$(fetch "$1" tmp)"
folder="$(cat tmp | grep -C0 "<title>" | sed -e "s/^[[:space:]]*<title>//" -e "s/ Page .*//" -e "s/^[[:space:]]*//" -e "s/[[:space:]]*$/\n/" | entity_to_char)"

rm tmp

echo "$folder"
folder="$(fetch "$1" "-" | grep -C0 "<title>" | sed -e "s/^[[:space:]]*<title>//" -e "s/ Page .*//" -e "s/^[[:space:]]*//" -e "s/[[:space:]]*$/\n/" | entity_to_char)"

mkdir -p "$folder"
cd "$folder"
@@ -57,6 +52,11 @@ dl_batoto() {
PAGES=0
RET=0

base="$1"
if [ ! "${base:${#base}-1}" = "/" ]; then
base="${base}/"
fi

echo -n "[Batoto] Downloading '$folder' "

while [ "$RET" = "0" ]; do
@@ -65,7 +65,8 @@ dl_batoto() {

# On batoto, two slashes is a syntax error as of Jun 13, 2015.

fetch "${1}${CUR}" "$CUR.htm"
# We also need to fetch to a file here unfortunately, because possible stupidity.
fetch "${base}${CUR}" "$CUR.htm"

# Batoto sometimes gives out gunzips. We need to account for that... =_=

@@ -78,8 +79,10 @@ dl_batoto() {

img="$(grep -C0 'img\.src = ' $CUR.htm | sed -e 's/^[[:space:]]*img\.src = \"//g' -e "s/\";[[:space:]]*$//g")"

ext="${img##*.}"

# If this 404's, fetch will return non-zero. Thus, loop breaks.
fetch "$img" "" "nowarn"
fetch "$img" "${CUR}_${folder}.${ext}"
RET=$?

rm $CUR.htm
@@ -34,18 +34,21 @@ dl_dynsc() {
# Now loop-de-loop. First, make a decent name. Dynasty always has
# a short-title at the end of the URL.

folder="`echo $1 | sed -re 's/^.+\///'`"
mkdir -p $folder
cd $folder
PAGEDATA="$(fetch "$1" "-")"

PAGEDATA="$(fetch "$1" "-" | grep "var pages")"
folder="$(echo "$PAGEDATA" | grep "<title>" | sed -e 's/<title>Dynasty Reader &raquo; //g' -e 's|</title>||g')"

mkdir -p "$folder"
cd "$folder"

PAGELIST="$(echo "$PAGEDATA" | grep "var pages")"

# This set of seds cuts up the pagelist in a manner
# that makes it identical to a bash array.
# So we're essentially modifying the webpage into a dl-script.
# Cool, eh?

PAGETMP="$(echo $PAGEDATA | sed -e "s/\"image\"\://g" -e "s/,\"name\"\:\"[[:alnum:]_-]*\"//g" -e "s/\}\]/\)/g" -e "s/{//g" -e "s/}//g" -e "s/;//g" -e "s/ //g" -e "s/varpages=\[/pages=\(/g" -e "s/,/ /g")"
PAGETMP="$(echo $PAGELIST | sed -e "s/\"image\"\://g" -e "s/,\"name\"\:\"[[:alnum:]_-]*\"//g" -e "s/\}\]/\)/g" -e "s/{//g" -e "s/}//g" -e "s/;//g" -e "s/ //g" -e "s/varpages=\[/pages=\(/g" -e "s/,/ /g")"

# One possible nasty. Spaces.
# sed -i "s/\%20/ /g" tmp.1
@@ -73,23 +76,14 @@ dl_dynsc() {
# Scrape the chapter list of a Dynasty Scans series page into batch.txt.
#
# Arguments: $1 - URL of the page that lists the chapters.
# Outputs:   progress text on stdout; one absolute chapter URL per line is
#            appended to ./batch.txt.
#
# The page is streamed through a single pipeline exactly once, so each
# chapter is recorded once and no temporary files are created.
scrape_dynsc() {
  echo -n "[DynastyScans] Scraping Chapters..."

  # Chapter links are the lines carrying class="name". Each line is reduced
  # to its href value, trimmed, and prefixed with the host because the URLs
  # on the page are site-relative. Dynasty lists chapters in ascending
  # order, so no reversal is needed before appending.
  fetch "$1" "-" |
    grep 'class="name"' |
    sed -e 's|^.*href="||g' \
      -e 's|" class=.*||g' \
      -e "s/^[[:space:]]*//" \
      -e "s/[[:space:]]*$//" \
      -e "s|^|http://dynasty-scans.com|g" >> batch.txt

  # The backspaces rewind over the progress text printed above.
  echo -e "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[DynastyScans] Scraped chapters to batch.txt. You can modify this, or pass it to autobatch."
}
@@ -96,5 +96,5 @@ dl_eh() {
}

# Scraper entry point for e-h. Scraping is not supported for this site, so
# the function only reports that fact.
#
# Outputs: a single newline-terminated notice on stdout.
scrape_eh() {
  echo -e "[e-h] This isn't supported, considering there's really zero categorization here."
}
@@ -23,7 +23,7 @@ fakku_state=1
fakku_filt=0

auto_fakku() {
if [ -n "`echo $1 | grep 'fakku.net/' | sed -e 's/^ *//' -e 's/[[:space:]]*$//'`" ]; then
if [ -n "$(echo $1 | grep 'fakku.net/' | sed -e 's/^ *//' -e 's/[[:space:]]*$//')" ]; then
# Fakku
return 1
fi
@@ -32,7 +32,7 @@ auto_fakku() {
}

dl_fakku() {
PAGES="$(fetch "$1/read" "-" | grep "window.params.thumbs =")"
PAGES="$(fetch "$1/read" "-" | grep "window.params.thumbs =" )"

# First. Escape fixups. Nuke escaped forward slashes
# Next. Reformat decl.
@@ -60,20 +60,13 @@ scrape_foolsl() {

echo -n "[Foolslide] Scraping Chapters..."

# Stream the page through one pipeline, with every stage joined by '|':
# keep the chapter-title anchors, reduce each line to its href value and
# trim surrounding whitespace. The site lists chapters newest-first, so the
# lines are reversed before being appended to batch.txt.
fetch "$1" "-" |
  grep '<div class="title"><a href=' |
  sed -e 's|<div class="title"><a href="||g' \
    -e 's|" title=.*||g' \
    -e "s/^[[:space:]]*//" \
    -e "s/[[:space:]]*$//" |
  reverse_lines >> batch.txt

echo -e "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[Foolslide] Scraped chapters to batch.txt. You can modify this, or pass it to autobatch."
}
@@ -17,7 +17,7 @@
mpark_longname="MangaPark"
mpark_url="http://mangapark.me/"
# Broken
mpark_state=0
mpark_state=1
# No filter
mpark_filt=0

@@ -43,23 +43,19 @@ dl_mpark() {
mkdir -p "$folder"
cd "$folder"

DATA=$(fetch "$sitepage" "-" | grep 'a target="_blank"' - | sed 's|<em><a target="_blank" href=||g')

echo "$DATA" > tmp.1
declare -a DATA
DATA=$(fetch "$sitepage" "-" | grep 'target="_blank"' - | sed -e '1d' -e 's|^[[:space:]]*<a.*target="_blank" href=||g' -e "s/ title=.*$//" -e "s/\"//g"| tr '\n' ' ')

sed -i "s/ title=.*//" tmp.1
eval "pages=(`cat tmp.1 | tr '\n' ' '`)"
# eval "pages = ( $DATA )"

echo -n "[Mangapark] Downloading '$folder' "

CUR=0
for image in "${pages[@]}"; do
for image in ${DATA[@]}; do
fetch "$image"
spinner "$CUR"
CUR=$(( CUR + 1 ))
done

rm tmp.1

done_spin

@@ -134,34 +134,46 @@ fetch_detect() {
return $FETCH_RESULT
}

# AVOID CHANGING THIS FUNCTION IF AT ALL POSSIBLE.
# THINGS WILL BREAK IN EVERYTHING IF THIS ONE BREAKS.

fetch() {
if [ $_FETCHTOOL = 1 ]; then

_CMD="$FETCH_CMD \"$1\""
if [ "$2" = "" ]; then
_FILE="$(basename "$1")"
_CMD="$_CMD -O $(basename "$1")"
elif [ "$2" = "-" ]; then
_CMD="$_CMD -O -"
else
_FILE="$2"
_CMD="$_CMD -O \"$2\""
fi
_CMD="$_CMD -O $_FILE"

elif [ $_FETCHTOOL = 2 ]; then

_CMD="$FETCH_CMD $1"
if [ "$2" = "" ]; then
_FILE="$(basename "$1")"
_CMD="$_CMD > $(basename "$1")"
elif [ "$2" = "-" ]; then
_CMD="$_CMD"
else
_FILE="$2"
_CMD="$_CMD > \"$2\""
fi
_CMD="$_CMD > $_FILE"

elif [ $_FETCHTOOL = 3 ]; then

_CMD="$FETCH_CMD $1"
if [ "$2" = "" ]; then
_FILE="$(basename "$1")"
_CMD="$_CMD -o $(basename "$1")"
elif [ "$2" = "-" ]; then
_CMD="$_CMD -o -"
else
_FILE="$2"
_CMD="$_CMD -o \"$2\""
fi
_CMD="$_CMD -o $_FILE"
fi

eval "$_CMD" 2>/dev/null

# echo -e "\n$_CMD"
eval " $_CMD" 2>/dev/null
FETCH_RESULT=$?

if [ ! $FETCH_RESULT = 0 ]; then
@@ -258,7 +270,7 @@ entity_to_char() {
-e "s/|/-/g"
}

function reverse_lines {
reverse_lines() {
readarray -t LINES
for (( I = ${#LINES[@]}; I; )); do
echo "${LINES[--I]}"

Large diffs are not rendered by default.

@@ -18,7 +18,7 @@
# This file automatically fetches some 'known' files. In case of an api change, the files should
# be invalid.

(cd ../src && ./merge ../test/scangrab)
(cd ../src && COMPRESS=0 MINIFY=0 ./merge ../test/scangrab)


# We try to use SFW stuff for NSFW scrapers, fyi.