From d92e336263da3f0f2a58dfc24cb9b5f23449cc5c Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Thu, 25 Jun 2015 16:56:29 +0100 Subject: [PATCH 01/16] Initial work on refactoring tdbloader2 scripts (JENA-977) - Better option processing - Split tdbloader2worker into a data and index phase script - Support only running a specific phase --- apache-jena/bin/tdbloader2 | 72 ++++++++++++++- apache-jena/bin/tdbloader2data | 107 ++++++++++++++++++++++ apache-jena/bin/tdbloader2index | 155 ++++++++++++++++++++++++++++++++ 3 files changed, 333 insertions(+), 1 deletion(-) create mode 100755 apache-jena/bin/tdbloader2data create mode 100755 apache-jena/bin/tdbloader2index diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index c0810741d8a..37cc874003a 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -48,6 +48,7 @@ case "$(uname)" in esac export JENA_CP +echo $JENA_CP if [ -z "$SORT_ARGS" ] then SORT_ARGS="--buffer-size=50%" @@ -58,4 +59,73 @@ then fi export SORT_ARGS -exec "$JENA_HOME/bin/tdbloader2worker" "$@" +# Process arguments +LOC= +PHASE= + +while [ $# -gt 0 ] +do + ARG=$1 + case "$ARG" in + --loc|-loc) + # Location space separated + shift + LOC="$1" + shift + ;; + -*loc=*) + # Location = separated + LOC=${ARG/-*loc=/} + shift + ;; + --phase) + # Phase space separated + shift + PHASE="$1" + shift + ;; + *) + # Once we see an unrecognized argument treat as start of files to process + break + ;; + esac +done + +if [ -z "$PHASE" ]; then + PHASE="all" +fi + +echo "Location is '$LOC'" +echo "Phase is '$PHASE'" + +log() { echo " $(date $DATE)" "$@" ; } + +#DATE="+%Y-%m-%dT%H:%M:%S%:z" +DATE="+%H:%M:%S" + +# ---- Start +log "-- TDB Bulk Loader Start" +TIME1="$(date +%s)" + +case "$PHASE" in + all) + exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@" + exec "$JENA_HOME/bin/tdbloader2index" --loc "$LOC" + ;; + data) + exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@" + ;; + index) + exec "$JENA_HOME/bin/tdbloader2index" --loc 
"$LOC" + ;; + *) + echo "Unrecognized phase $PHASE" 1>&2 + exit 1 + ;; +esac + +# ---- End +TIME2="$(date +%s)" +log "-- TDB Bulk Loader Finish" +ELAPSED=$(($TIME2-$TIME1)) +log "-- $ELAPSED seconds" \ No newline at end of file diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data new file mode 100755 index 00000000000..90200e4ceed --- /dev/null +++ b/apache-jena/bin/tdbloader2data @@ -0,0 +1,107 @@ +#!/usr/bin/env bash + +## Licensed to the Apache Software Foundation (ASF) under one +## or more contributor license agreements. See the NOTICE file +## distributed with this work for additional information +## regarding copyright ownership. The ASF licenses this file +## to you under the Apache License, Version 2.0 (the +## "License"); you may not use this file except in compliance +## with the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +# The environment for this sub-script is setup by "tdbloader2" + +# Exit on error. +set -e + +# Sort order is ASCII +export LC_ALL="C" + +log() { echo " $(date $DATE)" "$@" ; } + +#DATE="+%Y-%m-%dT%H:%M:%S%:z" +DATE="+%H:%M:%S" + +## JVM Arguments +JVM_ARGS=${JVM_ARGS:--Xmx1200M} + +# Classpath set in "tdbloader2" +if [ -z "$JENA_CP" ] +then + echo "Classpath not provided : set JENA_CP" 1>&2 + exit 1 +fi + +USAGE="Usage: tdbloader2data --loc location datafile ..." 
+PKG=org.apache.jena.tdb.store.bulkloader2 + +while [ $# -gt 0 ] +do + ARG=$1 + case "$ARG" in + --loc|-loc) + # Location space separated + shift + LOC="$1" + shift + ;; + -*loc=*) + # Location = separated + LOC=${ARG/-*loc=/} + shift + ;; + --help) + echo $USAGE + exit 0 + ;; + *) + # Any further arguments are treated as data files + break + ;; + esac +done + +# Verify arguments +if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi +if [ $# = 0 ]; then echo "No data files specified" ; exit 1 ; fi + +# Look for any index and data files in the directory. +# Skip a possible configuration file +if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)" +then + echo "Location is not empty: $LOC" + exit 1 +fi + +if [ ! -e "$LOC" ] ; then + # If non-existent try to create + mkdir "$LOC" + if [ $? != 0 ]; then + echo "Failed to create new directory: $LOC" + exit 1 + fi +fi +if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi + +FILES="$@" +## Stdin? +KEEPWORKFILES="${KEEPWORKFILES:-}" + +# ---- Data loading phase +log "Data Load Phase" +# Produce nodes file and triples/quads text file. + +DATA_TRIPLES="$LOC/data-triples.tmp" +DATA_QUADS="$LOC/data-quads.tmp" + +java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \ + "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES + +log "Data Load Phase Completed" diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index new file mode 100755 index 00000000000..372aa5ce20f --- /dev/null +++ b/apache-jena/bin/tdbloader2index @@ -0,0 +1,155 @@ +#!/usr/bin/env bash + +## Licensed to the Apache Software Foundation (ASF) under one +## or more contributor license agreements. See the NOTICE file +## distributed with this work for additional information +## regarding copyright ownership. 
The ASF licenses this file +## to you under the Apache License, Version 2.0 (the +## "License"); you may not use this file except in compliance +## with the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. + +# The environment for this sub-script is setup by "tdbloader2" + +# Exit on error. +set -e + +# Sort order is ASCII +export LC_ALL="C" + +log() { echo " $(date $DATE)" "$@" ; } + +TMP=$$ +#DATE="+%Y-%m-%dT%H:%M:%S%:z" +DATE="+%H:%M:%S" + +##--parallel is not always available. +SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" +JVM_ARGS=${JVM_ARGS:--Xmx1200M} + +# Classpath set in "tdbloader2" +if [ -z "$JENA_CP" ] +then + echo "Classpath not provided : set JENA_CP" 1>&2 + exit 1 +fi + +USAGE="Usage: tdbloader2index --loc location" +PKG=org.apache.jena.tdb.store.bulkloader2 + +while [ $# -gt 0 ] +do + ARG=$1 + case "$ARG" in + --loc|-loc) + # Location space separated + shift + LOC="$1" + shift + ;; + -*loc=*) + # Location = separated + LOC=${ARG/-*loc=/} + shift + ;; + --help) + echo $USAGE + exit 0 + ;; + *) + # Any further arguments are ignored + break + ;; + esac +done + +# Verify arguments +if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi +if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi +if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi + +KEEPWORKFILES="${KEEPWORKFILES:-}" + +DATA_TRIPLES="$LOC/data-triples.tmp" +DATA_QUADS="$LOC/data-quads.tmp" + +# ---- Index intermediates +## All files are writtern S P O / G S P O columns per row but in different sort orders. 
+log "Index Building Phase" + +which pv >/dev/null 2>&1 +HAS_PV=$? + +process_rows() +{ + local KEYS="$1" + local DATA="$2" + local IDX=$3 + local WORK="$LOC/$IDX-txt" + + if [ ! -s "$DATA" ] + then + return + fi + + log "Creating Index $IDX" + log " Sort $IDX" + if [ $HAS_PV = 0 ]; then + # Use pv (pipe viewer) to monitor sort progress + # Note that progress data will only be seen if running in the foreground + SIZE=$(du -k "$DATA" | cut -f 1) + pv -c -N data < "$DATA" | sort $SORT_ARGS -u $KEYS | pv -c -N sort -s $SIZE > $WORK + else + # Use sort without any progress monitoring + sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK + fi + log " Sort $IDX Completed" + log " Build $IDX" + rm -f "$LOC/$IDX.dat" + rm -f "$LOC/$IDX.idn" + java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" + log " Build $IDX Completed" + # Remove intermediary file. + if [ "$KEEPWORKFILES" != "yes" ] + then + rm "$WORK" + fi +} + +K1="-k 1,1" +K2="-k 2,2" +K3="-k 3,3" +K4="-k 4,4" + +process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO + +process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS + +process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP + +process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO + +process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS + +process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP + +process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG + +process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG + +process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG + +log "Index Building Phase Completed" + +# ---- Clean up. 
+if [ "$KEEPWORKFILES" != "yes" ] +then + rm -f "$DATA_TRIPLES" "$DATA_QUADS" +fi From 7b61a144854d81acbd180b5debfd5c8638d2af57 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Thu, 25 Jun 2015 17:04:36 +0100 Subject: [PATCH 02/16] Further tweak new tdbloader2 scripts (JENA-977) - Add proper usage to tdbloader2 - Check for temporary data files needed for index phase in tdbloader2index --- apache-jena/bin/tdbloader2 | 35 ++++++++++++++++++++++++++++++--- apache-jena/bin/tdbloader2index | 9 +++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index 37cc874003a..34ee0295e23 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -15,6 +15,30 @@ ## See the License for the specific language governing permissions and ## limitations under the License. +function printUsage() { + cat << EOF +Usage: tdbloader2 + +Options are as follows: + + --help + Prints this help summary and exits + + --loc + Sets the location in which the database should be created + + --phase + Sets the phase of the build to run, supported values are: + + all Full bulk load + data Data phase only + index Index phase only, requires the data phase to previously have been run + + When not specified defaults to all + +EOF +} + # If JENA_HOME is empty if [ -z "$JENA_HOME" ] then @@ -48,7 +72,7 @@ case "$(uname)" in esac export JENA_CP -echo $JENA_CP +#echo $JENA_CP if [ -z "$SORT_ARGS" ] then SORT_ARGS="--buffer-size=50%" @@ -84,6 +108,11 @@ do PHASE="$1" shift ;; + --help) + # Help + printUsage + exit 0 + ;; *) # Once we see an unrecognized argument treat as start of files to process break @@ -95,8 +124,8 @@ if [ -z "$PHASE" ]; then PHASE="all" fi -echo "Location is '$LOC'" -echo "Phase is '$PHASE'" +#echo "Location is '$LOC'" +#echo "Phase is '$PHASE'" log() { echo " $(date $DATE)" "$@" ; } diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 372aa5ce20f..5624854ce6b 100755 --- 
a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -80,6 +80,15 @@ KEEPWORKFILES="${KEEPWORKFILES:-}" DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" +if [ ! -e "$DATA_TRIPLES" ] ; then + echo "No triples data file found in location, please run the tdbloader2data script first" + exit 1 +fi +if [ ! -e "$DATA_QUADS" ]; then + echo "No quads data file found in location, please run the tdbloader2data script first" + exit 1 +fi + # ---- Index intermediates ## All files are writtern S P O / G S P O columns per row but in different sort orders. log "Index Building Phase" From a96b0164c43142791ac030e5332b3f54df6fb4ba Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Fri, 26 Jun 2015 12:25:57 +0100 Subject: [PATCH 03/16] Further refactoring of tdbloader2 scripts (JENA-977) - Proper usage summaries in all scripts - -k/--keep-work option instead of hidden environment variable for keeping work - Short forms for all options --- apache-jena/bin/tdbloader2 | 72 +++++++++++++++----- apache-jena/bin/tdbloader2data | 82 +++++++++++++++++----- apache-jena/bin/tdbloader2index | 116 +++++++++++++++++++++++--------- 3 files changed, 204 insertions(+), 66 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index 34ee0295e23..9ff27273d2a 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -17,24 +17,53 @@ function printUsage() { cat << EOF -Usage: tdbloader2 +tdbloader2 - TDB Bulk Loader -Options are as follows: +Usage: tdbloader2 --loc [Options] ... +Bulk loader for TDB which manipulates the data files directly and so +can only be used to create new databases. This command relies on +POSIX utilities so will only work on POSIX operating systems. + +If you wish to bulk load to an existing database please use tdbloader +instead. + +Required options are as follows: + + -l + --loc + Sets the location in which the database should be created. 
+ + This location must be a directory and must be empty, if a + non-existent path is specified it will be created as a new + directory. + + + Specifies the path to one/more data files to load + +Common additional options are as follows: + + -h --help Prints this help summary and exits - --loc - Sets the location in which the database should be created +Advanced additional options are as follows: + -k + --keep-work + Keeps the temporary work files around after they are no longer + needed. May be useful for debugging. + + -p --phase Sets the phase of the build to run, supported values are: - all Full bulk load - data Data phase only - index Index phase only, requires the data phase to previously have been run + all Full bulk load + data Data phase only + index Index phase only, requires the data phase to + previously have been run - When not specified defaults to all + When no phase is specified it defaults to all EOF } @@ -86,12 +115,18 @@ export SORT_ARGS # Process arguments LOC= PHASE= +KEEP_WORK=0 while [ $# -gt 0 ] do ARG=$1 case "$ARG" in - --loc|-loc) + -k|--keep-work) + # Keep work files + shift + KEEP_WORK=1 + ;; + -l|--loc|-loc) # Location space separated shift LOC="$1" @@ -102,13 +137,13 @@ do LOC=${ARG/-*loc=/} shift ;; - --phase) + -p|--phase) # Phase space separated shift PHASE="$1" shift ;; - --help) + -h|--help) # Help printUsage exit 0 @@ -123,9 +158,10 @@ done if [ -z "$PHASE" ]; then PHASE="all" fi - -#echo "Location is '$LOC'" -#echo "Phase is '$PHASE'" +COMMON_ARGS= +if [ $KEEP_WORK = 0 ]; then + COMMON_ARGS="--keep-work" +fi log() { echo " $(date $DATE)" "$@" ; } @@ -138,14 +174,14 @@ TIME1="$(date +%s)" case "$PHASE" in all) - exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@" - exec "$JENA_HOME/bin/tdbloader2index" --loc "$LOC" + exec "$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC" ;; data) - exec "$JENA_HOME/bin/tdbloader2data" --loc "$LOC" "$@" + exec 
"$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" ;; index) - exec "$JENA_HOME/bin/tdbloader2index" --loc "$LOC" + exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC" ;; *) echo "Unrecognized phase $PHASE" 1>&2 diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index 90200e4ceed..5aceb273021 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -18,6 +18,48 @@ # The environment for this sub-script is setup by "tdbloader2" +function printUsage() { + cat << EOF +tdbloader2data - TDB Bulk Loader - Data Phase + +Usage tdbloader2data --loc [Options] ... + +Bulk Loader for TDB which generates the Node Table. This command +relies on POSIX utilities so will only work on POSIX operating +systems. + +This command can only be used to create new database. If you wish to +bulk load to an existing database please use tdbloader instead. + +Required options are as follows: + + -l + --loc + Sets the location in which the database should be created. + + This location must be a directory and must be empty, if a + non-existent path is specified it will be created as a new + directory. + + + Specifies the path to one/more data files to load + +Common additional options are as follows: + + -h + --help + Prints this help summary and exits + +Advanced additional options are as follows: + + -k + --keep-work + Keeps the temporary work files around after they are no longer + needed. May be useful for debugging. + +EOF +} + # Exit on error. set -e @@ -29,24 +71,24 @@ log() { echo " $(date $DATE)" "$@" ; } #DATE="+%Y-%m-%dT%H:%M:%S%:z" DATE="+%H:%M:%S" -## JVM Arguments -JVM_ARGS=${JVM_ARGS:--Xmx1200M} - -# Classpath set in "tdbloader2" -if [ -z "$JENA_CP" ] -then - echo "Classpath not provided : set JENA_CP" 1>&2 - exit 1 -fi - -USAGE="Usage: tdbloader2data --loc location datafile ..." 
PKG=org.apache.jena.tdb.store.bulkloader2 +# Process Arguments +LOC= +KEEP_WORK=0 + while [ $# -gt 0 ] do ARG=$1 case "$ARG" in - --loc|-loc) + -k|--keep-work) + # Keep work files + # This option is actually not used by this script but may be passed in + # by the parent tdbloader2 script + shift + KEEP_WORK=1 + ;; + -l|--loc|-loc) # Location space separated shift LOC="$1" @@ -57,8 +99,8 @@ do LOC=${ARG/-*loc=/} shift ;; - --help) - echo $USAGE + -h|--help) + printUsage exit 0 ;; *) @@ -91,8 +133,16 @@ fi if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi FILES="$@" -## Stdin? -KEEPWORKFILES="${KEEPWORKFILES:-}" + +## JVM Arguments +JVM_ARGS=${JVM_ARGS:--Xmx1200M} + +# Classpath set in "tdbloader2" +if [ -z "$JENA_CP" ] +then + echo "Classpath not provided : set JENA_CP" 1>&2 + exit 1 +fi # ---- Data loading phase log "Data Load Phase" diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 5624854ce6b..2730af1f2ce 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -18,6 +18,45 @@ # The environment for this sub-script is setup by "tdbloader2" +function printUsage() { + cat << EOF +tdbloader2index - TDB Bulk Loader - Index Phase + +Usage: tdbloader2index --loc [Options] + +Bulk Loader for TDB which generates the Index files based upon the +temporary data files generated by tdbloader2data. This command relies +on POSIX utilities so will only work on POSIX operating systems. + +This command can only be used to create new database. If you wish to +bulk load to an existing database please use tdbloader instead. + +Required options are as follows: + + -l + --loc + Sets the location in which the database should be created. + + This location must be a directory and must be empty, if a + non-existent path is specified it will be created as a new + directory. 
+ +Common additional options are as follows: + + -h + --help + Prints this help summary and exits + +Advanced additional options are as follows: + + -k + --keep-work + Keeps the temporary work files around after they are no longer + needed. May be useful for debugging. + +EOF +} + # Exit on error. set -e @@ -30,25 +69,22 @@ TMP=$$ #DATE="+%Y-%m-%dT%H:%M:%S%:z" DATE="+%H:%M:%S" -##--parallel is not always available. -SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" -JVM_ARGS=${JVM_ARGS:--Xmx1200M} - -# Classpath set in "tdbloader2" -if [ -z "$JENA_CP" ] -then - echo "Classpath not provided : set JENA_CP" 1>&2 - exit 1 -fi - -USAGE="Usage: tdbloader2index --loc location" PKG=org.apache.jena.tdb.store.bulkloader2 +# Process Arguments +LOC= +KEEP_WORK=0 + while [ $# -gt 0 ] do ARG=$1 case "$ARG" in - --loc|-loc) + -k|--keep-work) + # Keep work files + shift + KEEP_WORK=1 + ;; + -l|--loc|-loc) # Location space separated shift LOC="$1" @@ -59,8 +95,8 @@ do LOC=${ARG/-*loc=/} shift ;; - --help) - echo $USAGE + -h|--help) + printUsage exit 0 ;; *) @@ -75,8 +111,6 @@ if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi -KEEPWORKFILES="${KEEPWORKFILES:-}" - DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" @@ -89,14 +123,29 @@ if [ ! -e "$DATA_QUADS" ]; then exit 1 fi +##--parallel is not always available. +SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" +JVM_ARGS=${JVM_ARGS:--Xmx1200M} + +# Classpath set in "tdbloader2" +if [ -z "$JENA_CP" ] +then + echo "Classpath not provided : set JENA_CP" 1>&2 + exit 1 +fi + # ---- Index intermediates ## All files are writtern S P O / G S P O columns per row but in different sort orders. log "Index Building Phase" +# Check whether Pipe Viewer is available +# Needs to temporarily disable exit on error +set +e which pv >/dev/null 2>&1 HAS_PV=$? 
+set -e -process_rows() +generate_index() { local KEYS="$1" local DATA="$2" @@ -109,6 +158,8 @@ process_rows() fi log "Creating Index $IDX" + + # Sort the input data log " Sort $IDX" if [ $HAS_PV = 0 ]; then # Use pv (pipe viewer) to monitor sort progress @@ -120,14 +171,16 @@ process_rows() sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK fi log " Sort $IDX Completed" + + # Build into an index log " Build $IDX" rm -f "$LOC/$IDX.dat" rm -f "$LOC/$IDX.idn" java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" log " Build $IDX Completed" - # Remove intermediary file. - if [ "$KEEPWORKFILES" != "yes" ] - then + + # Remove work file unless keeping + if [ $KEEP_WORK = 1 ]; then rm "$WORK" fi } @@ -137,28 +190,27 @@ K2="-k 2,2" K3="-k 3,3" K4="-k 4,4" -process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO +generate_index "$K1 $K2 $K3" "$DATA_TRIPLES" SPO -process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS +generate_index "$K2 $K3 $K1" "$DATA_TRIPLES" POS -process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP +generate_index "$K3 $K1 $K2" "$DATA_TRIPLES" OSP -process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO +generate_index "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO -process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS +generate_index "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS -process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP +generate_index "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP -process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG +generate_index "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG -process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG +generate_index "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG -process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG +generate_index "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG log "Index Building Phase Completed" # ---- Clean up. 
-if [ "$KEEPWORKFILES" != "yes" ] -then +if [ $KEEP_WORK = 1 ]; then rm -f "$DATA_TRIPLES" "$DATA_QUADS" fi From 7770596bc94613409fe2753240b603ae22a38b57 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Fri, 26 Jun 2015 16:15:18 +0100 Subject: [PATCH 04/16] Various further improvements to the scripts (JENA-977) - Validate sort temporary directory when indexing and WARN if the disk it is on is low on space (10% or less free) - Support --debug and --trace flags in all scripts, add various debug output throughout scripts - Fix a bug with not detecting sort failure when pv is used to monitor progress - Fix a bug in size calculations used for progress monitoring and sort failure detection This commit includes some temporary DEV changes that will be reverted later --- apache-jena/bin/tdbloader2 | 59 +++++++++++---- apache-jena/bin/tdbloader2data | 43 +++++++++-- apache-jena/bin/tdbloader2index | 126 ++++++++++++++++++++++++++++---- 3 files changed, 192 insertions(+), 36 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index 9ff27273d2a..9508031f65e 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -49,6 +49,10 @@ Common additional options are as follows: Advanced additional options are as follows: + -d + --debug + Enable debug mode, adds extra debug output + -k --keep-work Keeps the temporary work files around after they are no longer @@ -65,6 +69,10 @@ Advanced additional options are as follows: When no phase is specified it defaults to all + -t + --trace + Enable trace mode, essentially sets -x within the scripts + EOF } @@ -101,13 +109,12 @@ case "$(uname)" in esac export JENA_CP -#echo $JENA_CP -if [ -z "$SORT_ARGS" ] -then +# echo JENA_CP +if [ -z "$SORT_ARGS" ]; then SORT_ARGS="--buffer-size=50%" - if $(sort --parallel=3 < /dev/null 2>/dev/null) - then - SORT_ARGS="$SORT_ARGS --parallel=3" + sort --parallel=3 < /dev/null 2>/dev/null + if [ $? 
= 0 ]; then + SORT_ARGS="$SORT_ARGS --parallel=3" fi fi export SORT_ARGS @@ -116,11 +123,23 @@ export SORT_ARGS LOC= PHASE= KEEP_WORK=0 +DEBUG=0 +TRACE=0 while [ $# -gt 0 ] do ARG=$1 case "$ARG" in + -d|--debug) + # Debug Mode + shift + DEBUG=1 + ;; + -h|--help) + # Help + printUsage + exit 0 + ;; -k|--keep-work) # Keep work files shift @@ -143,10 +162,11 @@ do PHASE="$1" shift ;; - -h|--help) - # Help - printUsage - exit 0 + -t|--trace) + # Trace mode + shift + TRACE=1 + set -x ;; *) # Once we see an unrecognized argument treat as start of files to process @@ -159,9 +179,15 @@ if [ -z "$PHASE" ]; then PHASE="all" fi COMMON_ARGS= -if [ $KEEP_WORK = 0 ]; then +if [ $KEEP_WORK = 1 ]; then COMMON_ARGS="--keep-work" fi +if [ $DEBUG = 1 ]; then + COMMON_ARGS="$COMMON_ARGS --debug" +fi +if [ $TRACE = 1 ]; then + COMMON_ARGS="$COMMON_ARGS --trace" +fi log() { echo " $(date $DATE)" "$@" ; } @@ -172,16 +198,19 @@ DATE="+%H:%M:%S" log "-- TDB Bulk Loader Start" TIME1="$(date +%s)" +TOOL_DIR=$JENA_HOME/bin/ +# DEV - Following is just for debugging +TOOL_DIR= case "$PHASE" in all) - exec "$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" - exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC" + exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC" ;; data) - exec "$JENA_HOME/bin/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" ;; index) - exec "$JENA_HOME/bin/tdbloader2index" $COMMON_ARGS --loc "$LOC" + exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC" ;; *) echo "Unrecognized phase $PHASE" 1>&2 diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index 5aceb273021..efb590a2be4 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -52,35 +52,58 @@ Common additional options are as follows: Advanced additional options are as follows: + -d + --debug + Enable 
debug mode, adds extra debug output + -k --keep-work Keeps the temporary work files around after they are no longer needed. May be useful for debugging. + -t + --trace + Enable trace mode, essentially sets -x within the scripts + EOF } # Exit on error. set -e -# Sort order is ASCII -export LC_ALL="C" - log() { echo " $(date $DATE)" "$@" ; } +function debug() { + if [ $DEBUG = 1 ]; then + log "DEBUG" "$@" + fi +} + #DATE="+%Y-%m-%dT%H:%M:%S%:z" DATE="+%H:%M:%S" PKG=org.apache.jena.tdb.store.bulkloader2 +#DEV - Allows use against Jena 2 API +PKG=com.hp.hpl.jena.tdb.store.bulkloader2 # Process Arguments LOC= KEEP_WORK=0 +DEBUG=0 while [ $# -gt 0 ] do ARG=$1 case "$ARG" in + -d|--debug) + # Debug Mode + shift + DEBUG=1 + ;; + -h|--help) + printUsage + exit 0 + ;; -k|--keep-work) # Keep work files # This option is actually not used by this script but may be passed in @@ -99,9 +122,10 @@ do LOC=${ARG/-*loc=/} shift ;; - -h|--help) - printUsage - exit 0 + -t|--trace) + # Trace mode + shift + set -x ;; *) # Any further arguments are treated as data files @@ -124,18 +148,23 @@ fi if [ ! -e "$LOC" ] ; then # If non-existent try to create + debug "Trying to create new database directory: $LOC" mkdir "$LOC" if [ $? != 0 ]; then echo "Failed to create new directory: $LOC" exit 1 fi + debug "New database directory created: $LOC" fi if [ ! 
-d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi +# TODO Make LOC absolute + FILES="$@" ## JVM Arguments JVM_ARGS=${JVM_ARGS:--Xmx1200M} +debug "JVM Arguments are $JVM_ARGS" # Classpath set in "tdbloader2" if [ -z "$JENA_CP" ] @@ -151,6 +180,8 @@ log "Data Load Phase" DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" +debug "Data files are $DATA_TRIPLES and $DATA_QUADS" + java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \ "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 2730af1f2ce..971b82426b8 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -49,11 +49,18 @@ Common additional options are as follows: Advanced additional options are as follows: + -d + --debug + Enable debug mode, adds extra debug output + -k --keep-work Keeps the temporary work files around after they are no longer needed. May be useful for debugging. 
+ -t + --trace + Enable trace mode, essentially sets -x within the scripts EOF } @@ -65,20 +72,45 @@ export LC_ALL="C" log() { echo " $(date $DATE)" "$@" ; } -TMP=$$ +function debug() { + if [ $DEBUG = 1 ]; then + log "DEBUG" "$@" + fi +} + +function warn() { + log "WARN" "$@" +} + +function getSize() { + ls -l $1 | awk '{print $5}' +} + #DATE="+%Y-%m-%dT%H:%M:%S%:z" DATE="+%H:%M:%S" PKG=org.apache.jena.tdb.store.bulkloader2 +#DEV - Allows use against Jena 2 API +PKG=com.hp.hpl.jena.tdb.store.bulkloader2 # Process Arguments LOC= KEEP_WORK=0 +DEBUG=0 while [ $# -gt 0 ] do ARG=$1 case "$ARG" in + -d|--debug) + # Debug Mode + shift + DEBUG=1 + ;; + -h|--help) + printUsage + exit 0 + ;; -k|--keep-work) # Keep work files shift @@ -95,9 +127,10 @@ do LOC=${ARG/-*loc=/} shift ;; - -h|--help) - printUsage - exit 0 + -t|--trace) + # Trace mode + shift + set -x ;; *) # Any further arguments are ignored @@ -111,6 +144,8 @@ if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi +# TODO Make LOC absolute + DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" @@ -123,9 +158,12 @@ if [ ! -e "$DATA_QUADS" ]; then exit 1 fi +debug "Data files are $DATA_TRIPLES and $DATA_QUADS" + ##--parallel is not always available. SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" JVM_ARGS=${JVM_ARGS:--Xmx1200M} +debug "JVM Arguments are $JVM_ARGS" # Classpath set in "tdbloader2" if [ -z "$JENA_CP" ] @@ -133,17 +171,57 @@ then echo "Classpath not provided : set JENA_CP" 1>&2 exit 1 fi +debug "Jena Classpath is $JENA_CP" # ---- Index intermediates ## All files are writtern S P O / G S P O columns per row but in different sort orders. 
log "Index Building Phase" # Check whether Pipe Viewer is available -# Needs to temporarily disable exit on error +# Needs to temporarily disable exit on error as which produces an error +# if the given command is not found set +e which pv >/dev/null 2>&1 HAS_PV=$? set -e +if [ $HAS_PV = 0 ]; then + debug "pv (Pipe Viewer) available on your system so sorts will show progres" +else + debug "No pv (Pipe Viewer) on your system so sorts will show no progress" +fi + +# Check where we are storing temporary sort files +debug "Sort Arguments: $SORT_ARGS" +SORT_TEMP_DIR= +if [[ "$SORT_ARGS" == *"-T "* ]]; then + # Specified via -T argument + SORT_TEMP_DIR=(${SORT_ARGS/-T /}) + SORT_TEMP_DIR=${SORT_TEMP_DIR[0]} +elif [[ "$SORT_ARGS" == *"--temporary-directory="* ]]; then + # Specified via --temporary-directory argument + SORT_TEMP_DIR=(${SORT_ARGS/--temporary-directory=/}) + SORT_TEMP_DIR=${SORT_TEMP_DIR[0]} +else + # Using the system temp directory + SORT_TEMP_DIR="$TMPDIR" +fi +debug "Sort Temp Directory: $SORT_TEMP_DIR" + +# Find out how much space is on the sort directory +SORT_DRIVE_INFO=$(df "$SORT_TEMP_DIR" | tail -n +2) +SORT_DRIVE_DISK=$(echo $SORT_DRIVE_INFO | awk '{print $1}') +SORT_DRIVE_FREE_SPACE=$(echo $SORT_DRIVE_INFO | awk '{print $4}') +SORT_DRIVE_USED=$(echo $SORT_DRIVE_INFO | awk '{print $5}') +SORT_DRIVE_FREE=${SORT_DRIVE_USED/"%"/} +SORT_DRIVE_FREE=$((100 - $SORT_DRIVE_FREE)) +debug "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes)" + +if [ $SORT_DRIVE_FREE -le 10 ]; then + echo + warn "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which only has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes) available" + warn "This may result in sort failures if the data to be indexed is large" + echo +fi generate_index() { @@ -152,35 +230,52 @@ generate_index() local IDX=$3 local WORK="$LOC/$IDX-txt" - if [ ! -s "$DATA" ] - then + if [ ! 
-s "$DATA" ]; then + debug "Skipping Index $IDX as no relevant data to index" return fi log "Creating Index $IDX" # Sort the input data - log " Sort $IDX" + log "Sort $IDX" + debug "Sorting $DATA into work file $WORK" if [ $HAS_PV = 0 ]; then # Use pv (pipe viewer) to monitor sort progress # Note that progress data will only be seen if running in the foreground - SIZE=$(du -k "$DATA" | cut -f 1) + # To report progress need to know size of input data + SIZE=$(getSize "$DATA") + debug "Size of data to be sorted is $SIZE bytes" + pv -c -N data < "$DATA" | sort $SORT_ARGS -u $KEYS | pv -c -N sort -s $SIZE > $WORK + + # CAUTION + # If sort errors here then the piping through pv will stop us from seeing the error + # and we'll continue onwards + # Therefore we need to check that the output size is same as input size as this is + # the only way to tell if sort suceeded + OUTPUT_SIZE=$(getSize "$WORK") + debug "Size of sorted data is $OUTPUT_SIZE bytes" + if [ $SIZE != $OUTPUT_SIZE ]; then + log "Aborting due to sort error" + exit 1 + fi else # Use sort without any progress monitoring sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK fi - log " Sort $IDX Completed" + log "Sort $IDX Completed" # Build into an index - log " Build $IDX" + log "Build $IDX" rm -f "$LOC/$IDX.dat" rm -f "$LOC/$IDX.idn" java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" - log " Build $IDX Completed" + log "Build $IDX Completed" # Remove work file unless keeping - if [ $KEEP_WORK = 1 ]; then + if [ $KEEP_WORK = 0 ]; then + debug "Cleaning up work file $WORK" rm "$WORK" fi } @@ -211,6 +306,7 @@ generate_index "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG log "Index Building Phase Completed" # ---- Clean up. 
-if [ $KEEP_WORK = 1 ]; then - rm -f "$DATA_TRIPLES" "$DATA_QUADS" +if [ $KEEP_WORK = 0 ]; then + debug "Cleaning up data files $DATA_TRIPLES and $DATA_QUADS" + rm -f "$DATA_TRIPLES" "$DATA_QUADS" fi From 3c59213e273711836628d9d030df23dac142ee1b Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Mon, 29 Jun 2015 13:12:03 +0100 Subject: [PATCH 05/16] Fix script usage in dev environment (JENA-977) This commit enhances the distribution module to make it much easier to use in dev environments. The dependency plugin is used with the copy-dependencies goal to produce the lib/ directory during a package phase and then the clean plugin is configured to clean the lib/ directory during a clean. This means that developers can now set JENA_HOME to the distribution module directory in their working copy and, provided they have run mvn package, all the scripts should work. This also allows the temporary hacks in the new tdbloader2 scripts to be removed so these scripts now run against Jena 3 libraries and don't need the path to the new scripts to be hacked. 
--- apache-jena/bin/tdbloader2 | 12 +++-- apache-jena/bin/tdbloader2data | 2 - apache-jena/bin/tdbloader2index | 2 - apache-jena/pom.xml | 78 +++++++++++++++++++++++++-------- 4 files changed, 64 insertions(+), 30 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index 9508031f65e..55a0faf0167 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -198,19 +198,17 @@ DATE="+%H:%M:%S" log "-- TDB Bulk Loader Start" TIME1="$(date +%s)" -TOOL_DIR=$JENA_HOME/bin/ -# DEV - Following is just for debugging -TOOL_DIR= +TOOL_DIR="$JENA_HOME/bin" case "$PHASE" in all) - exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" - exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC" + exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" ;; data) - exec "${TOOL_DIR}tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" ;; index) - exec "${TOOL_DIR}tdbloader2index" $COMMON_ARGS --loc "$LOC" + exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" ;; *) echo "Unrecognized phase $PHASE" 1>&2 diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index efb590a2be4..eaf9069ccdf 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -83,8 +83,6 @@ function debug() { DATE="+%H:%M:%S" PKG=org.apache.jena.tdb.store.bulkloader2 -#DEV - Allows use against Jena 2 API -PKG=com.hp.hpl.jena.tdb.store.bulkloader2 # Process Arguments LOC= diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 971b82426b8..f506df9c5f4 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -90,8 +90,6 @@ function getSize() { DATE="+%H:%M:%S" PKG=org.apache.jena.tdb.store.bulkloader2 -#DEV - Allows use against Jena 2 API -PKG=com.hp.hpl.jena.tdb.store.bulkloader2 # Process Arguments LOC= diff --git 
a/apache-jena/pom.xml b/apache-jena/pom.xml index b718501a035..cd91792f18c 100644 --- a/apache-jena/pom.xml +++ b/apache-jena/pom.xml @@ -16,7 +16,8 @@ limitations under the License. --> - + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-libs-for-scripts + package + + copy-dependencies + + + true + runtime + jar + lib/ + + + + + + + + maven-clean-plugin + + + + lib/ + + + From c55c1f74b4571eee2c9e333967b5671e862adff7 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Mon, 29 Jun 2015 17:21:18 +0100 Subject: [PATCH 06/16] Further refactoring of tdbloader2 scripts (JENA-977) - Move common functions into tdbloader2common script - Remove duplicated definitions from other scripts and source in the new common script - Add helper function for getting drive information - Add check in tdbloader2index script which will abort the build if there is insufficient free space to sort the data file since the sorted output will be the same size as the input, so if there are fewer bytes free than the size of the input we can abort early --- apache-jena/bin/tdbloader2 | 29 ++++----- apache-jena/bin/tdbloader2common | 85 ++++++++++++++++++++++++ apache-jena/bin/tdbloader2data | 53 ++++++++------- apache-jena/bin/tdbloader2index | 107 ++++++++++++++----------------- 4 files changed, 169 insertions(+), 105 deletions(-) create mode 100644 apache-jena/bin/tdbloader2common diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index 55a0faf0167..b7a1af2d19b 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -77,12 +77,10 @@ EOF } # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then +if [ -z "$JENA_HOME" ]; then + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then SCRIPT="$(readlink "$0")" # If link is relative case "$SCRIPT" in @@ -91,9 +89,10 @@ if [ -z "$JENA_HOME" ] esac fi - # Work out root from script 
location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" fi +source "${JENA_HOME}/bin/tdbloader2common" # ---- Setup JVM_ARGS=${JVM_ARGS:--Xmx1024M} @@ -189,13 +188,8 @@ if [ $TRACE = 1 ]; then COMMON_ARGS="$COMMON_ARGS --trace" fi -log() { echo " $(date $DATE)" "$@" ; } - -#DATE="+%Y-%m-%dT%H:%M:%S%:z" -DATE="+%H:%M:%S" - # ---- Start -log "-- TDB Bulk Loader Start" +info "-- TDB Bulk Loader Start" TIME1="$(date +%s)" TOOL_DIR="$JENA_HOME/bin" @@ -211,13 +205,12 @@ case "$PHASE" in exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" ;; *) - echo "Unrecognized phase $PHASE" 1>&2 - exit 1 + abort 1 "Unrecognized phase $PHASE" ;; esac # ---- End TIME2="$(date +%s)" -log "-- TDB Bulk Loader Finish" +info "-- TDB Bulk Loader Finish" ELAPSED=$(($TIME2-$TIME1)) -log "-- $ELAPSED seconds" \ No newline at end of file +info "-- $ELAPSED seconds" \ No newline at end of file diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common new file mode 100644 index 00000000000..beae1150952 --- /dev/null +++ b/apache-jena/bin/tdbloader2common @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +## Licensed to the Apache Software Foundation (ASF) under one +## or more contributor license agreements. See the NOTICE file +## distributed with this work for additional information +## regarding copyright ownership. The ASF licenses this file +## to you under the Apache License, Version 2.0 (the +## "License"); you may not use this file except in compliance +## with the License. You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+## See the License for the specific language governing permissions and +## limitations under the License. + +function log() { + echo " $(date $DATE)" "$@" +} + +function debug() { + if [ $DEBUG = 1 ]; then + log "DEBUG" "$@" + fi +} + +function info() { + log "INFO" "$@" +} + +function warn() { + log "WARN" "$@" 1>&2 +} + +function error() { + log "ERROR" "$@" 1>&2 +} + +function abort() { + local EXIT=$1 + + # Trick to check for numeric + # -eq only returns true if the value is integer equals + if [ "$EXIT" -eq "$EXIT" ]; then + # Can use the provided exit code + shift + else + # Caller forgot to provide an exit code so use default of 1 + EXIT=1 + fi + + # Log error and exit + error "$@" + exit $EXIT +} + +function getSize() { + ls -l $1 | awk '{print $5}' +} + +function getDriveInfo() { + local DIR=$1 + + local DRIVE_INFO=$(df "$DIR" | tail -n +2) + local DISK=$(echo $DRIVE_INFO | awk '{print $1}') + local FREE_BYTES=$(echo $DRIVE_INFO | awk '{print $4}') + local USED_PERCENT=$(echo $DRIVE_INFO | awk '{print $5}') + USED_PERCENT=${USED_PERCENT/"%"/} + local FREE_PERCENT=$((100 - $USED_PERCENT)) + + local INFO=() + INFO[0]=$DISK + INFO[1]=$USED_PERCENT + INFO[2]=$FREE_PERCENT + INFO[3]=$FREE_BYTES + + echo ${INFO[@]} +} + +#DATE="+%Y-%m-%dT%H:%M:%S%:z" +DATE="+%H:%M:%S" + +PKG=org.apache.jena.tdb.store.bulkloader2 \ No newline at end of file diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index eaf9069ccdf..6904c839d95 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -18,6 +18,13 @@ # The environment for this sub-script is setup by "tdbloader2" +# Pull in common functions +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME is not set" + exit 1 +fi +source "${JENA_HOME}/bin/tdbloader2common" + function printUsage() { cat << EOF tdbloader2data - TDB Bulk Loader - Data Phase @@ -71,19 +78,6 @@ EOF # Exit on error. 
set -e -log() { echo " $(date $DATE)" "$@" ; } - -function debug() { - if [ $DEBUG = 1 ]; then - log "DEBUG" "$@" - fi -} - -#DATE="+%Y-%m-%dT%H:%M:%S%:z" -DATE="+%H:%M:%S" - -PKG=org.apache.jena.tdb.store.bulkloader2 - # Process Arguments LOC= KEEP_WORK=0 @@ -133,15 +127,18 @@ do done # Verify arguments -if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi -if [ $# = 0 ]; then echo "No data files specified" ; exit 1 ; fi +if [ -z "$LOC" ]; then + abort 1 "No location specified" +fi +if [ $# = 0 ]; then + abort 1 "No data files specified" +fi # Look for any index and data files in the directory. # Skip a possible configuration file if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)" then - echo "Location is not empty: $LOC" - exit 1 + abort 1 "Location is not empty: $LOC" fi if [ ! -e "$LOC" ] ; then @@ -149,12 +146,13 @@ if [ ! -e "$LOC" ] ; then debug "Trying to create new database directory: $LOC" mkdir "$LOC" if [ $? != 0 ]; then - echo "Failed to create new directory: $LOC" - exit 1 + abort 1 "Failed to create new directory: $LOC" fi debug "New database directory created: $LOC" fi -if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi +if [ ! -d "$LOC" ]; then + abort 1 "Location is not a directory: $LOC" +fi # TODO Make LOC absolute @@ -165,22 +163,21 @@ JVM_ARGS=${JVM_ARGS:--Xmx1200M} debug "JVM Arguments are $JVM_ARGS" # Classpath set in "tdbloader2" -if [ -z "$JENA_CP" ] -then - echo "Classpath not provided : set JENA_CP" 1>&2 - exit 1 +if [ -z "$JENA_CP" ]; then + abort 1 "Classpath not provided : set JENA_CP" fi # ---- Data loading phase -log "Data Load Phase" -# Produce nodes file and triples/quads text file. +info "Data Load Phase" +# Produce nodes file and triples/quads text file. 
DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" -debug "Data files are $DATA_TRIPLES and $DATA_QUADS" +debug "Triples text files is $DATA_TRIPLES" +debug "Quads text file is $DATA_QUADS" java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \ "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES -log "Data Load Phase Completed" +info "Data Load Phase Completed" diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index f506df9c5f4..5de8d6a12d8 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -18,6 +18,13 @@ # The environment for this sub-script is setup by "tdbloader2" +# Pull in common functions +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME is not set" + exit 1 +fi +source "${JENA_HOME}/bin/tdbloader2common" + function printUsage() { cat << EOF tdbloader2index - TDB Bulk Loader - Index Phase @@ -70,27 +77,6 @@ set -e # Sort order is ASCII export LC_ALL="C" -log() { echo " $(date $DATE)" "$@" ; } - -function debug() { - if [ $DEBUG = 1 ]; then - log "DEBUG" "$@" - fi -} - -function warn() { - log "WARN" "$@" -} - -function getSize() { - ls -l $1 | awk '{print $5}' -} - -#DATE="+%Y-%m-%dT%H:%M:%S%:z" -DATE="+%H:%M:%S" - -PKG=org.apache.jena.tdb.store.bulkloader2 - # Process Arguments LOC= KEEP_WORK=0 @@ -138,22 +124,26 @@ do done # Verify arguments -if [ -z "$LOC" ] ; then echo "No location specified" ; exit 1 ; fi -if [ ! -e "$LOC" ] ; then echo "Location specified does not exist: $LOC" ; exit 1; fi -if [ ! -d "$LOC" ] ; then echo "Location is not a directory: $LOC" ; exit 1 ; fi +if [ -z "$LOC" ]; then + abort 1 "No location specified" +fi +if [ ! -e "$LOC" ]; then + abort 1 "Location specified does not exist: $LOC" +fi +if [ ! -d "$LOC" ]; then + abort 1 "Location is not a directory: $LOC" +fi # TODO Make LOC absolute DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" -if [ ! 
-e "$DATA_TRIPLES" ] ; then - echo "No triples data file found in location, please run the tdbloader2data script first" - exit 1 +if [ ! -e "$DATA_TRIPLES" ]; then + abort 1 "No triples text file found in location, please run the tdbloader2data script first" fi if [ ! -e "$DATA_QUADS" ]; then - echo "No quads data file found in location, please run the tdbloader2data script first" - exit 1 + abort 1 "No quads text file found in location, please run the tdbloader2data script first" fi debug "Data files are $DATA_TRIPLES and $DATA_QUADS" @@ -164,16 +154,14 @@ JVM_ARGS=${JVM_ARGS:--Xmx1200M} debug "JVM Arguments are $JVM_ARGS" # Classpath set in "tdbloader2" -if [ -z "$JENA_CP" ] -then - echo "Classpath not provided : set JENA_CP" 1>&2 - exit 1 +if [ -z "$JENA_CP" ]; then + abort 1 "Classpath not provided : set JENA_CP" fi debug "Jena Classpath is $JENA_CP" # ---- Index intermediates ## All files are writtern S P O / G S P O columns per row but in different sort orders. -log "Index Building Phase" +info "Index Building Phase" # Check whether Pipe Viewer is available # Needs to temporarily disable exit on error as which produces an error @@ -204,21 +192,14 @@ else SORT_TEMP_DIR="$TMPDIR" fi debug "Sort Temp Directory: $SORT_TEMP_DIR" +SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}")) +debug "Sort Temp Directory ${DIR} is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)" -# Find out how much space is on the sort directory -SORT_DRIVE_INFO=$(df "$SORT_TEMP_DIR" | tail -n +2) -SORT_DRIVE_DISK=$(echo $SORT_DRIVE_INFO | awk '{print $1}') -SORT_DRIVE_FREE_SPACE=$(echo $SORT_DRIVE_INFO | awk '{print $4}') -SORT_DRIVE_USED=$(echo $SORT_DRIVE_INFO | awk '{print $5}') -SORT_DRIVE_FREE=${SORT_DRIVE_USED/"%"/} -SORT_DRIVE_FREE=$((100 - $SORT_DRIVE_FREE)) -debug "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes)" - -if [ 
$SORT_DRIVE_FREE -le 10 ]; then - echo - warn "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_DISK} which only has ${SORT_DRIVE_FREE}% free space (${SORT_DRIVE_FREE_SPACE} bytes) available" +if [ "${SORT_DRIVE_INFO[2]}" -le 10 ]; then + warn "-----" + warn "Sort Temp Directory ${SORT_TEMP_DIR} is on disk ${SORT_DRIVE_INFO[0]} which only has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes) available" warn "This may result in sort failures if the data to be indexed is large" - echo + warn "-----" fi generate_index() @@ -233,17 +214,26 @@ generate_index() return fi - log "Creating Index $IDX" + info "Creating Index $IDX" + + # For various purposes we need to know the size of the input data + local SIZE=$(getSize "$DATA") + debug "Size of data to be sorted is $SIZE bytes" + + # Verify that we have enough space to sort the data + local WORK_DRIVE_INFO=($(getDriveInfo "${WORK}")) + if [ "${SIZE}" -ge "${WORK_DRIVE_INFO[3]}" ]; then + abort 1 "Insufficient free space on database drive ${WORK_DRIVE_INFO[0]}, there are ${WORK_DRIVE_INFO[3]} bytes free but ${SIZE} bytes are required" + else + debug "Sufficient free space on database drive ${WORK_DRIVE_INFO[0]} to attempt sorting data file ${DATA} (${SIZE} bytes required from ${WORK_DRIVE_INFO[3]} bytes free)" + fi # Sort the input data - log "Sort $IDX" + info "Sort $IDX" debug "Sorting $DATA into work file $WORK" if [ $HAS_PV = 0 ]; then # Use pv (pipe viewer) to monitor sort progress # Note that progress data will only be seen if running in the foreground - # To report progress need to know size of input data - SIZE=$(getSize "$DATA") - debug "Size of data to be sorted is $SIZE bytes" pv -c -N data < "$DATA" | sort $SORT_ARGS -u $KEYS | pv -c -N sort -s $SIZE > $WORK @@ -252,24 +242,23 @@ generate_index() # and we'll continue onwards # Therefore we need to check that the output size is same as input size as this is # the only way to tell if sort suceeded - OUTPUT_SIZE=$(getSize "$WORK") + local 
OUTPUT_SIZE=$(getSize "$WORK") debug "Size of sorted data is $OUTPUT_SIZE bytes" if [ $SIZE != $OUTPUT_SIZE ]; then - log "Aborting due to sort error" - exit 1 + abort 1 "Aborting due to sort error, see preceding output for error from sort" fi else # Use sort without any progress monitoring sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK fi - log "Sort $IDX Completed" + info "Sort $IDX Completed" # Build into an index - log "Build $IDX" + info "Build $IDX" rm -f "$LOC/$IDX.dat" rm -f "$LOC/$IDX.idn" java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" - log "Build $IDX Completed" + info "Build $IDX Completed" # Remove work file unless keeping if [ $KEEP_WORK = 0 ]; then @@ -301,7 +290,7 @@ generate_index "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG generate_index "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG -log "Index Building Phase Completed" +info "Index Building Phase Completed" # ---- Clean up. if [ $KEEP_WORK = 0 ]; then From a7ac2797856bf60476204b8997b5a5bf4cfa15c5 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 13:44:29 +0100 Subject: [PATCH 07/16] Further improvements to tdbloader2 scripts (JENA-977) - Auto-detection of JENA_HOME now exports it so it is visible to the child scripts - Force making database directory path absolute and resolving any symbolic links in the path - Additional checks in tdbloader2index to warn if sort is going to be external and it may run out of temporary disk space for the sort --- apache-jena/bin/tdbloader2 | 5 +- apache-jena/bin/tdbloader2common | 106 +++++++++++++++++++++++++++++++ apache-jena/bin/tdbloader2data | 9 ++- apache-jena/bin/tdbloader2index | 39 ++++++++++-- 4 files changed, 152 insertions(+), 7 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index b7a1af2d19b..310ee663d8a 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -78,10 +78,11 @@ EOF # If JENA_HOME is empty if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME 
automatically" SCRIPT="$0" # Catch common issue: script has been symlinked if [ -L "$SCRIPT" ]; then - SCRIPT="$(readlink "$0")" + SCRIPT="$(readlink -f "$0")" # If link is relative case "$SCRIPT" in /*) ;; # fine @@ -91,6 +92,8 @@ if [ -z "$JENA_HOME" ]; then # Work out root from script location JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" fi source "${JENA_HOME}/bin/tdbloader2common" diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common index beae1150952..2830545e087 100644 --- a/apache-jena/bin/tdbloader2common +++ b/apache-jena/bin/tdbloader2common @@ -79,6 +79,112 @@ function getDriveInfo() { echo ${INFO[@]} } +function getFreeMem() { + set +e + local FREE_MEM=-1 + case "$OSTYPE" in + darwin*) + # Have to get this from top + FREE_MEM=$(top -l 1 | grep PhysMem | awk '{print $6}') + FREE_MEM=${FREE_MEM%M} + FREE_MEM=$(($FREE_MEM * 1024 * 1024)) + ;; + *) + # Try to use free if available + which free >/dev/null 2>&1 + if [ $? 
-eq 0 ]; then + # Have free available + FREE_MEM=$(free -b) + fi + ;; + esac + set -e + + echo "$FREE_MEM" +} + +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|*BSB*|*BSD|BSD*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + +function resolveLinks() { + local NAME=$1 + + if [ -L "$NAME" ]; then + NAME=$(resolveLink "$NAME") + elif [[ "$NAME" == *"/" ]]; then + # If the path ends in a / test -L will report false even + # if the path is actually a symbolic link + # So check if the name without the trailing / is a link and if + # so resolve it + if [ -L "${NAME%/}" ]; then + NAME=${NAME%/} + NAME=$(resolveLink "$NAME") + fi + fi + echo "$NAME" +} + +function makeAbsolute() { + local NAME=$1 + + # Follow links + NAME=$(resolveLinks "$NAME") + + # Put back trailing slash + # Do this before we make the path absolute or we'll absolutize wrong + if [ -d "$NAME" ]; then + if [[ "$NAME" != *"/" ]]; then + NAME="${NAME}/" + fi + fi + + if [[ "$NAME" != "/"* ]]; then + # Now make absolute + case "$OSTYPE" in + darwin*|*BSB*|*BSD|BSD*) + # BSD style readlink does not support the -f for canonicalization + # so have to do this via cd, pwd and basename + local FILENAME=$(basename "$NAME") + NAME=$(cd $(dirname "$NAME"); pwd) + NAME="$NAME/$FILENAME" + ;; + *) + # Otherwise assume standard GNU readlink + NAME=$(readlink -f "$NAME") + ;; + esac + + # Put back trailing slash + if [ -d "$NAME" ]; then + if [[ "$NAME" != *"/" ]]; then + NAME="${NAME}/" + fi + fi + fi + + echo "$NAME" +} + #DATE="+%Y-%m-%dT%H:%M:%S%:z" DATE="+%H:%M:%S" diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index 6904c839d95..2c48a501e5f 100755 --- a/apache-jena/bin/tdbloader2data +++ 
b/apache-jena/bin/tdbloader2data @@ -134,6 +134,13 @@ if [ $# = 0 ]; then abort 1 "No data files specified" fi +# Make LOC absolute +ABS_LOC=$(makeAbsolute "$LOC") +if [ "$ABS_LOC" != "$LOC" ]; then + LOC="$ABS_LOC" + debug "Absolute database location is $LOC" +fi + # Look for any index and data files in the directory. # Skip a possible configuration file if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)" @@ -154,8 +161,6 @@ if [ ! -d "$LOC" ]; then abort 1 "Location is not a directory: $LOC" fi -# TODO Make LOC absolute - FILES="$@" ## JVM Arguments diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 5de8d6a12d8..15a5832d541 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -127,6 +127,15 @@ done if [ -z "$LOC" ]; then abort 1 "No location specified" fi + +# Make LOC absolute +ABS_LOC=$(makeAbsolute "$LOC") +if [ "$ABS_LOC" != "$LOC" ]; then + LOC="$ABS_LOC" + debug "Absolute database location is $LOC" +fi + +# Check location if [ ! -e "$LOC" ]; then abort 1 "Location specified does not exist: $LOC" fi @@ -134,8 +143,6 @@ if [ ! -d "$LOC" ]; then abort 1 "Location is not a directory: $LOC" fi -# TODO Make LOC absolute - DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" @@ -146,7 +153,7 @@ if [ ! -e "$DATA_QUADS" ]; then abort 1 "No quads text file found in location, please run the tdbloader2data script first" fi -debug "Data files are $DATA_TRIPLES and $DATA_QUADS" +debug "Data text files are $DATA_TRIPLES and $DATA_QUADS" ##--parallel is not always available. SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" @@ -160,7 +167,7 @@ fi debug "Jena Classpath is $JENA_CP" # ---- Index intermediates -## All files are writtern S P O / G S P O columns per row but in different sort orders. +## All files are written S P O / G S P O columns per row but in different sort orders. 
info "Index Building Phase" # Check whether Pipe Viewer is available @@ -191,6 +198,7 @@ else # Using the system temp directory SORT_TEMP_DIR="$TMPDIR" fi +SORT_TEMP_DIR=$(makeAbsolute "$SORT_TEMP_DIR") debug "Sort Temp Directory: $SORT_TEMP_DIR" SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}")) debug "Sort Temp Directory ${DIR} is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)" @@ -221,13 +229,36 @@ generate_index() debug "Size of data to be sorted is $SIZE bytes" # Verify that we have enough space to sort the data + + # Firstly check that the output disk has sufficient space local WORK_DRIVE_INFO=($(getDriveInfo "${WORK}")) if [ "${SIZE}" -ge "${WORK_DRIVE_INFO[3]}" ]; then + # If there is insufficient disk space then we can abort now abort 1 "Insufficient free space on database drive ${WORK_DRIVE_INFO[0]}, there are ${WORK_DRIVE_INFO[3]} bytes free but ${SIZE} bytes are required" else debug "Sufficient free space on database drive ${WORK_DRIVE_INFO[0]} to attempt sorting data file ${DATA} (${SIZE} bytes required from ${WORK_DRIVE_INFO[3]} bytes free)" fi + # Secondly check if there is enough space to sort in-memory or if sort may need to do an external sort + # We only issue warnings when the sort is likely to be external because there are various factors + # such as virtual memory and OS file caching that may complicate this + FREE_MEM=$(getFreeMem) + if [ "$FREE_MEM" -ge 0 ]; then + if [ "$SIZE" -ge "$FREE_MEM" ]; then + warn "Insufficient free memory to sort data in-memory, sort will need to perform an external sort using Temp Directory ${SORT_TEMP_DIR}" + + # Check for disk space on temporary disk + SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}")) + if [ "$SIZE" -ge "${SORT_DRIVE_INFO[3]}" ]; then + warn "There may be insufficient for sort to perform an external sort using Tempo Directory ${SORT_TEMP_DIR} (${SIZE} bytes required but only ${SORT_DRIVE_INFO[3]} bytes free)" + fi + else + debug 
"Should be sufficient free memory ($FREE_MEM bytes) for sort to be fully in-memory" + fi + else + warn "Unable to determine free memory on your OS, can't check whether sort will be in-memory or external sort using Temp Directory ${SORT_TEMP_DIR}" + fi + # Sort the input data info "Sort $IDX" debug "Sorting $DATA into work file $WORK" From cc4a80ac3c44d738a8904ac91b1ece71b446d74a Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 14:25:46 +0100 Subject: [PATCH 08/16] Check for return codes from children in tdbloader2 (JENA-977) Ensures that the main script checks for the return code of the child scripts and aborts if they fail --- apache-jena/bin/tdbloader2 | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index 310ee663d8a..d0d906cd810 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -198,14 +198,38 @@ TIME1="$(date +%s)" TOOL_DIR="$JENA_HOME/bin" case "$PHASE" in all) - exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" - exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" + # All Phases + # Data Phase + "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + RET=$? + if [ $RET -ne 0 ]; then + abort $RET "Failed during data phase" + fi + + # Index Phase + "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" + RET=$? + if [ $RET -ne 0 ]; then + abort $RET "Failed during data phase" + fi ;; + data) - exec "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + # Data Phase + "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + RET=$? + if [ $RET -ne 0 ]; then + abort $RET "Failed during data phase" + fi ;; + index) - exec "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" + # Index Phase + "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" + RET=$? 
+ if [ $RET -ne 0 ]; then + abort $RET "Failed during index phase" + fi ;; *) abort 1 "Unrecognized phase $PHASE" From d4a0bc50a6d82ab5bbb43ab90e65216e5b165621 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 15:04:50 +0100 Subject: [PATCH 09/16] Finish up first pass of work on tdbloader2 script refactoring (JENA-977) - Add options for setting the JVM and sort arguments that do not rely on environment variables. NB - For backwards compatibility the existing environment variables are still honoured if the new command line options are not used - Improve some error messages - Explicitly support -- for separating data files from options for cases where file names may be confused --- apache-jena/bin/tdbloader2 | 76 ++++++++++++++++++++++++++------ apache-jena/bin/tdbloader2common | 4 ++ apache-jena/bin/tdbloader2data | 40 ++++++++++++++--- apache-jena/bin/tdbloader2index | 72 +++++++++++++++++++++++++----- 4 files changed, 162 insertions(+), 30 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index d0d906cd810..e598aeb8723 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -53,6 +53,20 @@ Advanced additional options are as follows: --debug Enable debug mode, adds extra debug output + -j + --jvm-args + Sets the arguments that should be passed to the JVM for the + JVM based portions of the build. + + Generally it is best to not change these unless you have been + specifically advised to. The scripts will use appropriate + defaults if this is not specified. + + In particular be careful increasing the heap size since many + parts of TDB actually use memory mapped files that live + outside the heap so if the heap is too large the heap may + conflict with the memory mapped files for memory space. 
+ -k --keep-work Keeps the temporary work files around after they are no longer @@ -69,6 +83,14 @@ Advanced additional options are as follows: When no phase is specified it defaults to all + -s + --sort-args + Sets the arguments that should be passed to sort for the sort + based portions of the build. + + Generally it is best not to change these as the scripts will + use appropriate defaults for your system. + -t --trace Enable trace mode, essentially sets -x within the scripts @@ -111,15 +133,7 @@ case "$(uname)" in esac export JENA_CP -# echo JENA_CP -if [ -z "$SORT_ARGS" ]; then - SORT_ARGS="--buffer-size=50%" - sort --parallel=3 < /dev/null 2>/dev/null - if [ $? = 0 ]; then - SORT_ARGS="$SORT_ARGS --parallel=3" - fi -fi -export SORT_ARGS + # Process arguments LOC= @@ -127,6 +141,8 @@ PHASE= KEEP_WORK=0 DEBUG=0 TRACE=0 +JVM_ARGS= +SORT_ARGS= while [ $# -gt 0 ] do @@ -142,6 +158,12 @@ do printUsage exit 0 ;; + -j|--jvm-args) + # JVM Arguments + shift + JVM_ARGS="$1" + shift + ;; -k|--keep-work) # Keep work files shift @@ -164,14 +186,30 @@ do PHASE="$1" shift ;; + -s|--sort-args) + # Sort arguments + shift + SORT_ARGS=$1 + shift + ;; -t|--trace) # Trace mode shift TRACE=1 set -x ;; + --) + # Arguments separator + # All further arguments are treated as data files + shift + break + ;; + -*) + # Looks like an option but not known + abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --" + ;; *) - # Once we see an unrecognized argument treat as start of files to process + # Once we see an unrecognized argument that doesn't look like an option treat as start of files to process break ;; esac @@ -180,7 +218,11 @@ done if [ -z "$PHASE" ]; then PHASE="all" fi + +# Prepare arguments to pass to children COMMON_ARGS= +DATA_ARGS= +INDEX_ARGS= if [ $KEEP_WORK = 1 ]; then COMMON_ARGS="--keep-work" fi @@ -190,6 +232,12 @@ fi if [ $TRACE = 1 ]; then COMMON_ARGS="$COMMON_ARGS --trace" fi +if [ -n "$JVM_ARGS" ]; then + 
COMMON_ARGS="$COMMON_ARGS --jvm-args $JVM_ARGS" +fi +if [ -n "$SORT_ARGS" ]; then + INDEX_ARGS="--sort-args $SORT_ARGS" +fi # ---- Start info "-- TDB Bulk Loader Start" @@ -200,14 +248,14 @@ case "$PHASE" in all) # All Phases # Data Phase - "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS $DATA_ARGS --loc "$LOC" -- "$@" RET=$? if [ $RET -ne 0 ]; then abort $RET "Failed during data phase" fi # Index Phase - "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" + "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS $INDEX_ARGS --loc "$LOC" RET=$? if [ $RET -ne 0 ]; then abort $RET "Failed during data phase" @@ -216,7 +264,7 @@ case "$PHASE" in data) # Data Phase - "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS --loc "$LOC" "$@" + "${TOOL_DIR}/tdbloader2data" $COMMON_ARGS $DATA_ARGS --loc "$LOC" -- "$@" RET=$? if [ $RET -ne 0 ]; then abort $RET "Failed during data phase" @@ -225,7 +273,7 @@ case "$PHASE" in index) # Index Phase - "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS --loc "$LOC" + "${TOOL_DIR}/tdbloader2index" $COMMON_ARGS $INDEX_ARGS --loc "$LOC" RET=$? 
if [ $RET -ne 0 ]; then abort $RET "Failed during index phase" diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common index 2830545e087..2c116ad374c 100644 --- a/apache-jena/bin/tdbloader2common +++ b/apache-jena/bin/tdbloader2common @@ -80,7 +80,10 @@ function getDriveInfo() { } function getFreeMem() { + # May be called from a script where exit on error is set + # in which case disable for the life of this function set +e + local FREE_MEM=-1 case "$OSTYPE" in darwin*) @@ -98,6 +101,7 @@ function getFreeMem() { fi ;; esac + set -e echo "$FREE_MEM" diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index 2c48a501e5f..f942e207c48 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -63,6 +63,20 @@ Advanced additional options are as follows: --debug Enable debug mode, adds extra debug output + -j + --jvm-args + Sets the arguments that should be passed to the JVM for the + JVM based portions of the build. + + Generally it is best to not change these unless you have been + specifically advised to. The scripts will use appropriate + defaults if this is not specified. + + In particular be careful increasing the heap size since many + parts of TDB actually use memory mapped files that live + outside the heap so if the heap is too large the heap may + conflict with the memory mapped files for memory space. 
+ -k --keep-work Keeps the temporary work files around after they are no longer @@ -96,6 +110,12 @@ do printUsage exit 0 ;; + -j|--jvm-args) + # JVM Arguments + shift + JVM_ARGS="$1" + shift + ;; -k|--keep-work) # Keep work files # This option is actually not used by this script but may be passed in @@ -119,6 +139,16 @@ do shift set -x ;; + --) + # Arguments separator + # All further arguments are treated as data files + shift + break + ;; + -*) + # Unrecognized + abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --" + ;; *) # Any further arguments are treated as data files break @@ -128,10 +158,10 @@ done # Verify arguments if [ -z "$LOC" ]; then - abort 1 "No location specified" + abort 1 "Required database location not specified" fi if [ $# = 0 ]; then - abort 1 "No data files specified" + abort 1 "No data files specified, one/more data files must be specified" fi # Make LOC absolute @@ -145,7 +175,7 @@ fi # Skip a possible configuration file if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)" then - abort 1 "Location is not empty: $LOC" + abort 1 "Database location is not empty: $LOC" fi if [ ! -e "$LOC" ] ; then @@ -158,12 +188,12 @@ if [ ! -e "$LOC" ] ; then debug "New database directory created: $LOC" fi if [ ! -d "$LOC" ]; then - abort 1 "Location is not a directory: $LOC" + abort 1 "Database location is not a directory: $LOC" fi FILES="$@" -## JVM Arguments +## Prepare JVM Arguments JVM_ARGS=${JVM_ARGS:--Xmx1200M} debug "JVM Arguments are $JVM_ARGS" diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 15a5832d541..b997b39f738 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -60,11 +60,33 @@ Advanced additional options are as follows: --debug Enable debug mode, adds extra debug output + -j + --jvm-args + Sets the arguments that should be passed to the JVM for the + JVM based portions of the build. 
+ + Generally it is best to not change these unless you have been + specifically advised to. The scripts will use appropriate + defaults if this is not specified. + + In particular be careful increasing the heap size since many + parts of TDB actually use memory mapped files that live + outside the heap so if the heap is too large the heap may + conflict with the memory mapped files for memory space. + -k --keep-work Keeps the temporary work files around after they are no longer needed. May be useful for debugging. + -s + --sort-args + Sets the arguments that should be passed to sort for the sort + based portions of the build. + + Generally it is best not to change these as the scripts will + use appropriate defaults for your system. + -t --trace Enable trace mode, essentially sets -x within the scripts @@ -81,6 +103,8 @@ export LC_ALL="C" LOC= KEEP_WORK=0 DEBUG=0 +JVM_ARGS= +SORT_ARGS= while [ $# -gt 0 ] do @@ -95,6 +119,12 @@ do printUsage exit 0 ;; + -j|--jvm-args) + # JVM Arguments + shift + JVM_ARGS="$1" + shift + ;; -k|--keep-work) # Keep work files shift @@ -111,21 +141,27 @@ do LOC=${ARG/-*loc=/} shift ;; + -s|--sort-args) + # Sort arguments + shift + SORT_ARGS=$1 + shift + ;; -t|--trace) # Trace mode shift set -x ;; *) - # Any further arguments are ignored - break + # Additional options are not supported + abort 1 "Unrecognized option $ARG" ;; esac done # Verify arguments if [ -z "$LOC" ]; then - abort 1 "No location specified" + abort 1 "Required database location not specified" fi # Make LOC absolute @@ -137,26 +173,40 @@ fi # Check location if [ ! -e "$LOC" ]; then - abort 1 "Location specified does not exist: $LOC" + abort 1 "Database location specified does not exist: $LOC" fi if [ ! -d "$LOC" ]; then - abort 1 "Location is not a directory: $LOC" + abort 1 "Database location is not a directory: $LOC" fi +# Locate and check data text files DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" if [ ! 
-e "$DATA_TRIPLES" ]; then - abort 1 "No triples text file found in location, please run the tdbloader2data script first" + abort 1 "No triples text file found in database location, please run the tdbloader2data script first" fi if [ ! -e "$DATA_QUADS" ]; then - abort 1 "No quads text file found in location, please run the tdbloader2data script first" + abort 1 "No quads text file found in database location, please run the tdbloader2data script first" fi debug "Data text files are $DATA_TRIPLES and $DATA_QUADS" -##--parallel is not always available. -SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" +# Prepare sort arguments +if [ -z "$SORT_ARGS" ]; then + SORT_ARGS="--buffer-size=50%" + + ##--parallel is not always available. + # Temporarily disable exit on error while we check for --parallel support + set +e + sort --parallel=3 < /dev/null 2>/dev/null + if [ $? = 0 ]; then + SORT_ARGS="$SORT_ARGS --parallel=3" + fi + set -e +fi + +# Prepare JVM arguments JVM_ARGS=${JVM_ARGS:--Xmx1200M} debug "JVM Arguments are $JVM_ARGS" @@ -201,7 +251,7 @@ fi SORT_TEMP_DIR=$(makeAbsolute "$SORT_TEMP_DIR") debug "Sort Temp Directory: $SORT_TEMP_DIR" SORT_DRIVE_INFO=($(getDriveInfo "${SORT_TEMP_DIR}")) -debug "Sort Temp Directory ${DIR} is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)" +debug "Sort Temp Directory is on disk ${SORT_DRIVE_INFO[0]} which has ${SORT_DRIVE_INFO[2]}% free space (${SORT_DRIVE_INFO[3]} bytes)" if [ "${SORT_DRIVE_INFO[2]}" -le 10 ]; then warn "-----" @@ -288,7 +338,7 @@ generate_index() info "Build $IDX" rm -f "$LOC/$IDX.dat" rm -f "$LOC/$IDX.idn" - java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" + java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" info "Build $IDX Completed" # Remove work file unless keeping From f64dbdcb6ac77cfb6654916e43797fdca3d4fb5c Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 15:33:09 +0100 Subject: [PATCH 10/16] Ensure data 
file paths are absolute (JENA-977) This commit improves the tdbloader2 script to ensure that data file paths are made absolute and any symbolic links are resolved. --- apache-jena/bin/tdbloader2data | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index f942e207c48..ab0fe873995 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -191,8 +191,6 @@ if [ ! -d "$LOC" ]; then abort 1 "Database location is not a directory: $LOC" fi -FILES="$@" - ## Prepare JVM Arguments JVM_ARGS=${JVM_ARGS:--Xmx1200M} debug "JVM Arguments are $JVM_ARGS" @@ -205,6 +203,32 @@ fi # ---- Data loading phase info "Data Load Phase" +# Prepare Files +FILES=() +F=0 +while [ $# -gt 0 ]; do + FILE=$1 + shift + + ABS_FILE=$(makeAbsolute "$FILE") + if [ "$FILE" != "$ABS_FILE" ]; then + # Relative path was resolved + FILES[$F]="$ABS_FILE" + debug "Relative data file $FILE was resolved to absolute data file $ABS_FILE" + else + # Already absolute + FILES[$F]="$FILE" + fi + + F=$(($F + 1)) +done +info "Got ${#FILES[@]} data files to load" +F=1 +for file in ${FILES[@]}; do + info "Data file $F: $file" + F=$(($F + 1)) +done + # Produce nodes file and triples/quads text file. 
DATA_TRIPLES="$LOC/data-triples.tmp" DATA_QUADS="$LOC/data-quads.tmp" @@ -213,6 +237,6 @@ debug "Triples text files is $DATA_TRIPLES" debug "Quads text file is $DATA_QUADS" java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \ - "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES + "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" "${FILES[@]}" info "Data Load Phase Completed" From d9ff26ec96b6cbf15d6649704dbcfe7f1d8d09eb Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 15:59:33 +0100 Subject: [PATCH 11/16] Fix bug where JENA_HOME is a symbolic link (JENA-977) This commit fixes a bug that can occur when JENA_HOME is a symbolic link, the scripts need to resolve the link as otherwise they cannot source the common function scripts successfully. Scripts now also bail out if they can't find the common functions script to source. --- apache-jena/bin/tdbloader2 | 59 ++++++++++++++++++++++++++++++--- apache-jena/bin/tdbloader2data | 48 ++++++++++++++++++++++++++- apache-jena/bin/tdbloader2index | 48 ++++++++++++++++++++++++++- 3 files changed, 149 insertions(+), 6 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index e598aeb8723..d8b375c976b 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -98,17 +98,45 @@ Advanced additional options are as follows: EOF } +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|*BSB*|*BSD|BSD*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty if [ -z "$JENA_HOME" ]; then echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" SCRIPT="$0" # Catch common issue: script has been symlinked if [ -L "$SCRIPT" ]; then 
- SCRIPT="$(readlink -f "$0")" + SCRIPT=$(resolveLink "$0") # If link is relative case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; esac fi @@ -117,7 +145,30 @@ if [ -z "$JENA_HOME" ]; then export JENA_HOME echo "Located JENA_HOME at ${JENA_HOME}" fi -source "${JENA_HOME}/bin/tdbloader2common" +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" +fi + +if [ -e "${JENA_HOME}/bin/tdbloader2common" ]; then + # Can source common functions + source "${JENA_HOME}/bin/tdbloader2common" +else + echo "Unable to locate common functions script tdbloader2common" + exit 1 +fi # ---- Setup JVM_ARGS=${JVM_ARGS:--Xmx1024M} diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index ab0fe873995..2f8ffa70de8 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -18,12 +18,58 @@ # The environment for this sub-script is setup by "tdbloader2" +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|*BSB*|*BSD|BSD*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # Pull in common functions if [ -z "$JENA_HOME" ]; then echo "JENA_HOME is not set" exit 1 fi -source "${JENA_HOME}/bin/tdbloader2common" +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink 
"$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" +fi + +if [ -e "${JENA_HOME}/bin/tdbloader2common" ]; then + # Can source common functions + source "${JENA_HOME}/bin/tdbloader2common" +else + echo "Unable to locate common functions script tdbloader2common" + exit 1 +fi function printUsage() { cat << EOF diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index b997b39f738..78f82b539cb 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -18,12 +18,58 @@ # The environment for this sub-script is setup by "tdbloader2" +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|*BSB*|*BSD|BSD*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # Pull in common functions if [ -z "$JENA_HOME" ]; then echo "JENA_HOME is not set" exit 1 fi -source "${JENA_HOME}/bin/tdbloader2common" +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" +fi + +if [ -e "${JENA_HOME}/bin/tdbloader2common" ]; then + # Can source common functions + source "${JENA_HOME}/bin/tdbloader2common" +else + echo "Unable to locate common functions script tdbloader2common" + exit 1 +fi function printUsage() { cat << EOF From 
c25ad5d800779ca829a7bde581f98d62c417719b Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 16:04:42 +0100 Subject: [PATCH 12/16] Minor clean up of OS type testing (JENA-977) --- apache-jena/bin/tdbloader2 | 2 +- apache-jena/bin/tdbloader2common | 4 ++-- apache-jena/bin/tdbloader2data | 2 +- apache-jena/bin/tdbloader2index | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index d8b375c976b..12168fa03d4 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -103,7 +103,7 @@ function resolveLink() { if [ -L "$NAME" ]; then case "$OSTYPE" in - darwin*|*BSB*|*BSD|BSD*) + darwin*|bsd*) # BSD style readlink behaves differently to GNU readlink # Have to manually follow links while [ -L "$NAME" ]; do diff --git a/apache-jena/bin/tdbloader2common b/apache-jena/bin/tdbloader2common index 2c116ad374c..2c73f7fee85 100644 --- a/apache-jena/bin/tdbloader2common +++ b/apache-jena/bin/tdbloader2common @@ -112,7 +112,7 @@ function resolveLink() { if [ -L "$NAME" ]; then case "$OSTYPE" in - darwin*|*BSB*|*BSD|BSD*) + darwin*|bsd*) # BSD style readlink behaves differently to GNU readlink # Have to manually follow links while [ -L "$NAME" ]; do @@ -165,7 +165,7 @@ function makeAbsolute() { if [[ "$NAME" != "/"* ]]; then # Now make absolute case "$OSTYPE" in - darwin*|*BSB*|*BSD|BSD*) + darwin*|bsd*) # BSD style readlink does not support the -f for canonicalization # so have to do this via cd, pwd and basename local FILENAME=$(basename "$NAME") diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index 2f8ffa70de8..d0ca066f7f8 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -23,7 +23,7 @@ function resolveLink() { if [ -L "$NAME" ]; then case "$OSTYPE" in - darwin*|*BSB*|*BSD|BSD*) + darwin*|bsd*) # BSD style readlink behaves differently to GNU readlink # Have to manually follow links while [ -L "$NAME" ]; do diff --git 
a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 78f82b539cb..4d50f93fa40 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -23,7 +23,7 @@ function resolveLink() { if [ -L "$NAME" ]; then case "$OSTYPE" in - darwin*|*BSB*|*BSD|BSD*) + darwin*|bsd*) # BSD style readlink behaves differently to GNU readlink # Have to manually follow links while [ -L "$NAME" ]; do From 12dc2cc66640e432a4e2f5b45ebf2fb16c995440 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 16:08:52 +0100 Subject: [PATCH 13/16] Final pieces of tdbloader2 script clean up (JENA-977) - Fix white space inconsistencies in tdbloader2 scripts - Removed defunct tdbloader2worker script - Removed defunct and broken scripts from jena-tdb/bin/ --- apache-jena/bin/tdbloader2 | 16 ++-- apache-jena/bin/tdbloader2data | 2 +- apache-jena/bin/tdbloader2index | 10 +- apache-jena/bin/tdbloader2worker | 154 ------------------------------- jena-tdb/bin/tdbloader2 | 47 ---------- jena-tdb/bin/tdbloader2worker | 154 ------------------------------- 6 files changed, 14 insertions(+), 369 deletions(-) delete mode 100755 apache-jena/bin/tdbloader2worker delete mode 100755 jena-tdb/bin/tdbloader2 delete mode 100755 jena-tdb/bin/tdbloader2worker diff --git a/apache-jena/bin/tdbloader2 b/apache-jena/bin/tdbloader2 index 12168fa03d4..52950bfdfbb 100755 --- a/apache-jena/bin/tdbloader2 +++ b/apache-jena/bin/tdbloader2 @@ -122,14 +122,14 @@ function resolveLink() { } # If JENA_HOME is empty -if [ -z "$JENA_HOME" ]; then +if [ -z "$JENA_HOME" ]; then echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" SCRIPT="$0" # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ]; then - SCRIPT=$(resolveLink "$0") - # If link is relative - case "$SCRIPT" in + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in /*) # Already absolute ;; @@ -137,8 +137,8 @@ if [ -z "$JENA_HOME" ]; then # Relative, 
make absolute SCRIPT=$( dirname "$0" )/$SCRIPT ;; - esac - fi + esac + fi # Work out root from script location JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" @@ -149,7 +149,7 @@ fi if [ -L "${JENA_HOME}" ]; then JENA_HOME=$(resolveLink "$JENA_HOME") # If link is relative - case "$JENA_HOME" in + case "$JENA_HOME" in /*) # Already absolute ;; diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index d0ca066f7f8..ff4469554d0 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -50,7 +50,7 @@ fi if [ -L "${JENA_HOME}" ]; then JENA_HOME=$(resolveLink "$JENA_HOME") # If link is relative - case "$JENA_HOME" in + case "$JENA_HOME" in /*) # Already absolute ;; diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index 4d50f93fa40..c057b49b02e 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -50,7 +50,7 @@ fi if [ -L "${JENA_HOME}" ]; then JENA_HOME=$(resolveLink "$JENA_HOME") # If link is relative - case "$JENA_HOME" in + case "$JENA_HOME" in /*) # Already absolute ;; @@ -247,7 +247,7 @@ if [ -z "$SORT_ARGS" ]; then set +e sort --parallel=3 < /dev/null 2>/dev/null if [ $? = 0 ]; then - SORT_ARGS="$SORT_ARGS --parallel=3" + SORT_ARGS="$SORT_ARGS --parallel=3" fi set -e fi @@ -315,8 +315,8 @@ generate_index() if [ ! -s "$DATA" ]; then debug "Skipping Index $IDX as no relevant data to index" - return - fi + return + fi info "Creating Index $IDX" @@ -390,7 +390,7 @@ generate_index() # Remove work file unless keeping if [ $KEEP_WORK = 0 ]; then debug "Cleaning up work file $WORK" - rm "$WORK" + rm "$WORK" fi } diff --git a/apache-jena/bin/tdbloader2worker b/apache-jena/bin/tdbloader2worker deleted file mode 100755 index ca26d82de3d..00000000000 --- a/apache-jena/bin/tdbloader2worker +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env bash - -## Licensed to the Apache Software Foundation (ASF) under one -## or more contributor license agreements. 
See the NOTICE file -## distributed with this work for additional information -## regarding copyright ownership. The ASF licenses this file -## to you under the Apache License, Version 2.0 (the -## "License"); you may not use this file except in compliance -## with the License. You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. - -# The environment for this sub-script is setup by "tdbloader2" - -# Exit on error. -set -e - -# Sort order is ASCII -export LC_ALL="C" - -log() { echo " $(date $DATE)" "$@" ; } - -TMP=$$ -#DATE="+%Y-%m-%dT%H:%M:%S%:z" -DATE="+%H:%M:%S" - -##--parallel is not always available. -SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" -JVM_ARGS=${JVM_ARGS:--Xmx1200M} - -# Classpath set in "tdbloader2" -if [ -z "$JENA_CP" ] -then - echo "Classpath not provided : set JENA_CP" 1>&2 - exit 1 -fi - -USAGE="Usage: tdbloader2 --loc location datafile ..." -PKG=org.apache.jena.tdb.store.bulkloader2 - -if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi - -## Process --loc. Yuk. -ARG1="$1" -shift -if [ "$ARG1" = "-loc" -o "$ARG1" = "--loc" ] -then - LOC="$1" - shift -else - LOC="${ARG1/-*loc=/}" - if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi -fi - -# Look for any index and data files in the directory. -# Skip a possible configuration file -if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)" -then - echo "Not empty: $LOC" - exit 1 -fi - -if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi -if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi - -FILES="$@" -## Stdin? 
-KEEPWORKFILES="${KEEPWORKFILES:-}" -# ---- Start -log "-- TDB Bulk Loader Start" -TIME1="$(date +%s)" - -# ---- Data loading phase -log "Data phase" -# Produce nodes file and triples/quads text file. - -DATA_TRIPLES="$LOC/data-triples.$TMP" -DATA_QUADS="$LOC/data-quads.$TMP" - -java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \ - "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES - -# ---- Index intermediates -## All files are writtern S P O / G S P O columns per row but in different sort orders. -log "Index phase" - -process_rows() -{ - local KEYS="$1" - local DATA="$2" - local IDX=$3 - local WORK="$LOC/$IDX-txt" - - if [ ! -s "$DATA" ] - then - return - fi - - log "Index $IDX" - sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK - log "Build $IDX" - rm -f "$LOC/$IDX.dat" - rm -f "$LOC/$IDX.idn" - java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" - # Remove intermediary file. - if [ "$KEEPWORKFILES" != "yes" ] - then - rm "$WORK" - fi -} - -K1="-k 1,1" -K2="-k 2,2" -K3="-k 3,3" -K4="-k 4,4" - -process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO - -process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS - -process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP - -process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO - -process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS - -process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP - -process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG - -process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG - -process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG - -log "Index phase end" -TIME2="$(date +%s)" - -# ---- Clean up. 
- -if [ "$KEEPWORKFILES" != "yes" ] -then - rm -f "$DATA_TRIPLES" "$DATA_QUADS" -fi - -# ---- End -log "-- TDB Bulk Loader Finish" -ELAPSED=$(($TIME2-$TIME1)) -log "-- $ELAPSED seconds" diff --git a/jena-tdb/bin/tdbloader2 b/jena-tdb/bin/tdbloader2 deleted file mode 100755 index fff135855a7..00000000000 --- a/jena-tdb/bin/tdbloader2 +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -## Licensed to the Apache Software Foundation (ASF) under one -## or more contributor license agreements. See the NOTICE file -## distributed with this work for additional information -## regarding copyright ownership. The ASF licenses this file -## to you under the Apache License, Version 2.0 (the -## "License"); you may not use this file except in compliance -## with the License. You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. - -if [ "$TDBROOT" = "" ] - then - echo "TDBROOT is not set" 1>&2 - exit 1 -fi - -INIT="$TDBROOT/bin/tdb_init" - -if [ ! -r "$INIT" ] -then - echo "Script $INIT (\$TDBROOT/bin/tdb_init) does not exist or is not readable" - exit 1 -fi - -. 
"$INIT" - -#echo "$TDB_CP" -export JENA_CP="$TDB_CP" -if [ -z "$SORT_ARGS" ] -then - SORT_ARGS="--buffer-size=50%" - if $(sort --parallel=3 < /dev/null 2>/dev/null) - then - SORT_ARGS="$SORT_ARGS --parallel=3" - fi -fi -export SORT_ARGS - -exec "$TDBROOT/bin/tdbloader2worker" "$@" diff --git a/jena-tdb/bin/tdbloader2worker b/jena-tdb/bin/tdbloader2worker deleted file mode 100755 index ca26d82de3d..00000000000 --- a/jena-tdb/bin/tdbloader2worker +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env bash - -## Licensed to the Apache Software Foundation (ASF) under one -## or more contributor license agreements. See the NOTICE file -## distributed with this work for additional information -## regarding copyright ownership. The ASF licenses this file -## to you under the Apache License, Version 2.0 (the -## "License"); you may not use this file except in compliance -## with the License. You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. - -# The environment for this sub-script is setup by "tdbloader2" - -# Exit on error. -set -e - -# Sort order is ASCII -export LC_ALL="C" - -log() { echo " $(date $DATE)" "$@" ; } - -TMP=$$ -#DATE="+%Y-%m-%dT%H:%M:%S%:z" -DATE="+%H:%M:%S" - -##--parallel is not always available. -SORT_ARGS="${SORT_ARGS:---buffer-size=50%}" -JVM_ARGS=${JVM_ARGS:--Xmx1200M} - -# Classpath set in "tdbloader2" -if [ -z "$JENA_CP" ] -then - echo "Classpath not provided : set JENA_CP" 1>&2 - exit 1 -fi - -USAGE="Usage: tdbloader2 --loc location datafile ..." -PKG=org.apache.jena.tdb.store.bulkloader2 - -if [ "$#" -lt 2 ] ; then echo "$USAGE" 1>&2 ; exit 1 ; fi - -## Process --loc. Yuk. 
-ARG1="$1" -shift -if [ "$ARG1" = "-loc" -o "$ARG1" = "--loc" ] -then - LOC="$1" - shift -else - LOC="${ARG1/-*loc=/}" - if [ "$ARG1" = "$LOC" ] ; then echo $USAGE 1>&2 ; exit 1 ; fi -fi - -# Look for any index and data files in the directory. -# Skip a possible configuration file -if test -n "$(find "$LOC" -maxdepth 1 -type f ! -name 'this.*' -print -quit)" -then - echo "Not empty: $LOC" - exit 1 -fi - -if [ ! -e "$LOC" ] ; then mkdir "$LOC" ; fi -if [ ! -d "$LOC" ] ; then echo "Not a directory: $LOC" ; exit 1 ; fi - -FILES="$@" -## Stdin? -KEEPWORKFILES="${KEEPWORKFILES:-}" -# ---- Start -log "-- TDB Bulk Loader Start" -TIME1="$(date +%s)" - -# ---- Data loading phase -log "Data phase" -# Produce nodes file and triples/quads text file. - -DATA_TRIPLES="$LOC/data-triples.$TMP" -DATA_QUADS="$LOC/data-quads.$TMP" - -java $JVM_ARGS -cp "$JENA_CP" "$PKG".CmdNodeTableBuilder \ - "--loc=$LOC" "--triples=$DATA_TRIPLES" "--quads=$DATA_QUADS" $FILES - -# ---- Index intermediates -## All files are writtern S P O / G S P O columns per row but in different sort orders. -log "Index phase" - -process_rows() -{ - local KEYS="$1" - local DATA="$2" - local IDX=$3 - local WORK="$LOC/$IDX-txt" - - if [ ! -s "$DATA" ] - then - return - fi - - log "Index $IDX" - sort $SORT_ARGS -u $KEYS < "$DATA" > $WORK - log "Build $IDX" - rm -f "$LOC/$IDX.dat" - rm -f "$LOC/$IDX.idn" - java -cp "$JENA_CP" "$PKG".CmdIndexBuild "$LOC" "$IDX" "$WORK" - # Remove intermediary file. 
- if [ "$KEEPWORKFILES" != "yes" ] - then - rm "$WORK" - fi -} - -K1="-k 1,1" -K2="-k 2,2" -K3="-k 3,3" -K4="-k 4,4" - -process_rows "$K1 $K2 $K3" "$DATA_TRIPLES" SPO - -process_rows "$K2 $K3 $K1" "$DATA_TRIPLES" POS - -process_rows "$K3 $K1 $K2" "$DATA_TRIPLES" OSP - -process_rows "$K1 $K2 $K3 $K4" "$DATA_QUADS" GSPO - -process_rows "$K1 $K3 $K4 $K2" "$DATA_QUADS" GPOS - -process_rows "$K1 $K4 $K2 $K3" "$DATA_QUADS" GOSP - -process_rows "$K2 $K3 $K4 $K1" "$DATA_QUADS" SPOG - -process_rows "$K3 $K4 $K2 $K1" "$DATA_QUADS" POSG - -process_rows "$K4 $K2 $K3 $K1" "$DATA_QUADS" OSPG - -log "Index phase end" -TIME2="$(date +%s)" - -# ---- Clean up. - -if [ "$KEEPWORKFILES" != "yes" ] -then - rm -f "$DATA_TRIPLES" "$DATA_QUADS" -fi - -# ---- End -log "-- TDB Bulk Loader Finish" -ELAPSED=$(($TIME2-$TIME1)) -log "-- $ELAPSED seconds" From 9b0703919277bf4197241504feef355a14f1fe56 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Tue, 30 Jun 2015 16:36:38 +0100 Subject: [PATCH 14/16] Comment style consistent (JENA-977) --- apache-jena/bin/tdbloader2data | 2 +- apache-jena/bin/tdbloader2index | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apache-jena/bin/tdbloader2data b/apache-jena/bin/tdbloader2data index ff4469554d0..d0dde293fc8 100755 --- a/apache-jena/bin/tdbloader2data +++ b/apache-jena/bin/tdbloader2data @@ -237,7 +237,7 @@ if [ ! -d "$LOC" ]; then abort 1 "Database location is not a directory: $LOC" fi -## Prepare JVM Arguments +# Prepare JVM Arguments JVM_ARGS=${JVM_ARGS:--Xmx1200M} debug "JVM Arguments are $JVM_ARGS" diff --git a/apache-jena/bin/tdbloader2index b/apache-jena/bin/tdbloader2index index c057b49b02e..458698cfa5a 100755 --- a/apache-jena/bin/tdbloader2index +++ b/apache-jena/bin/tdbloader2index @@ -242,7 +242,7 @@ debug "Data text files are $DATA_TRIPLES and $DATA_QUADS" if [ -z "$SORT_ARGS" ]; then SORT_ARGS="--buffer-size=50%" - ##--parallel is not always available. + # --parallel is not always available. 
# Temporarily disable exit on error while we check for --parallel support set +e sort --parallel=3 < /dev/null 2>/dev/null @@ -263,7 +263,7 @@ fi debug "Jena Classpath is $JENA_CP" # ---- Index intermediates -## All files are written S P O / G S P O columns per row but in different sort orders. +# All files are written S P O / G S P O columns per row but in different sort orders. info "Index Building Phase" # Check whether Pipe Viewer is available From 40fe2cd93861e99e53184f76383323565407e372 Mon Sep 17 00:00:00 2001 From: Rob Vesse Date: Wed, 1 Jul 2015 10:32:45 +0100 Subject: [PATCH 15/16] Update template.bin to improve JENA_HOME resolution (JENA-977) Applies the JENA_HOME resolution fixes from the tdbloader2 script changes to the template.bin template that is used to generate the various Jena command scripts --- apache-jena/README | 9 +++-- apache-jena/cmd-maker | 12 ++----- apache-jena/template.bin | 77 +++++++++++++++++++++++++++++++--------- 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/apache-jena/README b/apache-jena/README index 5f80997bb56..e283a8c5e72 100644 --- a/apache-jena/README +++ b/apache-jena/README @@ -27,20 +27,23 @@ line tools. The scripts can be copied to a convenient place on the command path. 
- To use the Jena tools from the command line you need to set the JENAROOT + To use the Jena tools from the command line you need to set the JENA_HOME environment variable to point to the location where you have unzipped the Jena distribution: Windows: - set JENA_HOME=\path\to\apache-jena-2.7.5 + set JENA_HOME=\path\to\apache-jena-x.y.z bat\sparql.bat --version Linux: The command scripts automatically set JENA_HOME but if you want to switch to a different version fro the same scripts: - export JENA_HOME=/path/to/apache-jena-2.7.5 + export JENA_HOME=/path/to/apache-jena-x.y.z bin/sparql --version + + Where x.y.z is the version of the Jena command line tools you have + downloaded If you receive a class not found exception when trying to run one of the scripts then you may have set JENA_HOME incorrectly. A quick and easy way diff --git a/apache-jena/cmd-maker b/apache-jena/cmd-maker index eb2a4e41193..71e297cf18b 100755 --- a/apache-jena/cmd-maker +++ b/apache-jena/cmd-maker @@ -19,14 +19,8 @@ # Not tdbloader2. ## tdbloader2 is slightly different. ## The main program is not a java program -## It is split into tdbloader2 and tdbloader2worker -## tdbloader2worker (the mainporgram) is the same in -## developement and here. tdbloader2 is like the script -## wrappers except it execs tdbloader2worker, not -## java. It needs manually updating. 
-## Replace the java exec with: -## export JENA_CP -## exec tdbloader2worker "$@" +## It is split into several scripts that leverage a mixture of +## POSIX and java tools and should be maintained separately CMDS=$(cat < Date: Wed, 1 Jul 2015 10:34:50 +0100 Subject: [PATCH 16/16] Regenerate commands with JENA_HOME fix (JENA-977) This commit regenerates the commands with the fix for JENA_HOME resolution in the template --- apache-jena/bin/arq | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/infer | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/iri | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/juuid | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/nquads | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/ntriples | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/qparse | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rdfcat | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rdfcompare | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rdfcopy | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rdfparse | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rdfxml | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/riot | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rset | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rsparql | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/rupdate | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/schemagen | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/sparql | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/tdbbackup | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/tdbdump | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/tdbloader | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/tdbquery | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/tdbstats | 77 ++++++++++++++++++++++++++++++-------- 
apache-jena/bin/tdbupdate | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/trig | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/turtle | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/uparse | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/update | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/utf8 | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/wwwdec | 77 ++++++++++++++++++++++++++++++-------- apache-jena/bin/wwwenc | 77 ++++++++++++++++++++++++++++++-------- 31 files changed, 1891 insertions(+), 496 deletions(-) diff --git a/apache-jena/bin/arq b/apache-jena/bin/arq index 00ad2b430d4..8d2f3710a24 100755 --- a/apache-jena/bin/arq +++ b/apache-jena/bin/arq @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." 
&& pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/infer b/apache-jena/bin/infer index 4f25d2558f9..d0ba197205a 100755 --- a/apache-jena/bin/infer +++ b/apache-jena/bin/infer @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." 
&& pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/iri b/apache-jena/bin/iri index d67bda127ee..f5e9e0b24f2 100755 --- a/apache-jena/bin/iri +++ b/apache-jena/bin/iri @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." 
&& pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/juuid b/apache-jena/bin/juuid index f6d9c0b0baf..dcf6b7f25b8 100755 --- a/apache-jena/bin/juuid +++ b/apache-jena/bin/juuid @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." 
&& pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/nquads b/apache-jena/bin/nquads index 005bf036b07..592f6a9dade 100755 --- a/apache-jena/bin/nquads +++ b/apache-jena/bin/nquads @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." 
&& pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/ntriples b/apache-jena/bin/ntriples index ca46add57b1..34db759cabe 100755 --- a/apache-jena/bin/ntriples +++ b/apache-jena/bin/ntriples @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" 
)/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/qparse b/apache-jena/bin/qparse index 488827184cc..02560fc98f7 100755 --- a/apache-jena/bin/qparse +++ b/apache-jena/bin/qparse @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." 
&& pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rdfcat b/apache-jena/bin/rdfcat index d84cf6ca63b..8553e940886 100755 --- a/apache-jena/bin/rdfcat +++ b/apache-jena/bin/rdfcat @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." 
&& pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rdfcompare b/apache-jena/bin/rdfcompare index d231e3824f5..e9ff1c1469b 100755 --- a/apache-jena/bin/rdfcompare +++ b/apache-jena/bin/rdfcompare @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname 
"$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rdfcopy b/apache-jena/bin/rdfcopy index 44673efea65..bd95e533c3a 100755 --- a/apache-jena/bin/rdfcopy +++ b/apache-jena/bin/rdfcopy @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname 
"$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rdfparse b/apache-jena/bin/rdfparse index 57570efcbb3..1d226218ba0 100755 --- a/apache-jena/bin/rdfparse +++ b/apache-jena/bin/rdfparse @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( 
dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rdfxml b/apache-jena/bin/rdfxml index 35a584069af..bebb6e17e27 100755 --- a/apache-jena/bin/rdfxml +++ b/apache-jena/bin/rdfxml @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( 
dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/riot b/apache-jena/bin/riot index 681c0ed8891..d3b21b5b08a 100755 --- a/apache-jena/bin/riot +++ b/apache-jena/bin/riot @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname 
"$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rset b/apache-jena/bin/rset index c14f584a20b..7d494a806b2 100755 --- a/apache-jena/bin/rset +++ b/apache-jena/bin/rset @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" 
)/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rsparql b/apache-jena/bin/rsparql index a9a0bc83ca5..ddea6955a3f 100755 --- a/apache-jena/bin/rsparql +++ b/apache-jena/bin/rsparql @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" 
)/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/rupdate b/apache-jena/bin/rupdate index a569d043989..76120843b9f 100755 --- a/apache-jena/bin/rupdate +++ b/apache-jena/bin/rupdate @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname "$SCRIPT" 
)/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/schemagen b/apache-jena/bin/schemagen index 0a87241d596..a2fbc3f7057 100755 --- a/apache-jena/bin/schemagen +++ b/apache-jena/bin/schemagen @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname 
"$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/sparql b/apache-jena/bin/sparql index 85f0fa2374f..dc6b3349271 100755 --- a/apache-jena/bin/sparql +++ b/apache-jena/bin/sparql @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( dirname 
"$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/tdbbackup b/apache-jena/bin/tdbbackup index d6786a39976..b5f3d9bd737 100755 --- a/apache-jena/bin/tdbbackup +++ b/apache-jena/bin/tdbbackup @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( 
dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/tdbdump b/apache-jena/bin/tdbdump index 89b4c2f5678..2ead54e7d16 100755 --- a/apache-jena/bin/tdbdump +++ b/apache-jena/bin/tdbdump @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( 
dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/tdbloader b/apache-jena/bin/tdbloader index 187888a65f8..06b4356e65c 100755 --- a/apache-jena/bin/tdbloader +++ b/apache-jena/bin/tdbloader @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( 
cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/tdbquery b/apache-jena/bin/tdbquery index 18c6fb63bea..b4bd8fae094 100755 --- a/apache-jena/bin/tdbquery +++ b/apache-jena/bin/tdbquery @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - 
JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/tdbstats b/apache-jena/bin/tdbstats index a9a9cd9b376..aa404459288 100755 --- a/apache-jena/bin/tdbstats +++ b/apache-jena/bin/tdbstats @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location 
- JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/tdbupdate b/apache-jena/bin/tdbupdate index fe5aa52ea1c..3c94419f359 100755 --- a/apache-jena/bin/tdbupdate +++ b/apache-jena/bin/tdbupdate @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script 
location - JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/trig b/apache-jena/bin/trig index b3a9121e324..e984da0f114 100755 --- a/apache-jena/bin/trig +++ b/apache-jena/bin/trig @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - 
JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/turtle b/apache-jena/bin/turtle index 716eb7cd7a0..9adb88fc247 100755 --- a/apache-jena/bin/turtle +++ b/apache-jena/bin/turtle @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - 
JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/uparse b/apache-jena/bin/uparse index d8b7226f721..d2eef6182c0 100755 --- a/apache-jena/bin/uparse +++ b/apache-jena/bin/uparse @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - 
JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/update b/apache-jena/bin/update index 7d3c164b34a..a7eac43381f 100755 --- a/apache-jena/bin/update +++ b/apache-jena/bin/update @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - 
JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/utf8 b/apache-jena/bin/utf8 index 090bcb06acc..67ea2e35285 100755 --- a/apache-jena/bin/utf8 +++ b/apache-jena/bin/utf8 @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( 
cd "$( dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/wwwdec b/apache-jena/bin/wwwdec index 12e2c8d02c6..f979d5c86a9 100755 --- a/apache-jena/bin/wwwdec +++ b/apache-jena/bin/wwwdec @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( 
dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup diff --git a/apache-jena/bin/wwwenc b/apache-jena/bin/wwwenc index c34ba5e8476..9e4a9d4f68e 100755 --- a/apache-jena/bin/wwwenc +++ b/apache-jena/bin/wwwenc @@ -1,23 +1,68 @@ #!/bin/sh ## Licensed under the terms of http://www.apache.org/licenses/LICENSE-2.0 +function resolveLink() { + local NAME=$1 + + if [ -L "$NAME" ]; then + case "$OSTYPE" in + darwin*|bsd*) + # BSD style readlink behaves differently to GNU readlink + # Have to manually follow links + while [ -L "$NAME" ]; do + NAME=$(readlink "$NAME") + done + ;; + *) + # Assuming standard GNU readlink with -f for + # canonicalize and follow + NAME=$(readlink -f "$NAME") + ;; + esac + fi + + echo "$NAME" +} + # If JENA_HOME is empty -if [ -z "$JENA_HOME" ] - then - SCRIPT="$0" - # Catch common issue: script has been symlinked - if [ -L "$SCRIPT" ] - then - SCRIPT="$(readlink "$0")" - # If link is relative - case "$SCRIPT" in - /*) ;; # fine - *) SCRIPT=$( dirname "$0" )/$SCRIPT;; # fix - esac - fi - - # Work out root from script location - JENA_HOME="$( cd "$( 
dirname "$SCRIPT" )/.." && pwd )" +if [ -z "$JENA_HOME" ]; then + echo "JENA_HOME not set, attempting to locate JENA_HOME automatically" + SCRIPT="$0" + # Catch common issue: script has been symlinked + if [ -L "$SCRIPT" ]; then + SCRIPT=$(resolveLink "$0") + # If link is relative + case "$SCRIPT" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + SCRIPT=$( dirname "$0" )/$SCRIPT + ;; + esac + fi + + # Work out root from script location + JENA_HOME="$( cd "$( dirname "$SCRIPT" )/.." && pwd )" + export JENA_HOME + echo "Located JENA_HOME at ${JENA_HOME}" +fi +# If JENA_HOME is a symbolic link need to resolve +if [ -L "${JENA_HOME}" ]; then + JENA_HOME=$(resolveLink "$JENA_HOME") + # If link is relative + case "$JENA_HOME" in + /*) + # Already absolute + ;; + *) + # Relative, make absolute + JENA_HOME=$(dirname "$JENA_HOME") + ;; + esac + export JENA_HOME + echo "Resolved symbolic links for JENA_HOME to $JENA_HOME" fi # ---- Setup