-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TACHYON-655] Implement FUSE connector #2216
Changes from all commits
df9559f
17b3711
a4ceb94
8d2f83d
59b958f
0d30732
a71fd7d
4d0d803
446b287
88ed6ab
ef01040
0534c17
c8bd9bd
8a22cd4
3526dbc
178a7d9
32bafb4
154ca84
3de9ece
7273650
2429097
b1924f4
7dec5b3
8d22610
81965f6
033fecb
a98cac7
363aa29
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Resolve BIN_DIR to the absolute directory that contains this script.
# Only bash and zsh are supported because each needs its own way of finding
# the path of the file currently being executed.
# The output of `cd` is discarded: when CDPATH is set, `cd` prints the
# directory it resolved, which would otherwise be captured into BIN_DIR
# together with the output of `pwd`.
if [[ -n "$BASH_VERSION" ]]; then
  BIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null && pwd )"
elif [[ -n "$ZSH_VERSION" ]]; then
  BIN_DIR="$( cd "$( dirname "${(%):-%x}" )" > /dev/null && pwd )"
else
  echo "Please, launch your scripts from zsh or bash only." >&2
  exit 1
fi
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. extra blank line |
||
#######################################
# Load the Tachyon environment and derive the settings used by this script.
# Globals:
#   BIN_DIR (read)
#   TACHYON_LIBEXEC_DIR (read, defaulted to $BIN_DIR/../libexec)
#   VERSION (read; not set here, expected to come from tachyon-config.sh)
#   DEFAULT_LIBEXEC_DIR, TACHYON_MASTER_PORT, TACHYON_FUSE_JAR,
#   FUSE_MAX_WRITE (written)
# Arguments: none
#######################################
get_env () {
  DEFAULT_LIBEXEC_DIR="$BIN_DIR"/../libexec
  TACHYON_LIBEXEC_DIR=${TACHYON_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
  # Quoted so that an installation path containing spaces can be sourced.
  . "$TACHYON_LIBEXEC_DIR/tachyon-config.sh"

  TACHYON_MASTER_PORT=${TACHYON_MASTER_PORT:-19998}
  TACHYON_FUSE_JAR=${BIN_DIR}/../fuse/target/tachyon-fuse-${VERSION}-jar-with-dependencies.jar
  # The write granularity can be overridden from the environment;
  # 131072 bytes stays the default when nothing is provided.
  FUSE_MAX_WRITE=${FUSE_MAX_WRITE:-131072}
}
|
||
#######################################
# Verify that ${JAVA} is Java 8 or newer.
# Both version-string schemes are understood: the legacy "1.<feature>.x"
# form (e.g. "1.8.0_45") and the newer "<feature>[.x.y]" form (e.g. "9",
# "10", "11.0.2"). Concatenating the first two components, as was done
# before, wrongly rejected strings such as "9" or "10".
# Globals:   JAVA (read) - java launcher to probe
# Arguments: none
# Outputs:   an error message on stderr when the version is too old or
#            cannot be determined
# Returns:   0 if Java 8 or newer, 1 otherwise
#######################################
check_java_version () {
  local version_string feature_version
  # `java -version` reports on stderr; the version is the first quoted token.
  version_string=$("${JAVA}" -version 2>&1 | awk -F '"' '/version/ {print $2; exit}')

  # Leading run of digits: "1" for the legacy scheme, the feature release otherwise.
  feature_version=${version_string%%[!0-9]*}
  if [[ "${feature_version}" == "1" ]]; then
    # Legacy scheme: the feature release is the second component.
    feature_version=${version_string#1.}
    feature_version=${feature_version%%[!0-9]*}
  fi

  # 10# forces base 10 so that a component with a leading zero is not read as octal.
  if [[ -z "${feature_version}" ]] || (( 10#${feature_version} < 8 )); then
    echo "It seems you are running a version of Java which is older then Java8. Please, use Java 8 to use tachyon-fuse" >&2
    return 1
  fi
  return 0
}
|
||
#######################################
# Check that the tachyon-fuse jar exists as a regular file.
# Globals:   TACHYON_FUSE_JAR (read)
# Arguments: none
# Outputs:   an error message on stderr when the jar is missing, in line
#            with the other diagnostics of this script
# Returns:   0 if the jar exists, 1 otherwise
#######################################
check_tfuse_jar () {
  if [[ -f "${TACHYON_FUSE_JAR}" ]]; then
    return 0
  fi
  echo "Cannot find ${TACHYON_FUSE_JAR}. Was tachyon compiled with java8 or more recent?" >&2
  return 1
}
|
||
#######################################
# Append the JVM flags and Tachyon system properties used to launch
# tachyon-fuse. Both variables are whitespace-separated option lists that
# are meant to be expanded unquoted on the java command line.
# Globals:
#   TACHYON_MASTER_PORT, TACHYON_MASTER_ADDRESS, TACHYON_LOGS_DIR,
#   TACHYON_CONF_DIR (read)
#   JAVA_OPTS, TACHYON_FUSE_OPTS (appended to)
# Arguments: none
#######################################
set_java_opt () {
  JAVA_OPTS+="
    -server
    -Xms1G
    -Xmx1G
  "

  # tachyon.logger.type is passed exactly once. It used to be given twice
  # with different values, and since the last -D on a java command line
  # wins, only FUSE_LOGGER was ever effective.
  TACHYON_FUSE_OPTS+="
    -Dtachyon.master.port=${TACHYON_MASTER_PORT}
    -Dtachyon.master.hostname=${TACHYON_MASTER_ADDRESS}
    -Dtachyon.logs.dir=${TACHYON_LOGS_DIR}
    -Dtachyon.logger.type=FUSE_LOGGER
    -Dlog4j.configuration=file:${TACHYON_CONF_DIR}/log4j.properties
  "
}
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. extra blank line There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cleaned |
||
#######################################
# Start the tachyon-fuse java process in the background and mount Tachyon
# on the given mount point.
# Globals:
#   JAVA, TACHYON_FUSE_JAR, JAVA_OPTS, TACHYON_FUSE_OPTS,
#   TACHYON_LOGS_DIR (read)
# Arguments: $1 - local mount point
# Outputs:   progress on stdout, errors on stderr; the output of the java
#            process goes to ${TACHYON_LOGS_DIR}/fuse.out
# Returns:   0 if the process is still alive two seconds after launch,
#            1 if tachyon-fuse was already running, no mount point was
#            given, or the process exited early
#######################################
mount_fuse() {
  if fuse_stat > /dev/null ; then
    echo "tachyon-fuse is already running on the local host. Please, stop it first." >&2
    return 1
  fi
  local mount_point=$1
  if [[ -z "${mount_point}" ]]; then
    echo "No mount point specified." >&2
    return 1
  fi
  echo "Starting tachyon-fuse on local host."
  local fuse_pid
  # JAVA_OPTS and TACHYON_FUSE_OPTS are whitespace-separated option lists and
  # are left unquoted on purpose; every path is quoted so that spaces in the
  # mount point, jar location or logs directory do not split the argument.
  (nohup "${JAVA}" -cp "${TACHYON_FUSE_JAR}" ${JAVA_OPTS} ${TACHYON_FUSE_OPTS} \
    tachyon.fuse.TachyonFuse \
    -m "${mount_point}" \
    -o big_writes > "${TACHYON_LOGS_DIR}/fuse.out" 2>&1) &
  # Capture the PID right away, before any other command runs.
  fuse_pid=$!
  # sleep: workaround to let the bg java process exit on errors, if any
  sleep 2s
  if kill -0 "${fuse_pid}" > /dev/null 2>&1 ; then
    return 0
  fi
  echo "tachyon-fuse not started. See ${TACHYON_LOGS_DIR}/fuse.out for details" >&2
  return 1
}
|
||
#######################################
# Stop the running tachyon-fuse process, if there is one.
# Arguments: none
# Outputs:   progress on stdout; a message on stderr when nothing is running
# Returns:   the exit status of kill when a process was found, 1 otherwise
#######################################
umount_fuse () {
  # Declaration and assignment are kept separate: `local var=$(cmd)` always
  # yields the exit status of `local` (0), which made the "not running"
  # branch unreachable and ran `kill` with no PID.
  local fuse_pid
  if fuse_pid=$(fuse_stat); then
    echo "Stopping tachyon-fuse on local host (PID: ${fuse_pid})."
    # Unquoted on purpose: fuse_stat may report several PIDs separated by spaces.
    kill ${fuse_pid}
    return $?
  fi
  echo "tachyon-fuse is not running on local host." >&2
  return 1
}
|
||
#######################################
# Report whether a TachyonFuse java process is running on this host.
# Globals:   JAVA_HOME (read) - its bin/jps is used to list java processes
# Arguments: $1 - optional "-v" for a human-readable report
# Outputs:   with -v, a status line on stdout; without it, the PID(s) of
#            the running process on stdout (nothing when not running)
# Returns:   0 if TachyonFuse is running, 1 otherwise
#######################################
fuse_stat() {
  local pid
  pid=$(${JAVA_HOME}/bin/jps | awk '/TachyonFuse/ {print $1}')

  # Nothing found: only the verbose mode says so.
  if [[ -z ${pid} ]]; then
    if [[ $1 == "-v" ]]; then
      echo "TachyonFuse: not running"
    fi
    return 1
  fi

  # Third and sixth fields of the matching `mount` line.
  local mount_info
  mount_info=$(mount | awk '/tachyon-fuse/ {print $3 " " $6}')

  case $1 in
    -v)
      echo "TachyonFuse mounted on ${mount_info} [PID: ${pid}]"
      ;;
    *)
      echo ${pid}
      ;;
  esac
  return 0
}
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. extra blank line |
||
# Entry point: validate the command line, load the environment, then
# dispatch to the requested sub-command (mount, umount or stat).
USAGE_MSG="Usage:\n\t$0 [mount|umount|stat]"

if [[ $# -lt 1 ]]; then
  echo -e "${USAGE_MSG}" >&2
  exit 1
fi

get_env
if ! { check_java_version && check_tfuse_jar; }; then
  exit 1
fi
# Populate JAVA_OPTS and TACHYON_FUSE_OPTS, which mount_fuse passes to the
# JVM; nothing else in this script sets them.
set_java_opt

case "$1" in
  mount)
    if [[ $# -ne 2 ]]; then
      echo -e "Usage\n\t$0 mount [mount_point]" >&2
      exit 1
    fi
    # Quoted so that a mount point containing spaces stays a single argument.
    mount_fuse "$2"
    exit $?
    ;;
  umount)
    umount_fuse
    exit $?
    ;;
  stat)
    fuse_stat -v
    exit $?
    ;;
  *)
    # -e is required for the \n and \t escapes in USAGE_MSG to be rendered.
    echo -e "${USAGE_MSG}" >&2
    exit 1
    ;;
esac
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
--- | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you auto format to line break at 100 characters? |
||
layout: global | ||
title: Mounting Tachyon with FUSE (Beta) | ||
nickname: Tachyon-FUSE | ||
group: Features | ||
priority: 4 | ||
--- | ||
|
||
* Table of Contents | ||
{:toc} | ||
|
||
Tachyon-FUSE is a new experimental feature that allows mounting a distributed Tachyon File System | ||
within the local file system hierarchy of a Linux node. Using this feature, standard tools | ||
(for example, `ls`, `cat` or `echo`) and legacy POSIX applications will have basic access to | ||
the distributed Tachyon data store. | ||
|
||
Given the intrinsic characteristics of Tachyon, like its write-once/read-many-times file | ||
data model, the mounted file system will not have full POSIX semantics and will have specific | ||
limitations. Please, read the rest of this document before using this feature to understand | ||
what it can and cannot do for you. | ||
|
||
# Requirements | ||
* Linux kernel 2.6.9 or newer | ||
* JDK 1.8 or newer | ||
* libfuse 2.9.3 or newer | ||
(2.8.3 has been reported to also work - with some warnings) | ||
|
||
# Building | ||
tachyon-fuse is only built with Tachyon when the `buildFuse` maven profile is active. This | ||
profile will be automatically activated by maven when it is detected that you are building | ||
Tachyon with a JDK version 8 or newer. | ||
|
||
For compatibility with Java 6 and 7, binary tachyon distributions may ship without tachyon-fuse | ||
support, so you will need to build your own Tachyon if you want to use tachyon-fuse on your | ||
deployment. | ||
|
||
The best way to do so is to either clone the Tachyon [GitHub | ||
repository](https://github.com/amplab/tachyon) and choose your favourite branch from git, or to | ||
grab a [source distribution](https://github.com/amplab/tachyon/releases) directly. Please, refer to | ||
[this page](http://tachyon-project.org/documentation/master/Building-Tachyon-Master-Branch.html) | ||
for building instructions. | ||
|
||
# Usage | ||
|
||
## Mount Tachyon-FUSE | ||
After having properly configured and started the tachyon cluster, and from the node where you | ||
wish to mount Tachyon, point a shell to your `$TACHYON_HOME` and run: | ||
```bash | ||
$ bin/tachyon-fuse.sh mount <mount_point> | ||
``` | ||
This will spawn a background user-space java process (tachyon-fuse) that will mount the file | ||
system on the specified *<mount_point>*. Note that *<mount_point>* must be an existing and empty | ||
path in your local file system hierarchy and that the user that runs the `tachyon-fuse.sh` | ||
script must own the mount point and have read and write permissions on it. Also note that, | ||
currently, you are limited to have only one Tachyon-FUSE mount per node. | ||
|
||
## Unmount Tachyon-FUSE | ||
To unmount a previously mounted Tachyon-FUSE file system, on the node where the file system is | ||
mounted, point a shell to your `$TACHYON_HOME` and run: | ||
```bash | ||
$ bin/tachyon-fuse.sh umount | ||
``` | ||
This will stop the background tachyon-fuse java process and unmount the file system. | ||
|
||
## Check if Tachyon-FUSE is running | ||
```bash | ||
$ bin/tachyon-fuse.sh stat | ||
``` | ||
|
||
## Optional configuration steps | ||
Tachyon-FUSE is based on the standard java tachyon-client to perform its operations. You might | ||
want to customize the behaviour of the tachyon client used by Tachyon-FUSE the same way you | ||
would for any other client application. | ||
|
||
One possibility, for example, is to edit `$TACHYON_HOME/bin/tachyon-fuse.sh` and add your | ||
specific tachyon client options in the `TACHYON_JAVA_OPTS` variable. | ||
|
||
# Operational assumptions and status | ||
Currently, most basic file system operations are supported. However, due to Tachyon implicit | ||
characteristics, please, be aware that: | ||
* Files can be written only once, only sequentially, and never modified. | ||
* Due to the above, any further access to a file must be read-only. | ||
|
||
This translates into the following constraints on the UNIX system calls that will operate on the | ||
file system: | ||
|
||
## `open(const char* pathname, int flags, mode_t mode)` | ||
(see also `man 2 open`) | ||
|
||
If `pathname` indicates the path of a non-existing regular file in Tachyon, then an open will | ||
only succeed if: | ||
1. The base directory of `pathname` exists in Tachyon; | ||
2. `O_CREAT` and `O_WRONLY` are passed among the `flags` bitfield. | ||
|
||
Equivalently, `creat(const char* pathname )` calls will succeed as long as (1) holds and | ||
`pathname` does not exist yet. | ||
|
||
If `pathname`, instead, points to an existing regular file in Tachyon, then an open call will | ||
only succeed if: | ||
1. `O_RDONLY` is passed among the `flags` bitfield. | ||
|
||
Note that, in either case, the `mode` parameter is currently ignored by Tachyon-FUSE. | ||
|
||
## `read(int fd, void* buf, size_t count)` | ||
(see also `man 2 read`) | ||
|
||
A read system call will only succeed when `fd` refers to a Tachyon file that has been previously | ||
opened with the `O_RDONLY` flag. | ||
|
||
## `lseek(int fd, off_t off, int whence)` | ||
(see also `man 2 lseek`) | ||
|
||
Seeking is supported only on files open for reading, i.e., on files that have been opened with an | ||
`O_RDONLY` flag. | ||
|
||
## `write(int fd, const void* buf, size_t count)` | ||
(see also `man 2 write`) | ||
|
||
A write system call will only succeed when `fd` refers to a Tachyon file that has been previously | ||
opened with the `O_WRONLY` flag. | ||
|
||
# Performance considerations | ||
Due to the combined use of FUSE and JNR, the performance of the mounted file system is expected | ||
to be considerably worse than what you would see by using the `tachyon-client` directly. In other | ||
words, if you are concerned about performance rather than functionality, then Tachyon-FUSE is | ||
not what you are looking for. | ||
|
||
Most of the problems come from the fact that there are several memory copies going on for each | ||
call on `read` or `write` operations, and that FUSE caps the maximum granularity of writes to | ||
128KB. This could probably be improved to a large extent by leveraging the FUSE cache write-backs | ||
feature introduced in kernel 3.15 (not supported yet, however, by libfuse 2.x userspace libs). | ||
|
||
# Configuration Parameters For Tachyon-FUSE | ||
|
||
These are the configuration parameters for Tachyon-FUSE. | ||
|
||
<table class="table table-striped"> | ||
<tr><th>Parameter</th><th>Default Value</th><th>Description</th></tr> | ||
<tr> | ||
<td>tachyon.fuse.maxwrite.bytes</td> | ||
<td>131072</td> | ||
<td> | ||
The desired granularity of FUSE write upcalls in bytes. Note that 128K is currently an upper | ||
bound imposed by the linux kernel. | ||
</td> | ||
</tr> | ||
<tr> | ||
<td>tachyon.fuse.debug.enabled</td> | ||
<td>false</td> | ||
<td> | ||
Enable FUSE debug output. This output will be redirected in a `fuse.out` log file inside | ||
`tachyon.logs.dir`. | ||
</td> | ||
</tr> | ||
<tr> | ||
<td>tachyon.fuse.cachedpaths.max</td> | ||
<td>500</td> | ||
<td> | ||
Defines the size of the internal Tachyon-FUSE cache that maintains the most frequently used | ||
translations between local file system paths and Tachyon file URIs. | ||
</td> | ||
</tr> | ||
<tr> | ||
<td>tachyon.fuse.mount.default</td> | ||
<td>/mnt/tachyon</td> | ||
<td> | ||
Default mount point, only used when the user does not specify something else at mount time. | ||
</td> | ||
</tr> | ||
<tr> | ||
<td>tachyon.fuse.fs.root</td> | ||
<td>/mnt</td> | ||
<td> | ||
Path, within the Tachyon namespace, that will be used as the root of the FUSE mount. | ||
</td> | ||
</tr> | ||
<tr> | ||
<td>tachyon.fuse.fs.name</td> | ||
<td>tachyon-fuse</td> | ||
<td> | ||
Descriptive name used by FUSE to mount the file system. | ||
</td> | ||
</tr> | ||
</table> | ||
|
||
# Acknowledgements | ||
This project uses [jnr-fuse](https://github.com/SerCeMan/jnr-fuse) for FUSE on Java. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just put this as a note: as the initial PR, we are happy to use bash to parse command-line argument and set up environment variables. But later on (after this adapted and used by more people), we will probably refactor this functionality into some other Java class and leave bash script a simple wrapper----the reason is it will become harder to maintain a bash script as we are adding more features / arguments in the future.
All these, for sure, are out of the scope of this PR :)