Skip to content

Commit

Permalink
Add file:advise/4 - a wrapper to the POSIX syscall posix_fadvise
Browse files Browse the repository at this point in the history
Useful for informing the Operating System about the access pattern
for a file's data, so that it can adapt the caching strategy to
maximize disk IO performance.
  • Loading branch information
fdmanana authored and RaimoNiskanen committed May 20, 2010
1 parent 3f53a96 commit 21a67b7
Show file tree
Hide file tree
Showing 13 changed files with 327 additions and 5 deletions.
1 change: 1 addition & 0 deletions erts/configure.in
Expand Up @@ -1054,6 +1054,7 @@ fi


AC_SUBST(ERTS_BUILD_SMP_EMU) AC_SUBST(ERTS_BUILD_SMP_EMU)


AC_CHECK_FUNCS([posix_fadvise])




# #
Expand Down
34 changes: 34 additions & 0 deletions erts/emulator/drivers/common/efile_drv.c
Expand Up @@ -54,6 +54,7 @@
#define FILE_ALTNAME 28 #define FILE_ALTNAME 28
#define FILE_READ_LINE 29 #define FILE_READ_LINE 29
#define FILE_FDATASYNC 30 #define FILE_FDATASYNC 30
#define FILE_FADVISE 31


/* Return codes */ /* Return codes */


Expand Down Expand Up @@ -358,6 +359,11 @@ struct t_data
struct t_readdir_buf *first_buf; struct t_readdir_buf *first_buf;
struct t_readdir_buf *last_buf; struct t_readdir_buf *last_buf;
} read_dir; } read_dir;
struct {
Sint64 offset;
Sint64 length;
int advise;
} fadvise;
} c; } c;
char b[1]; char b[1];
}; };
Expand Down Expand Up @@ -1647,6 +1653,18 @@ static void invoke_open(void *data)
d->result_ok = status; d->result_ok = status;
} }


static void invoke_fadvise(void *data)
{
struct t_data *d = (struct t_data *) data;
int fd = (int) d->fd;
off_t offset = (off_t) d->c.fadvise.offset;
off_t length = (off_t) d->c.fadvise.length;
int advise = (int) d->c.fadvise.advise;

d->again = 0;
d->result_ok = efile_fadvise(&d->errInfo, fd, offset, length, advise);
}

static void free_readdir(void *data) static void free_readdir(void *data)
{ {
struct t_data *d = (struct t_data *) data; struct t_data *d = (struct t_data *) data;
Expand Down Expand Up @@ -1936,6 +1954,7 @@ file_async_ready(ErlDrvData e, ErlDrvThreadData data)
case FILE_SYMLINK: case FILE_SYMLINK:
case FILE_RENAME: case FILE_RENAME:
case FILE_WRITE_INFO: case FILE_WRITE_INFO:
case FILE_FADVISE:
reply(desc, d->result_ok, &d->errInfo); reply(desc, d->result_ok, &d->errInfo);
free_data(data); free_data(data);
break; break;
Expand Down Expand Up @@ -2355,6 +2374,21 @@ file_output(ErlDrvData e, char* buf, int count)
goto done; goto done;
} }


case FILE_FADVISE:
{
d = EF_SAFE_ALLOC(sizeof(struct t_data));

d->fd = fd;
d->command = command;
d->invoke = invoke_fadvise;
d->free = free_data;
d->level = 2;
d->c.fadvise.offset = get_int64((uchar*) buf);
d->c.fadvise.length = get_int64(((uchar*) buf) + sizeof(Sint64));
d->c.fadvise.advise = get_int32(((uchar*) buf) + 2 * sizeof(Sint64));
goto done;
}

} }


/* /*
Expand Down
2 changes: 2 additions & 0 deletions erts/emulator/drivers/common/erl_efile.h
Expand Up @@ -151,3 +151,5 @@ int efile_altname(Efile_error* errInfo, char *name,
int efile_link(Efile_error* errInfo, char* old, char* new); int efile_link(Efile_error* errInfo, char* old, char* new);
int efile_symlink(Efile_error* errInfo, char* old, char* new); int efile_symlink(Efile_error* errInfo, char* old, char* new);
int efile_may_openfile(Efile_error* errInfo, char *name); int efile_may_openfile(Efile_error* errInfo, char *name);
int efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset, Sint64 length,
int advise);
9 changes: 9 additions & 0 deletions erts/emulator/drivers/common/ram_file_drv.c
Expand Up @@ -46,6 +46,8 @@
#define RAM_FILE_UUENCODE 35 /* uuencode file */ #define RAM_FILE_UUENCODE 35 /* uuencode file */
#define RAM_FILE_UUDECODE 36 /* uudecode file */ #define RAM_FILE_UUDECODE 36 /* uudecode file */
#define RAM_FILE_SIZE 37 /* get file size */ #define RAM_FILE_SIZE 37 /* get file size */
#define RAM_FILE_ADVISE 38 /* predeclare the access
* pattern for file data */
/* possible new operations include: /* possible new operations include:
DES_ENCRYPT DES_ENCRYPT
DES_DECRYPT DES_DECRYPT
Expand Down Expand Up @@ -693,6 +695,13 @@ static void rfile_command(ErlDrvData e, char* buf, int count)
case RAM_FILE_UUDECODE: /* uudecode file */ case RAM_FILE_UUDECODE: /* uudecode file */
ram_file_uudecode(f); ram_file_uudecode(f);
break; break;

case RAM_FILE_ADVISE:
if (f->flags == 0)
error_reply(f, EBADF);
else
reply(f, 1, 0);
break;
} }
/* /*
* Ignore anything else -- let the caller hang. * Ignore anything else -- let the caller hang.
Expand Down
11 changes: 11 additions & 0 deletions erts/emulator/drivers/unix/unix_efile.c
Expand Up @@ -1448,3 +1448,14 @@ efile_symlink(Efile_error* errInfo, char* old, char* new)
return check_error(symlink(old, new), errInfo); return check_error(symlink(old, new), errInfo);
#endif #endif
} }

int
efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset,
Sint64 length, int advise)
{
#ifdef HAVE_POSIX_FADVISE
return check_error(posix_fadvise(fd, offset, length, advise), errInfo);
#else
return check_error(0, errInfo);
#endif
}
9 changes: 9 additions & 0 deletions erts/emulator/drivers/win32/win_efile.c
Expand Up @@ -1433,3 +1433,12 @@ efile_symlink(Efile_error* errInfo, char* old, char* new)
errno = ENOTSUP; errno = ENOTSUP;
return check_error(-1, errInfo); return check_error(-1, errInfo);
} }

int
efile_fadvise(Efile_error* errInfo, int fd, Sint64 offset,
Sint64 length, int advise)
{
/* posix_fadvise is not available on Windows, do nothing */
errno = ERROR_SUCCESS;
return check_error(0, errInfo);
}
38 changes: 37 additions & 1 deletion erts/preloaded/src/prim_file.erl
Expand Up @@ -25,7 +25,7 @@
%%% Interface towards a single file's contents. Uses ?FD_DRV. %%% Interface towards a single file's contents. Uses ?FD_DRV.


%% Generic file contents operations %% Generic file contents operations
-export([open/2, close/1, datasync/1, sync/1, position/2, truncate/1, -export([open/2, close/1, datasync/1, sync/1, advise/4, position/2, truncate/1,
write/2, pwrite/2, pwrite/3, read/2, read_line/1, pread/2, pread/3, copy/3]). write/2, pwrite/2, pwrite/3, read/2, read_line/1, pread/2, pread/3, copy/3]).


%% Specialized file operations %% Specialized file operations
Expand Down Expand Up @@ -97,6 +97,7 @@
-define(FILE_ALTNAME, 28). -define(FILE_ALTNAME, 28).
-define(FILE_READ_LINE, 29). -define(FILE_READ_LINE, 29).
-define(FILE_FDATASYNC, 30). -define(FILE_FDATASYNC, 30).
-define(FILE_ADVISE, 31).


%% Driver responses %% Driver responses
-define(FILE_RESP_OK, 0). -define(FILE_RESP_OK, 0).
Expand Down Expand Up @@ -131,6 +132,13 @@
%% IPREAD variants %% IPREAD variants
-define(IPREAD_S32BU_P32BU, 0). -define(IPREAD_S32BU_P32BU, 0).


%% POSIX file advises
-define(POSIX_FADV_NORMAL, 0).
-define(POSIX_FADV_RANDOM, 1).
-define(POSIX_FADV_SEQUENTIAL, 2).
-define(POSIX_FADV_WILLNEED, 3).
-define(POSIX_FADV_DONTNEED, 4).
-define(POSIX_FADV_NOREUSE, 5).




%%%----------------------------------------------------------------- %%%-----------------------------------------------------------------
Expand Down Expand Up @@ -221,7 +229,35 @@ close(#file_descriptor{module = ?MODULE, data = {Port, _}}) ->
close(Port) when is_port(Port) -> close(Port) when is_port(Port) ->
drv_close(Port). drv_close(Port).


-define(ADVISE(Offs, Len, Adv),
<<?FILE_ADVISE, Offs:64/signed, Len:64/signed,
Adv:32/signed>>).


%% Returns {error, Reason} | ok.
advise(#file_descriptor{module = ?MODULE, data = {Port, _}},
Offset, Length, Advise) ->
case Advise of
normal ->
Cmd = ?ADVISE(Offset, Length, ?POSIX_FADV_NORMAL),
drv_command(Port, Cmd);
random ->
Cmd = ?ADVISE(Offset, Length, ?POSIX_FADV_RANDOM),
drv_command(Port, Cmd);
sequential ->
Cmd = ?ADVISE(Offset, Length, ?POSIX_FADV_SEQUENTIAL),
drv_command(Port, Cmd);
will_need ->
Cmd = ?ADVISE(Offset, Length, ?POSIX_FADV_WILLNEED),
drv_command(Port, Cmd);
dont_need ->
Cmd = ?ADVISE(Offset, Length, ?POSIX_FADV_DONTNEED),
drv_command(Port, Cmd);
no_reuse ->
Cmd = ?ADVISE(Offset, Length, ?POSIX_FADV_NOREUSE),
drv_command(Port, Cmd);
_ ->
{error, einval}
end.


%% Returns {error, Reason} | ok. %% Returns {error, Reason} | ok.
write(#file_descriptor{module = ?MODULE, data = {Port, _}}, Bytes) -> write(#file_descriptor{module = ?MODULE, data = {Port, _}}, Bytes) ->
Expand Down
19 changes: 19 additions & 0 deletions lib/kernel/doc/src/file.xml
Expand Up @@ -61,6 +61,25 @@ time() = {{Year, Month, Day}, {Hour, Minute, Second}}
Must denote a valid date and time</code> Must denote a valid date and time</code>
</section> </section>
<funcs> <funcs>
<func>
<name>advise(IoDevice, Offset, Length, Advise) -> ok | {error, Reason}</name>
<fsummary>Predeclare an access pattern for file data</fsummary>
<type>
<v>IoDevice = io_device()</v>
<v>Offset = int()</v>
<v>Length = int()</v>
<v>Advise = posix_file_advise()</v>
<v>posix_file_advise() = normal | sequential | random | no_reuse
| will_need | dont_need</v>
<v>Reason = ext_posix()</v>
</type>
<desc>
<p><c>advise/4</c> can be used to announce an intention to access file
data in a specific pattern in the future, thus allowing the
operating system to perform appropriate optimizations.</p>
<p>On some platforms, this function might have no effect.</p>
</desc>
</func>
<func> <func>
<name>change_group(Filename, Gid) -> ok | {error, Reason}</name> <name>change_group(Filename, Gid) -> ok | {error, Reason}</name>
<fsummary>Change group of a file</fsummary> <fsummary>Change group of a file</fsummary>
Expand Down
16 changes: 15 additions & 1 deletion lib/kernel/src/file.erl
Expand Up @@ -36,7 +36,7 @@
%% Specialized %% Specialized
-export([ipread_s32bu_p32bu/3]). -export([ipread_s32bu_p32bu/3]).
%% Generic file contents. %% Generic file contents.
-export([open/2, close/1, -export([open/2, close/1, advise/4,
read/2, write/2, read/2, write/2,
pread/2, pread/3, pwrite/2, pwrite/3, pread/2, pread/3, pwrite/2, pwrite/3,
read_line/1, read_line/1,
Expand Down Expand Up @@ -89,6 +89,8 @@
-type date() :: {pos_integer(), pos_integer(), pos_integer()}. -type date() :: {pos_integer(), pos_integer(), pos_integer()}.
-type time() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}. -type time() :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}.
-type date_time() :: {date(), time()}. -type date_time() :: {date(), time()}.
-type posix_file_advise() :: 'normal' | 'sequential' | 'random' | 'no_reuse' |
'will_need' | 'dont_need'.


%%%----------------------------------------------------------------- %%%-----------------------------------------------------------------
%%% General functions %%% General functions
Expand Down Expand Up @@ -352,6 +354,18 @@ close(#file_descriptor{module = Module} = Handle) ->
close(_) -> close(_) ->
{error, badarg}. {error, badarg}.


-spec advise(File :: io_device(), Offset :: integer(),
Length :: integer(), Advise :: posix_file_advise()) ->
'ok' | {'error', posix()}.

advise(File, Offset, Length, Advise) when is_pid(File) ->
R = file_request(File, {advise, Offset, Length, Advise}),
wait_file_reply(File, R);
advise(#file_descriptor{module = Module} = Handle, Offset, Length, Advise) ->
Module:advise(Handle, Offset, Length, Advise);
advise(_, _, _, _) ->
{error, badarg}.

-spec read(File :: io_device(), Size :: non_neg_integer()) -> -spec read(File :: io_device(), Size :: non_neg_integer()) ->
'eof' | {'ok', [char()] | binary()} | {'error', posix()}. 'eof' | {'ok', [char()] | binary()} | {'error', posix()}.


Expand Down
8 changes: 8 additions & 0 deletions lib/kernel/src/file_io_server.erl
Expand Up @@ -198,6 +198,14 @@ io_reply(From, ReplyAs, Reply) ->
%%%----------------------------------------------------------------- %%%-----------------------------------------------------------------
%%% file requests %%% file requests


file_request({advise,Offset,Length,Advise},
#state{handle=Handle}=State) ->
case ?PRIM_FILE:advise(Handle, Offset, Length, Advise) of
{error,_}=Reply ->
{stop,normal,Reply,State};
Reply ->
{reply,Reply,State}
end;
file_request({pread,At,Sz}, file_request({pread,At,Sz},
#state{handle=Handle,buf=Buf,read_mode=ReadMode}=State) -> #state{handle=Handle,buf=Buf,read_mode=ReadMode}=State) ->
case position(Handle, At, Buf) of case position(Handle, At, Buf) of
Expand Down
33 changes: 32 additions & 1 deletion lib/kernel/src/ram_file.erl
Expand Up @@ -28,7 +28,7 @@


%% Specialized file operations %% Specialized file operations
-export([get_size/1, get_file/1, set_file/2, get_file_close/1]). -export([get_size/1, get_file/1, set_file/2, get_file_close/1]).
-export([compress/1, uncompress/1, uuencode/1, uudecode/1]). -export([compress/1, uncompress/1, uuencode/1, uudecode/1, advise/4]).


-export([open_mode/1]). %% used by ftp-file -export([open_mode/1]). %% used by ftp-file


Expand Down Expand Up @@ -71,6 +71,7 @@
-define(RAM_FILE_UUENCODE, 35). -define(RAM_FILE_UUENCODE, 35).
-define(RAM_FILE_UUDECODE, 36). -define(RAM_FILE_UUDECODE, 36).
-define(RAM_FILE_SIZE, 37). -define(RAM_FILE_SIZE, 37).
-define(RAM_FILE_ADVISE, 38).


%% Open modes for RAM_FILE_OPEN %% Open modes for RAM_FILE_OPEN
-define(RAM_FILE_MODE_READ, 1). -define(RAM_FILE_MODE_READ, 1).
Expand All @@ -91,6 +92,14 @@
-define(RAM_FILE_RESP_NUMBER, 3). -define(RAM_FILE_RESP_NUMBER, 3).
-define(RAM_FILE_RESP_INFO, 4). -define(RAM_FILE_RESP_INFO, 4).


%% POSIX file advises
-define(POSIX_FADV_NORMAL, 0).
-define(POSIX_FADV_RANDOM, 1).
-define(POSIX_FADV_SEQUENTIAL, 2).
-define(POSIX_FADV_WILLNEED, 3).
-define(POSIX_FADV_DONTNEED, 4).
-define(POSIX_FADV_NOREUSE, 5).

%% -------------------------------------------------------------------------- %% --------------------------------------------------------------------------
%% Generic file contents operations. %% Generic file contents operations.
%% %%
Expand Down Expand Up @@ -352,6 +361,28 @@ uudecode(#file_descriptor{module = ?MODULE, data = Port}) ->
uudecode(#file_descriptor{}) -> uudecode(#file_descriptor{}) ->
{error, enotsup}. {error, enotsup}.


advise(#file_descriptor{module = ?MODULE, data = Port}, Offset,
Length, Advise) ->
Cmd0 = <<?RAM_FILE_ADVISE, Offset:64/signed, Length:64/signed>>,
case Advise of
normal ->
call_port(Port, <<Cmd0/binary, ?POSIX_FADV_NORMAL:32/signed>>);
random ->
call_port(Port, <<Cmd0/binary, ?POSIX_FADV_RANDOM:32/signed>>);
sequential ->
call_port(Port, <<Cmd0/binary, ?POSIX_FADV_SEQUENTIAL:32/signed>>);
will_need ->
call_port(Port, <<Cmd0/binary, ?POSIX_FADV_WILLNEED:32/signed>>);
dont_need ->
call_port(Port, <<Cmd0/binary, ?POSIX_FADV_DONTNEED:32/signed>>);
no_reuse ->
call_port(Port, <<Cmd0/binary, ?POSIX_FADV_NOREUSE:32/signed>>);
_ ->
{error, einval}
end;
advise(#file_descriptor{}, _Offset, _Length, _Advise) ->
{error, enotsup}.





%%%----------------------------------------------------------------- %%%-----------------------------------------------------------------
Expand Down

0 comments on commit 21a67b7

Please sign in to comment.