diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..ababacb --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +FURUHASHI Sadayuki diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..5f100cd --- /dev/null +++ b/COPYING @@ -0,0 +1,14 @@ +Copyright (C) 2008 FURUHASHI Sadayuki + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..e69de29 diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..d3c5b40 --- /dev/null +++ b/INSTALL @@ -0,0 +1,237 @@ +Installation Instructions +************************* + +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, +2006, 2007 Free Software Foundation, Inc. + +This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + +Basic Installation +================== + +Briefly, the shell commands `./configure; make; make install' should +configure, build, and install this package. The following +more-detailed instructions are generic; see the `README' file for +instructions specific to this package. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. 
Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. Caching is +disabled by default to prevent problems with accidental use of stale +cache files. + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You need `configure.ac' if +you want to change it or regenerate `configure' using a newer version +of `autoconf'. + +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. + + Running `configure' might take a while. While running, it prints + some messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. 
There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + + 6. Often, you can also type `make uninstall' to remove the installed + files again. + +Compilers and Options +===================== + +Some systems require unusual options for compilation or linking that the +`configure' script does not know about. Run `./configure --help' for +details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c99 CFLAGS=-g LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + +You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. To do this, you can use GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + With a non-GNU `make', it is safer to compile the package for one +architecture at a time in the source code directory. After you have +installed the package for one architecture, use `make distclean' before +reconfiguring for another architecture. + +Installation Names +================== + +By default, `make install' installs the package's commands under +`/usr/local/bin', include files under `/usr/local/include', etc. You +can specify an installation prefix other than `/usr/local' by giving +`configure' the option `--prefix=PREFIX'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. 
If you +pass the option `--exec-prefix=PREFIX' to `configure', the package uses +PREFIX as the prefix for installing programs and libraries. +Documentation and other data files still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=DIR' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + +Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + +There may be some features `configure' cannot figure out automatically, +but needs to determine by the type of machine the package will run on. +Usually, assuming the package is built to be run on the _same_ +architectures, `configure' can figure that out, but if it prints a +message saying it cannot guess the machine type, give it the +`--build=TYPE' option. 
TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the option `--target=TYPE' to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. + +Sharing Defaults +================ + +If you want to set default values for `configure' scripts to share, you +can create a site shell script called `config.site' that gives default +values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. +A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + +Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +causes the specified `gcc' to be used as the C compiler (unless it is +overridden in the site shell script). + +Unfortunately, this technique does not work for `CONFIG_SHELL' due to +an Autoconf bug. 
Until the bug is fixed you can use this workaround: + + CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash + +`configure' Invocation +====================== + +`configure' recognizes the following options to control how it operates. + +`--help' +`-h' + Print a summary of the options to `configure', and exit. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. + diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..12e59b6 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,25 @@ +export ERB +export RUBY +export RAGEL +SUBDIRS = src + +DOC_WFDOC = \ + doc/memo.txt \ + doc/kumoctl.1.txt \ + doc/kumolog.1.txt \ + doc/kumostat.1.txt + +DOC_FILES = \ + doc/memo.pdf \ + doc/memo.html \ + doc/kumoctl.1 \ + doc/kumolog.1 \ + doc/kumostat.1 + +EXTRA_DIST = $(DOC_WFDOC) $(DOC_FILES) + +man_MANS = \ + doc/kumoctl.1 \ + doc/kumolog.1 \ + doc/kumostat.1 + diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..e69de29 diff --git a/README b/README new file mode 100644 index 0000000..5f100cd --- /dev/null +++ b/README @@ -0,0 +1,14 @@ +Copyright (C) 2008 FURUHASHI Sadayuki + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/bootstrap b/bootstrap new file mode 100755 index 0000000..853ed96 --- /dev/null +++ b/bootstrap @@ -0,0 +1,113 @@ +#!/bin/sh +# vim:ts=4:sw=4 +# Calls autotools to build configure script and Makefile.in. +# Generated automatically using bootstrapper 0.2.1 +# http://bootstrapper.sourceforge.net/ +# +# Copyright (C) 2002 Anthony Ventimiglia +# +# This bootstrap script is free software; you can redistribute +# it and/or modify it under the terms of the GNU General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# +# Calls proper programs to create configure script and Makefile.in files. +# if run with the --clean option, bootstrap removes files it generates. 
To +# clean all autogenerated files (eg: for cvs imports) first run +# make distclean, then bootstrap --clean +# see bootstrapper(1) for more infor + + +if test x"$1" = x"--help"; then + echo "$0: automatic bootstrapping utility for GNU Autotools" + echo " cleans up old autogenerated files and runs autoconf," + echo " automake and aclocal on local directory" + echo + echo " --clean clean up auto-generated files without" + echo " creating new scripts" + echo + exit 0 +fi + + +ACLOCAL="aclocal" +ACLOCAL_FILES="aclocal.m4" +ALWAYS_CLEAN="config.status config.log config.cache libtool" +AUTOCONF="autoconf" +AUTOCONF_FILES="configure" +AUTOHEADER="autoheader" +AUTOHEADER_FILES="" +AUTOMAKE="automake --add-missing --copy" +AUTOMAKE_FILES="config.sub stamp-h.in ltmain.sh missing mkinstalldirs install-sh config.guess" +CONFIG_AUX_DIR="." +CONFIG_FILES="stamp-h ltconfig" +CONFIG_HEADER="" +if [ x`uname` = x"Darwin" ]; then + LIBTOOLIZE="glibtoolize --force --copy" +else + LIBTOOLIZE="libtoolize --force --copy" +fi +LIBTOOLIZE_FILES="config.sub ltmain.sh config.guess" +RM="rm" +SUBDIRS="[]" + + +# These are files created by configure, so we'll always clean them +for i in $ALWAYS_CLEAN; do + test -f $i && \ + $RM $i +done + +if test x"$1" = x"--clean"; then + # + #Clean Files left by previous bootstrap run + # + if test -n "$CONFIG_AUX_DIR"; + then CONFIG_AUX_DIR="$CONFIG_AUX_DIR/" + fi + # Clean Libtoolize generated files + for cf in $LIBTOOLIZE_FILES; do + cf="$CONFIG_AUX_DIR$cf" + test -f $cf && \ + $RM $cf + done + #aclocal.m4 created by aclocal + test -f $ACLOCAL_FILES && $RM $ACLOCAL_FILES + #Clean Autoheader Generated files + for cf in $AUTOHEADER_FILES; do + cf=$CONFIG_AUX_DIR$cf + test -f $cf && \ + $RM $cf + done + # remove config header (Usaually config.h) + test -n "$CONFIG_HEADER" && test -f $CONFIG_HEADER && $RM $CONFIG_HEADER + #Clean Automake generated files + for cf in $AUTOMAKE_FILES; do + cf=$CONFIG_AUX_DIR$cf + test -f $cf && \ + $RM $cf + done + for i in 
$SUBDIRS; do + test -f $i/Makefile.in && \ + $RM $i/Makefile.in + done + #Autoconf generated files + for cf in $AUTOCONF_FILES; do + test -f $cf && \ + $RM $cf + done + for cf in $CONFIG_FILES; do + cf="$CONFIG_AUX_DIR$cf" + test -f $cf && \ + $RM $cf + done +else + $LIBTOOLIZE + $ACLOCAL + $AUTOHEADER + $AUTOMAKE + $AUTOCONF +fi + + diff --git a/configure.in b/configure.in new file mode 100644 index 0000000..57046db --- /dev/null +++ b/configure.in @@ -0,0 +1,172 @@ +AC_INIT(src/logic/boot.h) +AC_CONFIG_AUX_DIR(ac) +AM_INIT_AUTOMAKE(kumofs, 0.2.0) +AC_CONFIG_HEADER(config.h) + + +AC_SUBST(CFLAGS) +if test "" = "$CFLAGS"; then + CFLAGS="-O4" +fi +CFLAGS="-O4 -Wall $CFLAGS" + + +AC_SUBST(CXXFLAGS) +if test "" = "$CXXFLAGS"; then + CXXFLAGS="-O4" +fi +CXXFLAGS="-O4 -Wall $CXXFLAGS" + + +AC_CHECK_PROG(RUBY, ruby, ruby) +if test "x$RUBY" = x; then + AC_MSG_ERROR([cannot find ruby. Ruby is needed to build.]) +fi + +AC_CHECK_PROG(ERB, erb, erb) +if test "x$ERB" = x; then + AC_MSG_ERROR([cannot find erb. Ruby is needed to build.]) +fi + +AC_CHECK_PROG(RAGEL, ragel, ragel) +if test "x$RAGEL" = x; then + AC_MSG_ERROR([cannot find ragel. 
Ragel is needed to build.]) +fi + + +AC_PROG_CC +AC_PROG_CXX + +AC_PROG_LIBTOOL +AM_PROG_AS +AM_PROG_CC_C_O + + +AC_ARG_WITH([msgpack], + AS_HELP_STRING([--with-msgpack=DIR], + [specify the root directory for msgpack library]), + [msgpack_path="$withval"], []) +if test "$msgpack_path" != ""; then + CXXFLAGS="$CXXFLAGS -I$msgpack_path/include" + CFLAGS="$CFLAGS -I$msgpack_path/include" + LDFLAGS="$LDFLAGS -L$msgpack_path/lib" +fi + + +storage_type="tokyocabinet" + + +AC_ARG_WITH([tokyocabinet], + AS_HELP_STRING([--with-tokyocabinet@<:@=DIR@:>@], + [use Tokyo Cabinet for the backend storage]), + [tokyocabinet_path="$withval"], []) +if test "$tokyocabinet_path" != ""; then + storage_type="tokyocabinet" + CXXFLAGS="$CXXFLAGS -I$tokyocabinet_path/include" + CFLAGS="$CFLAGS -I$tokyocabinet_path/include" + LDFLAGS="$LDFLAGS -L$tokyocabinet_path/lib" +fi + + +#AC_ARG_WITH([luxio], +# AS_HELP_STRING([--with-luxio@<:@=DIR@:>@], +# [use LuxIO for the backend storage (NOT IMPLEMENTED)]), +# [luxio_path="$withval"], []) +#if test "$luxio_path" != ""; then +# storage_type="luxio" +# CXXFLAGS="$CXXFLAGS -I$luxio_path/include" +# CFLAGS="$CFLAGS -I$luxio_path/include" +# LDFLAGS="$LDFLAGS -L$luxio_path/lib" +#fi + + +AC_MSG_CHECKING([if tcmalloc is enabled]) +AC_ARG_WITH([tcmalloc], + AS_HELP_STRING([--with-tcmalloc@<:@=DIR@:>@], + [link libtcmalloc]), + [with_tcmalloc="$withval"], [with_tcmalloc="no"]) +AC_MSG_RESULT($with_tcmalloc) +if test "$with_tcmalloc" != "no"; then + if test "$with_tcmalloc" != "yes"; then + LDFLAGS="$LDFLAGS -L$with_tcmalloc/lib" + LDFLAGS="$LDFLAGS -L$with_tcmalloc/lib64" + fi + AC_CHECK_LIB(tcmalloc,malloc,, + AC_MSG_ERROR([Can't find tcmalloc library])) +fi + + +AC_CHECK_LIB(stdc++, main) + +AC_CHECK_LIB(pthread,pthread_create,, + AC_MSG_ERROR([Can't find pthread library])) + +AC_CHECK_LIB(z,deflate,, + AC_MSG_ERROR([Can't find zlib library])) + +AC_CHECK_HEADERS(openssl/sha.h,, + AC_MSG_ERROR([Can't find openssl header])) +AC_CHECK_LIB(crypto,SHA1,, + 
AC_MSG_ERROR([Can't find openssl library])) + +AC_CHECK_LIB(msgpack,main,, + AC_MSG_ERROR([Can't find msgpack library])) + +AC_MSG_CHECKING([storage backend]) +AC_MSG_RESULT($storage_type) +AM_CONDITIONAL(USE_TOKYOCABINET, test "$storage_type" = "tokyocabinet") +if test "$storage_type" = "tokyocabinet"; then + AC_CHECK_HEADERS(tchdb.h,, + AC_MSG_ERROR([Can't find tokyo cabinet header])) + AC_CHECK_LIB(tokyocabinet,tchdbget,, + AC_MSG_ERROR([Can't find tokyocabinet library])) + CXXFLAGS="$CXXFLAGS -DUSE_TOKYOCABINET" + CFLAGS="$CFLAGS -DUSE_TOKYOCABINET" +else + AC_LANG_PUSH(C++) + AC_CHECK_HEADERS(luxio/btree.h) # FIXME luxio UINT8_MAX + #AC_CHECK_HEADERS(luxio/btree.h,, + # AC_MSG_ERROR([Can't find luxio header])) + AC_LANG_POP +fi + + + +AC_MSG_CHECKING([if debug option is enabled]) +AC_ARG_ENABLE(debug, + AS_HELP_STRING([--disable-debug], + [disable assert macros and omit -g option.]) ) +if test "$enable_debug" != "no"; then + CXXFLAGS="$CXXFLAGS -g" + CFLAGS="$CFLAGS -g" +else + CXXFLAGS="$CXXFLAGS -DNDEBUG" + CFLAGS="$CFLAGS -DNDEBUG" +fi +AC_MSG_RESULT($enable_debug) + + +AC_MSG_CHECKING([if trace message is enabled]) +AC_ARG_ENABLE(trace, + AS_HELP_STRING([--enable-trace], [enable trace messages.]) ) +if test "$enable_trace" = "yes"; then + CXXFLAGS="$CXXFLAGS -DMLOGGER_LEVEL=0" + CFLAGS="$CFLAGS -DMLOGGER_LEVEL=0" +else + CXXFLAGS="$CXXFLAGS -DMLOGGER_LEVEL=2" + CFLAGS="$CFLAGS -DMLOGGER_LEVEL=2" +fi +AC_MSG_RESULT($enable_trace) + + +AC_OUTPUT([src/mp/Makefile + src/mpsrc/Makefile + src/log/Makefile + src/kazuhiki/Makefile + src/rpc/Makefile + src/logic/protogen/Makefile + src/logic/Makefile + src/command/Makefile + src/Makefile + Makefile]) + diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..f5d3542 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,41 @@ +#SRCS=$(wildcard *txt) +#default: $(SRCS:.txt=.pdf) $(SRCS:.txt=.html) +#clean: $(SRCS:.txt=.clean) +#distclean: $(SRCS:.txt=.distclean) + +default: memo.pdf memo.html kumoctl.1 
kumostat.1 kumolog.1 +clean: memo.clean +distclean: memo.distclean + + +%.sdoc : %.txt + wfdoc -i $< -o $@ + +%.html : %.txt + wfdoc -f html -i $< -o $@ + +%.tex : %.sdoc + sdoc -toc -format:latex2e -latex2e.driver=dvipdfm -latex2e.ref=HyperRefLaTeX2eRefHandler $< + +%.etex : %.tex + wftex $< $@ + +%.toc : %.etex + platex $< + +%.dvi : %.etex %.toc + platex $< + +%.pdf : %.dvi + dvipdfmx $< + rm -f $*.aux $*.log + +%.clean : %.txt + rm -f $*.sdoc $*.tex $*.etex $*.dvi $*.toc $*.aux $*.out $*.log $*.pdf + +%.distclean : %.txt + rm -f $*.sdoc $*.tex $*.etex $*.dvi $*.toc $*.aux $*.out $*.log $*.pdf $*.mk + +%.1 : %.1.txt + wfdoc -f man $< -o $@ + diff --git a/doc/kumoctl.1 b/doc/kumoctl.1 new file mode 100644 index 0000000..1c33d61 --- /dev/null +++ b/doc/kumoctl.1 @@ -0,0 +1,34 @@ +.TH kumoctl +.SH NAME +kumoctl +.SH SYNOPSIS +kumoctl address[:port=19750] command [options] +.SS COMMANDS +.TP +.B status +get status +.TP +.B attach +attach all new servers and start replace +.TP +.B attach-noreplace +attach all new servers +.TP +.B detach +detach all fault servers and start replace +.TP +.B detach-noreplace +detach all fault servers +.TP +.B replace +start replace without attach/detach +.TP +.B backup [suffix=20090304] +create backup with specified suffix +.TP +.B enable-auto-replace +enable auto replace +.TP +.B disable-auto-replace +disable auto replace +.SH DESCRIPTION diff --git a/doc/kumoctl.1.txt b/doc/kumoctl.1.txt new file mode 100644 index 0000000..d3f656b --- /dev/null +++ b/doc/kumoctl.1.txt @@ -0,0 +1,20 @@ +*? 
kumoctl +*NAME +kumoctl + +*SYNOPSIS +kumoctl address[:port=19750] command [options] + +**COMMANDS +:status :get status +:attach :attach all new servers and start replace +:attach-noreplace :attach all new servers +:detach :detach all fault servers and start replace +:detach-noreplace :detach all fault servers +:replace :start replace without attach/detach +:backup [suffix=20090304] :create backup with specified suffix +:enable-auto-replace :enable auto replace +:disable-auto-replace :disable auto replace + +*DESCRIPTION + diff --git a/doc/kumolog.1 b/doc/kumolog.1 new file mode 100644 index 0000000..738063c --- /dev/null +++ b/doc/kumolog.1 @@ -0,0 +1,6 @@ +.TH kumolog +.SH NAME +kumolog +.SH SYNOPSIS +kumolog +.SH DESCRIPTION diff --git a/doc/kumolog.1.txt b/doc/kumolog.1.txt new file mode 100644 index 0000000..b918354 --- /dev/null +++ b/doc/kumolog.1.txt @@ -0,0 +1,9 @@ +*? kumolog +*NAME +kumolog + +*SYNOPSIS +kumolog + +*DESCRIPTION + diff --git a/doc/kumostat.1 b/doc/kumostat.1 new file mode 100644 index 0000000..4ed9856 --- /dev/null +++ b/doc/kumostat.1 @@ -0,0 +1,31 @@ +.TH kumostat +.SH NAME +kumostat +.SH SYNOPSIS +kumostat address[:port=19800] command [options] +.SS COMMANDS +.TP +.B pid +get pid of server process +.TP +.B uptime +get uptime +.TP +.B time +get UNIX time +.TP +.B version +get version +.TP +.B cmd_get +get number of get requests +.TP +.B cmd_set +get number of set requests +.TP +.B cmd_delete +get number of delete requests +.TP +.B items +get number of stored items +.SH DESCRIPTION diff --git a/doc/kumostat.1.txt b/doc/kumostat.1.txt new file mode 100644 index 0000000..dd28a9f --- /dev/null +++ b/doc/kumostat.1.txt @@ -0,0 +1,19 @@ +*? 
kumostat +*NAME +kumostat + +*SYNOPSIS +kumostat address[:port=19800] command [options] + +**COMMANDS +:pid :get pid of server process +:uptime :get uptime +:time :get UNIX time +:version :get version +:cmd_get :get number of get requests +:cmd_set :get number of set requests +:cmd_delete :get number of delete requests +:items :get number of stored items + +*DESCRIPTION + diff --git a/doc/memo.html b/doc/memo.html new file mode 100644 index 0000000..44235c9 --- /dev/null +++ b/doc/memo.html @@ -0,0 +1,469 @@ + + + + + + + + Kumofs memo -- Kumo Fast Storage rev.948 + + + +

概要

+

kumofsはkey-value型のデータを保存する分散ストレージ。key=>valueを保存するset、keyを取得するget、keyを削除するdeleteの3つの操作をサポートする。

+

データを保持するServer、Server群を管理するManager、アプリケーションからのリクエストをServerに中継するGatewayの3種類のノードでシステムを構成する。

+

データは複数のServerに分散して保存されるため、Serverを追加するほど性能が向上する。

+

データは3台のServerにコピーされて保存される。2台までならServerがダウンしても動作し続ける。

+

Server群はManagerによって死活監視されている。Serverがダウンしたら、そのServerは直ちにシステムから切り離される。ただし1台か2台のManagerが起動していないとServerの切り離しが行われないので、Managerが1台も起動していない状態でServerがダウンするとシステムが停止してしまう。

+

Serverを追加したり切り離したりした後、その状態をシステムに反映するには、レプリケーションされたデータの再配置を行う必要がある。これは自動では行われず、kumoctlコマンドを使って手動で行う。

+
+

Consistent Hashing

+

Consistent Hashingを使ってデータを分散して保存する + *1 +

+

Serverがダウンしたときは、そのServerの仮想ノードにfaultフラグがセットされる。set/get/deleteはfaultフラグがセットされたServerをスキップして行われる。つまり、通常動作時はレプリケーションは3つ作成されるが、1台がfault状態ならコピーは2つ、2台がfault状態ならコピーは1つしか作成されないkeyが存在することになる。fault状態のServerが3台以上になると、get/set/deleteが失敗し続けるkeyが存在することになる。

+

Serverがダウンしてもfaultフラグがセットされるだけで、レプリケーションの再配置は行われない。faultフラグがセットされたServerが存在する状態で、kumoctlコマンドを使ってdetachコマンドをManagerに送信すると、faultフラグがセットされたServerがハッシュ空間から取り除かれる。同時にレプリケーションの再配置が行われ、すべてのkeyに対してレプリケーションが3つ作成されるようにデータがコピーされる。

+

Serverが追加されてもすぐにはハッシュ空間には追加されず、レプリケーションの再配置は行われない。新たなServerが起動している状態で、kumoctlコマンドを使ってattachコマンドをManagerに送信すると、新しいServerがハッシュ空間に追加される。同時にレプリケーションの再配置が行われ、すべてのkeyに対してレプリケーションが3つだけ存在するようにデータが移動される。

+

# TODO: auto-replace

+

set/get/deleteの保証範囲

+

set(key, value)

+

key=>valueを保存する。保存できれば成功を返す。保存できなければエラーを返す。

+

既にkeyが保存されていたとき、setが成功した場合はkeyの値は確実に上書きされている。

+

setが失敗したとき、keyの値は不定になっている。これは失敗したときにロールバックを行わないため。ロールバックを一貫性を損なうことなく行うための高級なアルゴリズムは実装されていない/使うと性能が低下してしまう。

+

Serverはレプリケーション先の2台〜0台のすべてのServerにデータが受信されたことを確認してからGatewayにレスポンスを返す。どれか1台でもコピー処理が失敗したらエラーを返す。つまり、アプリケーションに成功が返されたときはfault状態でないすべてのServerにレプリケーションがコピーされており、それ以降に古いデータが読み出されることはない。ただしディスクに書き込まれているとは限らない。

+

get(key)

+

keyをsetするリクエストが成功していた場合は、そのkeyに対応するvalueを返す。setが失敗していた場合は、nullか、setに失敗したvalueが返る。それ以外であればnullを返す。

+

keyをsetするリクエストが成功してvalueが保存されていたとしても、レプリケーションされたすべてのServerの負荷が非常に高いために応答できない場合は、getがタイムアウトする可能性がある。

+

keyが保存されていなかった場合はエラーにならないが、タイムアウトした場合はエラーになる。

+

delete(key)

+

keyを削除する。

+

再配置処理を行っている間にdeleteを行うと、削除されないことがある。また同じkeyに対してdeleteとsetをほぼ同時に行うと、削除されないことがある。これはレプリケーションを行うServer同士のやりとりが、Gatewayが送出したdeleteリクエストと入れ違う可能性があるため。

+

Serverの引数を調整することで、deleteが一貫性を保たない確率を減らすことができる。

+
+

動作環境と制限

+

サーバーの時刻設定

+

ManagerとServerを動作させるホストの時刻設定は、TIME_ERROR_MARGIN秒(コンパイル時に決定。デフォルトでは5秒)以上ずれていると正常に動作しない。またUTCとlocaltimeはどちらかに揃える必要がある。

+

# TODO

+
+
+

+ *1ハッシュ関数はSHA-1で、下位64ビットのみ使う。仮想ノードは128台

+
+
+

インストールと実行

+

依存関係

+

動作環境

+
    +
  • linux >= 2.6.18
  • +
  • glibc >= XXX
  • +
+

コンパイル時に必要なもの

+
    +
  • g++ >= 4.1
  • +
  • ragel >= 6.3
  • +
  • git >= XXX
  • +
+

コンパイル時と実行時に必要なもの

+
    +
  • ruby >= 1.8
  • +
  • rubygems
  • +
  • libcrypto(openssl)
  • +
  • zlib >= XXX
  • +
  • Tokyo Cabinet >= 1.4.9
  • +
+

コンパイル

+

まず最新のMessagePackをインストールする。

+
$ git clone git://git.sourceforge.jp/gitroot/msgpack/msgpack.git
+$ cd msgpack
+$ ./bootstrap && ./configure && make
+$ sudo make install
+$ cd ruby
+$ ./gengem
+$ sudo gem install gem/pkg/msgpack-*.gem
+
+

MessagePackのインストール

+

次にkumofsをインストールする。

+
$ ./configure && make && make install
+
+

以下の6つのコマンドがインストールされる:

+
+
kumo-manager
+
Managerノード。Serverノードの管理をする。
+
kumo-server
+
Serverノード。実際にデータを保存する。
+
kumo-gateway
+
Gatewayノード。memcachedプロトコルのサーバーで、アプリケーションからの要求をServerノードに中継する。
+
kumoctl
+
Managerノードを制御するための管理コマンド
+
kumolog
+
バイナリフォーマットのログをテキストフォーマットに変換する
+
kumomergedb
+
コールドバックアップファイルをマージする
+
+

configureフラグ

+
+
--with-msgpack=DIR
+
MessagePackがインストールされているディレクトリを指定する
+
--with-tokyocabinet=DIR
+
Tokyo Cabinetがインストールされているディレクトリを指定する
+
--enable-trace
+
画面を埋め尽くすほど冗長なデバッグ用のメッセージを出力するようにする
+
--with-tcmalloc[=DIR]
+
tcmallocとリンクする
+
+

実行例

+

Manager 2台, Server 4台を使った冗長構成

+

+ s1〜s4の4台でクラスタを構成し、c1とc2で動作するアプリケーションから利用する例。

+

s1〜s4でServerを起動し、s1とs2では同時にManagerも起動する。c1とc2ではGatewayを起動する。

+
[s1]$ kumo-manager -v -l s1 -p s2    # Manager同士は互いに指定する
+[s2]$ kumo-manager -v -l s2 -p s1    # Manager同士は互いに指定する
+[s1]$ kumo-server  -v -m s1 -p s2 -l s1 -s database.tch    # -mと-pでManagerを指定する
+[s2]$ kumo-server  -v -m s1 -p s2 -l s2 -s database.tch    # -lは常に自ホストのアドレス
+[s3]$ kumo-server  -v -m s1 -p s2 -l s3 -s database.tch    # -sはデータベース名
+[s4]$ kumo-server  -v -m s1 -p s2 -l s4 -s database.tch    # -vは冗長なメッセージを出力
+[c1]$ kumo-gateway -v -m s1 -p s2 -t 11211    # 11211/tcpでmemcachedテキストプロトコル
+[c2]$ kumo-gateway -v -m s1 -p s2 -t 11211    # を待ち受ける
+
+

localhostでクラスタを構成する

+

localhostでManagerノード1台、Server 2台を使ってクラスタを構成する例。

+
[localhost]$ kumo-manager -v -l localhost   # Managerを1台で運用するときは-pを省略
+                           # kumo-serverはポートを変えて起動する
+[localhost]$ kumo-server  -v -m localhost -l localhost:19801 -L 19901 -s database1.tch
+[localhost]$ kumo-server  -v -m localhost -l localhost:19802 -L 19902 -s database2.tch
+[localhost]$ kumo-gateway -v -m localhost -t 11211
+
+

主な引数

+

共通

+
+
-o <path.log>
+
ログを標準出力ではなく指定されたファイルに出力する
+
-g <path.mpac>
+
バイナリログを指定されたファイルに出力する
+
-d <path.pid>
+
デーモンになる。指定されたファイルにpidを書き出す
+
-v
+
WARNよりレベルの低いメッセージを出力する
+
-Ci <sec>
+
タイマークロックの間隔を秒で指定する。小数を指定できる
+
-Ys <sec>
+
connect(2)のタイムアウト時間を秒で指定する。小数を指定できる
+
-Yn <num>
+
connect(2)のリトライ回数を指定する
+
-TR <num>
+
送信用スレッドの数を指定する
+
-TW <num>
+
受信用スレッドの数を指定する
+
+

kumo-manager

+
+
-l <address>
+
待ち受けるアドレス。他のノードから見て接続できるホスト名とポート番号を指定する
+
-p <address>
+
もし存在するなら、もう一台のkumo-managerのホスト名とポート番号を指定する
+
-c <port>
+
kumoctlからのコマンドを受け付けるポート番号を指定する
+
--auto-replace
+
Serverが追加・切断されたときに、マニュアル操作を待たずにレプリケーションの再配置を自動的に行うようにする。実行中でもkumoctlコマンドを使って変更できる
+
+

kumo-server

+
+
-l <address>
+
待ち受けるアドレス。他のノードから見て接続できるホスト名とポート番号を指定する
+
-L <port>
+
kumo-serverが待ち受けるもう一つのポートのポート番号を指定する
+
-m <address>
+
kumo-managerのホスト名とポート番号を指定する
+
-p <address>
+
もし存在するなら、もう一台のkumo-managerのホスト名とポート番号を指定する
+
-s <path.tch>
+
データを保存するデータベースファイルのパスを指定する
+
-f <dir>
+
レプリケーションの再配置に使う一時ファイルを保存するディレクトリを指定する。データベースファイルのサイズに応じて十分な空き容量が必要
+
-gS <kilobytes>
+
deleteしたエントリのクロックを保持しておくメモリ使用量の上限をKB単位で指定する
+
-gN <seconds>
+
deleteしたエントリのクロックを保持しておく最小時間を指定する。メモリ使用量が上限に達していると、最大時間に満たなくても最小時間を過ぎていれば削除される。
+
-gX <seconds>
+
deleteしたエントリのクロックを保持しておく最大時間を指定する
+
+

kumo-gateway

+
+
-m <address>
+
kumo-managerのホスト名とポート番号を指定する
+
-p <address>
+
もし存在するなら、もう一台のkumo-managerのホスト名とポート番号を指定する
+
-t <port>
+
memcachedテキストプロトコルを待ち受けるポート番号を指定する
+
-G <number>
+
getの最大リトライ回数を指定する
+
-S <number>
+
setの最大リトライ回数を指定する
+
-D <number>
+
deleteの最大リトライ回数を指定する
+
-As
+
set操作でレプリケーションするとき、レプリケーション完了の応答を待たずに成功を返すようにする
+
-Ad
+
delete操作でレプリケーションするとき、レプリケーション完了の応答を待たずに成功を返すようにする
+
+
+

+ *1ハッシュ関数はSHA-1で、下位64ビットのみ使う。仮想ノードは128台

+
+
+

kumoctl

+

kumoctlコマンドを使うとManagerの状態を取得したり、コマンドを送ったりできる。

+

Rubyで書かれたスクリプト。実行するにはgemでmsgpackパッケージをインストールする。

+

第1引数にManagerのホスト名とポート番号を指定し、第2引数にコマンドを指定する。

+
$ kumoctl --help
+Usage: kumoctl address[:port=19799] command [options]
+command:
+   status                     get status
+   attach                     attach all new servers and start replace
+   attach-noreplace           attach all new servers
+   detach                     detach all fault servers and start replace
+   detach-noreplace           detach all fault servers
+   replace                    start replace without attach/detach
+   backup  [suffix=????????]  create backup with specified suffix
+   enable-auto-replace        enable auto replace
+   disable-auto-replace       disable auto replace
+
+

status

+

Managerが持っているハッシュ空間を取得して表示する。

+
$ kumoctl localhost status
+hash space timestamp:
+  Wed Dec 03 22:15:45 +0900 2008 clock 58
+attached node:
+  127.0.0.1:8000  (active)
+  127.0.0.1:8001  (fault)
+not attached node:
+  127.0.0.1:8002
+
+

statusの実行例

+

+ attached nodeはハッシュ空間に入っているServerの一覧を示している。(active)は正常動作中のServerで、(fault)はfaultフラグが立っているServerを示している。

+

+ not attached nodeはハッシュ空間に入っていないか、入っているが(fault)状態でまだ再attachされていないServerの一覧を示している。

+

レプリケーションの再配置を行ったとき、Managerが2台起動していれば2つのManager間で新しいハッシュ空間が同期される。ただし新しいハッシュ空間が空の時は同期されない。

+

この理由は、障害が発生していたManagerを復旧したときに空のハッシュ空間が同期されてしまう可能性があるため。起動した直後はクロック(後述)が調整されていないために、ハッシュ空間の新旧の比較が正常に機能しない。このため空のハッシュ空間を受け取ったときは無視するようになっている。 # FIXME この動作は正しい?もっと良い回避方法は無いか?

+
+

attach

+

statusでnot attached nodeに表示されているServerをハッシュ空間に組み入れ、レプリケーションの再配置を開始する。

+

attach-noreplace

+

attachと同じだがレプリケーションの再配置を開始しない。ただし再配置をしないまま長い間放置してはいけない。

+

再配置を行わないと、エラーが積もってGatewayから最新のハッシュ空間を要求されたとき(後述)、Gatewayが持っているハッシュ空間とServerが持っているハッシュ空間が食い違ってしまう。食い違うとsetやdeleteがいつまで経っても成功しなくなってしまう。

+

detach

+

statusでattached nodeに表示されていて(fault)状態のServerをハッシュ空間から取り除き、レプリケーションの再配置を開始する。

+

detach-noreplace

+

detachと同じだがレプリケーションの再配置を開始しない。再配置をしないまま長い間放置してはいけない。

+

replace

+

レプリケーションの再配置を開始する。

+

backup

+

コールドバックアップを作成する。バックアップはServerで作成され、元のデータベース名にsuffixを付けた名前のファイルにデータベースがコピーされる。手元にバックアップを持ってくるには、rsyncやscpなどを使ってServerからダウンロードする。

+

suffixは省略するとその日の日付(YYYYMMDD)が使われる。

+

作成されたバックアップファイルは、kumomergedbコマンドを使って1つのファイルに結合することができる。

+
$ kumomergedb backup.tch-20090101 \
+              server1.tch-20090101 server2.tch-20090101 server3.tch-20090101
+
+

kumomergedbコマンドの実行例

+
+

kumostat

+

kumostatコマンドを使うとServerの状態を取得することができる。

+

Rubyで書かれたスクリプト。実行するにはgemでmsgpackパッケージをインストールする。

+

第1引数にServerのホスト名とポート番号を指定し、第2引数にコマンドを指定する。

+
Usage: kumostat address[:port=19800] command [options]
+command:
+   pid                        get pid of server process
+   uptime                     get uptime
+   time                       get UNIX time
+   version                    get version
+   cmd_get                    get number of get requests
+   cmd_set                    get number of set requests
+   cmd_delete                 get number of delete requests
+   items                      get number of stored items
+
+

pid

+

kumo-serverプロセスのpidを取得する。

+

uptime

+

kumo-serverプロセスの起動時間を取得する。単位は秒。

+

time

+

kumo-serverプロセスが走っているホストのUNIXタイムを取得する。

+

version

+

バージョンを取得する。

+

cmd_get / cmd_set / cmd_delete

+

それぞれGatewayからのGetリクエスト、Setリクエスト、Deleteリクエストを処理した回数を取得する。

+

items

+

データベースに入っているエントリの数を取得する。

+
+
+

ログ

+

kumo-manager, kumo-server, kumo-gatewayは、それぞれ2種類のログを出力する:

+
+
テキストログ
+
行区切りのテキストフォーマットのログ。通常標準出力に出力される
+
バイナリログ
+
MessagePackでシリアライズされたログ
+
+

テキストログは常に出力される。-vオプションを付けると冗長なログも出力されるようになる。テキストログはファイルに書き出すこともできるが、ログローテーションはサポートしていない。-d <path.pid>オプションを指定してデーモンとして起動するか、-o -オプションを指定すると、ログに色が付かなくなる。

+

バイナリログは-g <path.mpac>オプションを付けたときだけ出力される。-vオプションは影響しない。バイナリログはSIGHUPシグナルを受け取るとログファイルを開き直すため、logrotateなどを使ってログローテーションができる。

+

バイナリログはkumologコマンドを使ってテキストに変換して読むことができる。

+
$ kumolog manager.mpac
+
+

kumologコマンドの実行例

+
+

チューニング

+

データベースのチューニング

+

Tokyo Cabinetのチューニングによって性能が大きく変わる。kumo-serverを起動する前にあらかじめtchmgrコマンドでデータベースファイルを作成しておく。

+

チューニングのパラメータはTokyo Cabinetのドキュメント参照。http://tokyocabinet.sourceforge.net/spex-ja.html +

+
$ tchmgr create /path/to/database.tch 1048568  # バケット数を1048568個にして作成
+$ kumo-server -m localhost -s /path/to/database.tch
+
+

タイムアウト時間とkeepalive間隔の調整

+

# TODO

+
+

死活監視と再配置

+

障害の検出

+

ManagerとServerの接続では、あるノードにリクエストまたはレスポンスを送信しようとしたときに、そのノードとのコネクションが一本も存在せず、さらにconnect(2)が4回 + *2 + 連続して失敗したら、そのノードはダウンしたと見なす。

+

ManagerとServerは2秒間隔 + *3 + でkeepaliveメッセージをやりとりしているので、いつも何らかのリクエストかレスポンスを送ろうとしている状態になっている。

+

connect(2)は次の条件で失敗する:

+
    +
  • 接続相手から明示的に接続を拒否された(Connection Refused)
  • +
  • 接続相手からの応答がない時間が3ステップ + *4 + 続いた。1ステップは0.5秒 + *5 + +
  • +
+
+

接続の検出

+

ManagerとServerの接続では、あるノードから接続を受け付けた後、そのノードから初期ネゴシエーションメッセージを受け取り、かつそのメッセージのフォーマットが正しければ、そのノードが新たに起動したと見なす。

+
+

ハッシュ空間の更新

+

Consistent Hashingのハッシュ空間を更新できるのはManagerだけで、最新のハッシュ空間は常にManagerが持っている。

+

通常動作時には1種類のハッシュ空間しか存在しないが、レプリケーションの再配置を行っている間は2種類のバージョンが存在する。最新のもの(Serverの追加/切り離しの更新が反映されている)はwhs、1つ前のバージョン(Serverの追加/切り離しの更新が反映されていない)はrhsという名前が付いている。

+
+

Managerはkumoctlコマンドでレプリケーションの再配置を行うように指令されると、まずServerの追加/切り離しをwhsに反映する。もう1台のManagerが存在すればそのManagerに更新したwhsを送信する。

+

次に認識しているすべてのServerにwhsを送信し、レプリケーションのコピーを行うようにコマンドを送る。Serverは自分が持っているwhsとManagerから送られてきたwhsを比較し、必要なら他のServerにデータのコピーを行う(このときデータベースを上から下まで読み込む)。Serverはコピーが終わったらwhsをrhsにコピーする。

+

Serverはすべてのデータを確認し終えたら、Managerにコピーが終了した旨を通知する。ManagerはすべてのServerでコピーが終了した通知を受け取ったら、whsをrhsにコピーする。また、認識しているすべてのサーバーにレプリケーションの削除を行うようにコマンドを送る。Serverはwhsを参照して、自分が持っている必要がないデータがデータベースの中に入っていたら、それを削除する(このときもデータベースを上から下まで読み込む)。

+

Managerはレプリケーションのコピーを行っている最中にServerがダウンしたことを検知したら、すべてのServerからレプリケーションのコピーが終了した通知を受け取っても、レプリケーションの削除を行わない。

+

ServerはGatewayからget/set/deleteリクエストを受け取ったとき、そのkeyに対する割り当てノードが本当に自分であるか確認するために、getの場合はrhsを、set/deleteの場合はwhsを参照する。

+
+

レプリケーションの再配置アルゴリズム

+

# TODO レプリケーションの再配置アルゴリズム

+

logic/srv_replace.cc:Server::replace_copy()

+
+

レプリケーション

+

set/deleteの伝播

+

Gatewayにsetリクエストを送信すると、keyにハッシュ関数を適用してハッシュ空間から検索し、一番最初にヒットしたServerに対してsetリクエストが送信される。

+

setリクエストを受け取ったServerは、keyのハッシュをハッシュ空間から検索し、自分が確かに最初にヒットするServerかどうか確かめる。そうでなければGatewayに「ハッシュ空間が古いぞ」とエラーを返す。

+

次にServerは、自分の次のServerと次の次のServerにデータをコピーする。このときコピー先のServerにfaultフラグが立っていたら、そのServerにはコピーしない。

+

Gatewayはset/deleteが何回失敗しても、次のServerにフォールバックすることはない。set先のServerが別のServerに切り替わるのは、Managerから新しいハッシュ空間が届いたときのみ。

+

以上の仕組みから、あるkeyをset/deleteするときは必ず単一のServerを経由することになる。このためほぼ同時にset/deleteされても必ず順序が付けられ、常に最新の結果だけが残る。

+
+

getのフォールバック

+

Gatewayはgetリクエストがタイムアウトしたり失敗したりすると、ハッシュ空間上の次のServerにリクエストする。それでもタイムアウトしたときは次の次のServerにリクエストする。リトライ回数の上限に達するまで、最初のServer→次のServer→次の次のServer→最初のServer→…とリトライが繰り返される。

+

getはManagerから新しいハッシュ空間が届くのを待つことなくフォールバックする。

+
+

タイムアウト

+

GatewayでもServerでもManagerでも、リクエストを送ってから10ステップ(1ステップは0.5秒 + *6 + )の間にレスポンスが返ってこないと、そのリクエストはタイムアウトしてエラーになる。

+

プログラムから見てTCPコネクションが確立しているか否かはタイムアウトには関係しない。コネクションが確立していなくても時間以内に再接続してレスポンスが返れば通常通り処理が続行され、コネクションが確立していても時間以内にレスポンスが返ってこなければタイムアウトする。

+

GatewayはServerに送ったリクエストがエラーになった回数が5回 + *7 + 以上になると、Managerから最新のハッシュ空間を取得する。

+
+

リトライ

+

Gatewayはsetは最大20回 + *8 + まで、deleteは最大20回 + *9 + まで、getは最大5×(レプリケーション数==3 + 1)回 + *10 + までリトライする。制限回数までリトライしても失敗したらアプリケーションにエラーが返される。

+
+

+ *1ハッシュ関数はSHA-1で、下位64ビットのみ使う。仮想ノードは128台

+

+ *2--connect-retry-limitで指定

+

+ *3--keep-alive-interval引数で指定

+

+ *4--connect-timeout-steps引数で指定

+

+ *5--clock-interval引数で指定

+

+ *6--clock-interval引数で指定

+

+ *7--renew-threashold引数で指定

+

+ *8--set-retry引数で指定

+

+ *9--delete-retry引数で指定

+

+ *10係数は--get-retry引数で指定

+
+
+

クロック

+

データベースに保存されているすべてのvalueや、ハッシュ空間には、クロック(=タイムスタンプ)が付与されている。value同士やハッシュ空間同士でどちらが新しいかを比べるために利用している。

+

ref:Lamport Clockの解説 +

+

クロックのフォーマット

+

クロックは64ビットの整数で、上位32ビットにはUNIXタイム(精度は秒)、下位32ビットにはLamport Clockが入っている。

+

UNIXタイムが上位に入っているので、Server/Manager同士の時刻が1秒以上ずれていると、Lamport Clockに関係なく間違った比較が行われてしまう。

+

データベースのフォーマット

+

データベースにkeyを保存するとき、先頭の64ビットにkeyのハッシュを付加して保存する。

+

データベースにvalueを保存するとき、先頭の64ビットにクロックを付加して保存する。またその次の64ビットも予約してあるが、使っていない。

+
Database entry format
+Big endian
+
+key:
++--------+-----------------+
+|   64   |       ...       |
++--------+-----------------+
+hash
+         key
+
+value:
++--------+--------+-----------------+
+|   64   |   64   |       ...       |
++--------+--------+-----------------+
+clocktime
+         meta
+                  data
+
+

レプリケーションでの利用

+

Serverから別のServerにデータをコピーするとき、後から来たsetリクエストのレプリケーションが、先に来たsetリクエストのレプリケーションを追い抜いて先行してしまうことが発生し得る。Serverはレプリケーションを受け取ったとき、既に保存されているvalueのクロックと新たに届いたvalueのクロックを比べ、新たに届いた方が新しかった場合のみデータベースを更新する。

+

レプリケーションの再配置を行うとき、ほとんどの場合はレプリケーションされたどのServerも同じデータを持っているが、setが失敗していた場合は異なるデータを持っている可能性がある。このときどのServerが持っているデータが最新なのか比べる必要があり、クロックを利用して比較する。

+

Manager間の協調動作での利用

+

Managerが2台動作しているとき、どちらが持っているハッシュ空間が最新なのかを比べる必要がある。ハッシュ空間を更新するときに更新した時のクロックを付与しておき、比較するときにこのクロックを利用する。

+
+

+ *1ハッシュ関数はSHA-1で、下位64ビットのみ使う。仮想ノードは128台

+

+ *2--connect-retry-limitで指定

+

+ *3--keep-alive-interval引数で指定

+

+ *4--connect-timeout-steps引数で指定

+

+ *5--clock-interval引数で指定

+

+ *6--clock-interval引数で指定

+

+ *7--renew-threashold引数で指定

+

+ *8--set-retry引数で指定

+

+ *9--delete-retry引数で指定

+

+ *10係数は--get-retry引数で指定

+
+ + diff --git a/doc/memo.pdf b/doc/memo.pdf new file mode 100644 index 0000000..26191f1 Binary files /dev/null and b/doc/memo.pdf differ diff --git a/doc/memo.txt b/doc/memo.txt new file mode 100644 index 0000000..9381b43 --- /dev/null +++ b/doc/memo.txt @@ -0,0 +1,401 @@ +?title Kumofs memo -- Kumo Fast Storage rev.948 +?author FURUHASHI Sadayuki + + +*概要 +kumofsはkey-value型のデータを保存する分散ストレージ。key=>valueを保存する''set''、keyを取得する''get''、keyを削除する''delete''の3つの操作をサポートする。 +データを保持する''Server''、Server群を管理する''Manager''、アプリケーションからのリクエストをServerに中継する''Gateway''の3種類のノードでシステムを構成する。 +データは複数のServerに分散して保存されるため、Serverを追加するほど性能が向上する。 +データは3台のServerにコピーされて保存される。2台までならServerがダウンしても動作し続ける。 +Server群はManagerによって死活監視されている。Serverがダウンしたら、そのServerは直ちにシステムから切り離される。ただし1台か2台のManagerが起動していないとServerの切り離しが行われないので、Managerが1台も起動していない状態でServerがダウンするとシステムが停止してしまう。 +Serverを追加したり切り離したりした後、その状態をシステムに反映するには、レプリケーションされたデータの再配置を行う必要がある。これは自動では行われず、''kumoctlコマンド''を使って手動で行う。 + + +**Consistent Hashing +Consistent Hashingを使ってデータを分散して保存する((ハッシュ関数はSHA-1で、下位64ビットのみ使う。仮想ノードは128台))。 +Serverがダウンしたときは、そのServerの仮想ノードに''faultフラグ''がセットされる。set/get/deleteはfaultフラグがセットされたServerをスキップして行われる。つまり、通常動作時はレプリケーションは3つ作成されるが、1台がfault状態ならコピーは2つ、2台がfault状態ならコピーは1つしか作成されないkeyが存在することになる。fault状態のServerが3台以上になると、get/set/deleteが失敗し続けるkeyが存在することになる。 +Serverがダウンしてもfaultフラグがセットされるだけで、レプリケーションの再配置は行われない。faultフラグがセットされたServerが存在する状態で、kumoctlコマンドを使って''detach''コマンドをManagerに送信すると、faultフラグがセットされたServerがハッシュ空間から取り除かれる。同時にレプリケーションの再配置が行われ、すべてのkeyに対してレプリケーションが3つ作成されるようにデータがコピーされる。 +Serverが追加されてもすぐにはハッシュ空間には追加されず、レプリケーションの再配置は行われない。新たなServerが起動している状態で、kumoctlコマンドを使って''attach''コマンドをManagerに送信すると、新しいServerがハッシュ空間に追加される。同時にレプリケーションの再配置が行われ、すべてのkeyに対してレプリケーションが3つだけ存在するようにデータが移動される。 + +# TODO: auto-replace + +**set/get/deleteの保証範囲 +***set(key, value) +key=>valueを保存する。保存できれば成功を返す。保存できなければエラーを返す。 +既にkeyが保存されていたとき、setが成功した場合はkeyの値は確実に上書きされている。 +setが失敗したとき、keyの値は不定になっている。これは失敗したときにロールバックを行わないため。ロールバックを一貫性を損なうことなく行うための高級なアルゴリズムは実装されていない/使うと性能が低下してしまう。 
+Serverはレプリケーション先の2台〜0台のすべてのServerにデータが受信されたことを確認してからGatewayにレスポンスを返す。どれか1台でもコピー処理が失敗したらエラーを返す。つまり、アプリケーションに成功が返されたときはfault状態でないすべてのServerにレプリケーションがコピーされており、それ以降に古いデータが読み出されることはない。ただしディスクに書き込まれているとは限らない。 + +***get(key) +keyをsetするリクエストが成功していた場合は、そのkeyに対応するvalueを返す。setが失敗していた場合は、nullか、setに失敗したvalueが返る。それ以外であればnullを返す。 +keyをsetするリクエストが成功してvalueが保存されていたとしても、レプリケーションされたすべてのServerの負荷が非常に高いために応答できない場合は、getがタイムアウトする可能性がある。 +keyが保存されていなかった場合はエラーにならないが、タイムアウトした場合はエラーになる。 + +***delete(key) +keyを削除する。 +再配置処理を行っている間にdeleteを行うと、削除されないことがある。また同じkeyに対してdeleteとsetをほぼ同時に行うと、削除されないことがある。これはレプリケーションを行うServer同士のやりとりが、Gatewayが送出したdeleteリクエストと入れ違う可能性があるため。 +Serverの引数を調整することで、deleteが一貫性を保たない確率を減らすことができる。 + + +**動作環境と制限 +***サーバーの時刻設定 +ManagerとServerを動作させるホストの時刻設定は、TIME_ERROR_MARGIN秒(コンパイル時に決定。デフォルトでは5秒)以上ずれていると正常に動作しない。またUTCとlocaltimeはどちらかに揃える必要がある。 + +# TODO + + +@footnote + + +*インストールと実行 +**依存関係 +***動作環境 +-linux >= 2.6.18 +-glibc >= XXX + +***コンパイル時に必要なもの +-g++ >= 4.1 +-ragel >= 6.3 +-git >= XXX + +***コンパイル時と実行時に必要なもの +-ruby >= 1.8 +-rubygems +-libcrypto(openssl) +-zlib >= XXX +-Tokyo Cabinet >= 1.4.9 + +**コンパイル +まず最新のMessagePackをインストールする。 +>|sh| +$ git clone git://git.sourceforge.jp/gitroot/msgpack/msgpack.git +$ cd msgpack +$ ./bootstrap && ./configure && make +$ sudo make install +$ cd ruby +$ ./gengem +$ sudo gem install gem/pkg/msgpack-*.gem +||< +^title MessagePackのインストール + +次にkumofsをインストールする。 +>|sh| +$ ./configure && make && make install +||< + +以下の4つのコマンドがインストールされる: +:kumo-manager:Managerノード。Serverノードの管理をする。 +:kumo-server:Serverノード。実際にデータを保存する。 +:kumo-gateway:Gatewayノード。memcachedプロトコルのサーバーで、アプリケーションからの要求をServerノードに中継する。 +:kumoctl:Managerノードを制御するための管理コマンド +:kumolog:バイナリフォーマットのログをテキストフォーマットに変換する +:kumomergedb:コールドバックアップファイルをマージする + +***configureフラグ +:--with-msgpack=DIR:MessagePackがインストールされているディレクトリを指定する +:--with-tokyocabinet=DIR:Tokyo Cabinetがインストールされているディレクトリを指定する +:--enable-trace:画面を埋め尽くすほど冗長なデバッグ用のメッセージを出力するようにする +:--with-tcmalloc[=DIR]:tcmallocとリンクする + +**実行例 +***Manager 2台, 
Server 4台を使った冗長構成 +''s1''〜''s4''の4台でクラスタを構成し、''c1''と''c2''で動作するアプリケーションから利用する例。 +s1〜s4でServerを起動し、''s1''と''s2''では同時にManagerも起動する。''c1''と''c2''ではGatewayを起動する。 +>|sh| +[s1]$ kumo-manager -v -l s1 -p s2 # Manager同士は互いに指定する +[s2]$ kumo-manager -v -l s2 -p s1 # Manager同士は互いに指定する +[s1]$ kumo-server -v -m s1 -p s2 -l s1 -s database.tch # -mと-pでManagerを指定する +[s2]$ kumo-server -v -m s1 -p s2 -l s2 -s database.tch # -lは常に自ホストのアドレス +[s3]$ kumo-server -v -m s1 -p s2 -l s3 -s database.tch # -sはデータベース名 +[s4]$ kumo-server -v -m s1 -p s2 -l s4 -s database.tch # -vは冗長なメッセージを出力 +[c1]$ kumo-gateway -v -m s1 -p s2 -t 11211 # 11211/tcpでmemcachedテキストプロトコル +[c2]$ kumo-gateway -v -m s1 -p s2 -t 11211 # を待ち受ける +||< + +***localhostでクラスタを構成する +localhostでManagerノード1台、Server 2台を使ってクラスタを構成する例。 +>|sh| +[localhost]$ kumo-manager -v -l localhost # Managerを1台で運用するときは-pを省略 + # kumo-serverはポートを変えて起動する +[localhost]$ kumo-server -v -m localhost -l localhost:19801 -L 19901 -s database1.tch +[localhost]$ kumo-server -v -m localhost -l localhost:19802 -L 19902 -s database2.tch +[localhost]$ kumo-gateway -v -m localhost -t 11211 +||< + + +**主な引数 +**共通 +:-o :ログを標準出力ではなく指定されたファイルに出力する +:-g :バイナリログを指定されたファイルに出力する +:-d :デーモンになる。指定されたファイルにpidを書き出す +:-v:WARNよりレベルの低いメッセージを出力する +:-Ci :タイマークロックの間隔を秒で指定する。小数を指定できる +:-Ys :connect(2)のタイムアウト時間を秒で指定する。小数を指定できる +:-Yn :connect(2)のリトライ回数を指定する +:-TR :送信用スレッドの数を指定する +:-TW :受信用スレッドの数を指定する + +***kumo-manager +:-l
:待ち受けるアドレス。''他のノードから見て''接続できるホスト名とポート番号を指定する +:-p
:もし存在するなら、もう一台のkumo-managerのホスト名とポート番号を指定する +:-c :kumoctlからのコマンドを受け付けるポート番号を指定する +:--auto-replace:Serverが追加・切断されたときに、マニュアル操作を待たずにレプリケーションの再配置を自動的に行うようにする。実行中でもkumoctlコマンドを使って変更できる + +***kumo-server +:-l
:待ち受けるアドレス。''他のノードから見て''接続できるホスト名とポート番号を指定する +:-L :kumo-serverが待ち受けるもう一つのポートのポート番号を指定する +:-m
:kumo-managerのホスト名とポート番号を指定する +:-p
:もし存在するなら、もう一台のkumo-managerのホスト名とポート番号を指定する +:-s :データを保存するデータベースファイルのパスを指定する +:-f :レプリケーションの再配置に使う一時ファイルを保存するディレクトリを指定する。データベースファイルのサイズに応じて十分な空き容量が必要 +:-gS :deleteしたエントリのクロックを保持しておくメモリ使用量の上限をKB単位で指定する +:-gN :deleteしたエントリのクロックを保持しておく最小時間を指定する。メモリ使用量が上限に達していると、最大時間に満たなくても最小時間を過ぎていれば削除される。 +:-gX :deleteしたエントリのクロックを保持しておく最大時間を指定する + +***kumo-gateway +:-m
:kumo-managerのホスト名とポート番号を指定する +:-p
:もし存在するなら、もう一台のkumo-managerのホスト名とポート番号を指定する +:-t :memcachedテキストプロトコルを待ち受けるポート番号を指定する +:-G :getの最大リトライ回数を指定する +:-S :setの最大リトライ回数を指定する +:-D :deleteの最大リトライ回数を指定する +:-As:set操作でレプリケーションするとき、レプリケーション完了の応答を待たずに成功を返すようにする +:-Ad:delete操作でレプリケーションするとき、レプリケーション完了の応答を待たずに成功を返すようにする + +@footnote + + +*kumoctl +kumoctlコマンドを使うとManagerの状態を取得したり、コマンドを送ったりできる。 +Rubyで書かれたスクリプト。実行するにはgemでmsgpackパッケージをインストールする。 +第1引数にManagerのホスト名とポート番号を指定し、第2引数にコマンドを指定する。 +>|sh| +$ kumoctl --help +Usage: kumoctl address[:port=19799] command [options] +command: + status get status + attach attach all new servers and start replace + attach-noreplace attach all new servers + detach detach all fault servers and start replace + detach-noreplace detach all fault servers + replace start replace without attach/detach + backup [suffix=????????] create backup with specified suffix + enable-auto-replace enable auto replace + disable-auto-replace disable auto replace +||< + +**status +Managerが持っているハッシュ空間を取得して表示する。 +>|| +$ kumoctl localhost status +hash space timestamp: + Wed Dec 03 22:15:45 +0900 2008 clock 58 +attached node: + 127.0.0.1:8000 (active) + 127.0.0.1:8001 (fault) +not attached node: + 127.0.0.1:8002 +||< +^title statusの実行例 +''attached node''はハッシュ空間に入っているServerの一覧を示している。''(active)''は正常動作中のServerで、''(fault)''はfaultフラグが立っているServerを示している。 +''not attached node''はハッシュ空間に入っていないか、入っているが(fault)状態でまだ再attachされていないServerの一覧を示している。 + +レプリケーションの再配置を行ったとき、Managerが2台起動していれば2つのManager間で新しいハッシュ空間が同期される。ただし新しいハッシュ空間が空の時は同期されない。 +この理由は、障害が発生していたManagerを復旧したときに空のハッシュ空間が同期されてしまう可能性があるため。起動した直後はクロック(後述)が調整されていないために、ハッシュ空間の新旧の比較が正常に機能しない。このため空のハッシュ空間を受け取ったときは無視するようになっている。 # FIXME この動作は正しい?もっと良い回避方法は無いか? 
+ + +**attach +statusで''not attached node''に表示されているServerをハッシュ空間に組み入れ、レプリケーションの再配置を開始する。 + +**attach-noreplace +attachと同じだがレプリケーションの再配置を開始しない。ただし再配置をしないまま長い間放置してはいけない。 +再配置を行わないと、エラーが積もってGatewayから最新のハッシュ空間を要求されたとき(後述)、Gatewayが持っているハッシュ空間とServerが持っているハッシュ空間が食い違ってしまう。食い違うとsetやdeleteがいつまで経っても成功しなくなってしまう。 + +**detach +statusで''attached node''に表示されていて(fault)状態のServerをハッシュ空間から取り除き、レプリケーションの再配置を開始する。 + +**detach-noreplace +detachと同じだがレプリケーションの再配置を開始しない。再配置をしないまま長い間放置してはいけない。 + +**replace +レプリケーションの再配置を開始する。 + +**backup +コールドバックアップを作成する。バックアップはServerで作成され、元のデータベース名にsuffixを付けた名前のファイルにデータベースがコピーされる。手元にバックアップを持ってくるには、rsyncやscpなどを使ってServerからダウンロードする。 +suffixは省略するとその日の日付(YYMMDD)が使われる。 +作成されたバックアップファイルは、kumomergedbコマンドを使って1つのファイルに結合することができる。 +>|sh| +$ kumomergedb backup.tch-20090101 \ + server1.tch-20090101 server2.tch-20090101 server3.tch-20090101 +||< +^title kumomergedbコマンドの実行例 + + +*kumostat +kumostatコマンドを使うとServerの状態を取得することができる。 +Rubyで書かれたスクリプト。実行するにはgemでmsgpackパッケージをインストールする。 +第1引数にServerのホスト名とポート番号を指定し、第2引数にコマンドを指定する。 +>|| +Usage: kumostat address[:port=19800] command [options] +command: + pid get pid of server process + uptime get uptime + time get UNIX time + version get version + cmd_get get number of get requests + cmd_set get number of set requests + cmd_delete get number of delete requests + items get number of stored items +||< + +**pid +kumo-serverプロセスのpidを取得する + +**uptime +kumo-serverプロセスの起動時間を取得する。単位は秒。 + +**time +kumo-serverプロセスが走っているホストのUNIXタイムを取得する。 + +**version +バージョンを取得する。 + +**cmd_get / cmd_set / cmd_delete +それぞれGatewayからのGetリクエスト、Setリクエスト、Deleteリクエストを処理した回数を取得する。 + +**items +データベースに入っているエントリの数を取得する。 + + + +*ログ +kumo-manager, kumo-server, kumo-gatewayは、それぞれ2種類のログを出力する: +:テキストログ:行区切りのテキストフォーマットのログ。通常標準出力に出力される +:バイナリログ:MessagePackでシリアライズされたログ + +テキストログは常に出力される。''-v''オプションを付けると冗長なログも出力されるようになる。テキストログはファイルに書き出すこともできるが、ログローテーションはサポートしていない。''-d ''オプションを指定してデーモンとして起動するか、''-o -''オプションを指定すると、ログに色が付かなくなる。 + +バイナリログは''-g 
''オプションを付けたときだけ出力される。''-v''オプションは影響しない。バイナリログはSIGHUPシグナルを受け取るとログファイルを開き直すため、logrotateなどを使ってログローテーションができる。 + +バイナリログは''kumolog''コマンドを使ってテキストに変換して読むことができる。 +>|sh| +$ kumolog manager.mpac +||< +^title kumologコマンドの実行例 + + +*チューニング +**データベースのチューニング +Tokyo Cabinetのチューニングによって性能が大きく変わる。kumo-serverを起動する前にあらかじめ''tchmgr''コマンドでデータベースファイルを作成しておく。 +チューニングのパラメータはTokyo Cabinetのドキュメント参照。http://tokyocabinet.sourceforge.net/spex-ja.html +>|sh| +$ tchmgr create /path/to/database.tch 1048568 # バケット数を2097136個にして作成 +$ kumo-server -m localhost -s /path/to/database.tch +||< + +**タイムアウト時間とkeepalive間隔の調整 +# TODO + + +*死活監視と再配置 +**障害の検出 +ManagerとServerの接続では、あるノードにリクエストまたはレスポンスを送信しようとしたときに、そのノードとのコネクションが一本も存在せず、さらにconnect(2)が4回((--connect-retry-limitで指定))連続して失敗したら、そのノードはダウンしたと見なす。 +ManagerとServerは2秒間隔((--keep-alive-interval引数で指定))でkeepaliveメッセージをやりとりしているので、いつも何らかのリクエストかレスポンスを送ろうとしている状態になっている。 +connect(2)は次の条件で失敗する: +-接続相手から明示的に接続を拒否された(Connection Refused) +-接続相手からの応答がない時間が3ステップ((--connect-timeout-steps引数で指定))続いた。1ステップは0.5秒((--clock-interval引数で指定)) + + +**接続の検出 +ManagerとServerの接続では、あるノードから接続を受け付けた後、そのノードから初期ネゴシエーションメッセージを受け取り、かつそのメッセージのフォーマットが正しければ、そのノードが新たに起動したと見なす。 + + +**ハッシュ空間の更新 +Consistent Hashingのハッシュ空間を更新できるのはManagerだけで、最新のハッシュ空間は常にManagerが持っている。 +通常動作時には1種類のハッシュ空間しか存在しないが、レプリケーションの再配置を行っている間は2種類のバージョンが存在する。最新のもの(Serverの追加/切り離しの更新が反映されている)は''whs''、1つ前のバージョン(Serverの追加/切り離しの更新が反映されていない)は''rhs''という名前が付いている。 + + +Managerはkumoctlコマンドでレプリケーションの再配置を行うように指令されると、まずServerの追加/切り離しをwhsに反映する。もう1台のManagerが存在すればそのManagerに更新したwhsを送信する。 +次に認識しているすべてのServerにwhsを送信し、レプリケーションのコピーを行うようにコマンドを送る。Serverは自分が持っているwhsとManagerから送られてきたwhsを比較し、必要なら他のServerにデータのコピーを行う(このときデータベースを上から下まで読み込む)。Serverはコピーが終わったらwhsをrhsにコピーする。 +Serverはすべてのデータを確認し終えたら、Managerにコピーが終了した旨を通知する。ManagerはすべてのServerでコピーが終了した通知を受け取ったら、whsをrhsにコピーする。また、認識しているすべてのサーバーにレプリケーションの削除を行うようにコマンドを送る。Serverはwhsを参照して、自分が持っている必要がないデータがデータベースの中に入っていたら、それを削除する(このときもデータベースを上から下まで読み込む)。 + 
+Managerはレプリケーションのコピーを行っている最中にServerがダウンしたことを検知したら、すべてのServerからレプリケーションのコピーが終了した通知を受け取っても、レプリケーションの削除を行わない。 + +ServerはGatewayからget/set/deleteリクエストを受け取ったとき、そのkeyに対する割り当てノードが本当に自分であるか確認するために、getの場合はrhsを、set/deleteの場合はwhsを参照する。 + + +**レプリケーションの再配置アルゴリズム +# TODO レプリケーションの再配置アルゴリズム +logic/srv_replace.cc:Server::replace_copy() + + +*レプリケーション +**set/deleteの伝播 +Gatewayにsetリクエストを送信すると、keyにハッシュ関数を適用してハッシュ空間から検索し、一番最初にヒットしたServerに対してsetリクエストが送信される。 +setリクエストを受け取ったServerは、keyのハッシュをハッシュ空間から検索し、自分が確かに最初にヒットするServerかどうか確かめる。そうでなければGatewayに「ハッシュ空間が古いぞ」とエラーを返す。 +次にServerは、自分の次のServerと次の次のServerにデータをコピーする。このときコピー先のServerにfaultフラグが立っていたら、そのServerにはコピーしない。 + +Gatewayはset/deleteが何回失敗しても、次のServerにフォールバックすることはない。set先のServerが別のServerに切り替わるのは、Managerから新しいハッシュ空間を届いたときのみ。 + +以上の仕組みから、あるkeyをset/deleteするときは必ず単一のServerを経由することになる。このためほぼ同時にset/deleteされても必ず順序が付けられ、常に最新の結果がだけが残る。 + + +**getのフォールバック +Gatewayはgetリクエストがタイムアウトしたり失敗したりすると、ハッシュ空間上の次のServerにリクエストする。それでもタイムアウトしたときは次の次のServerにリクエストする。リトライ回数の上限に達するまで、最初のServer→次のServer→次の次のServer→最初のServer→…とリトライが繰り返される。 + +getはManagerから新しいハッシュ空間が届くのを待つことなくフォールバックする。 + + +**タイムアウト +GatewayでもServerでもManagerでも、リクエストを送ってから10ステップ(1ステップは0.5秒((--clock-interval引数で指定)))の間にレスポンスが返ってこないと、そのリクエストはタイムアウトしてエラーになる。 +プログラムから見てTCPコネクションが確立しているか否かはタイムアウトには関係しない。コネクションが確立していなくても時間以内に再接続してレスポンスが返れば正常通り処理が続行され、コネクションが確立していても時間以内にレスポンスが返ってこなければタイムアウトする。 + +GatewayはServerに送ったリクエストがエラーになった回数が5回((--renew-threashold引数で指定))以上失敗すると、Managerから最新のハッシュ空間を取得する。 + + +**リトライ +Gatewayはsetは最大20回((--set-retry引数で指定))まで、deleteは最大20回((--delete-retry引数で指定))まで、getは最大5×(レプリケーション数==3 + 1)回((係数は--get-retry引数で指定))までリトライする。制限回数までリトライしても失敗したらアプリケーションにエラーが返される。 + +@footnote + + +*クロック +データベースに保存されているすべてのvalueや、ハッシュ空間には、クロック(=タイムスタンプ)が付与されている。value同士やハッシュ空間同士でどちらが新しいかを比べるために利用している。 +ref:[[Lamport Clockの解説>http://funini.com/kei/logos/clock.shtml]] + +**クロックのフォーマット +クロックは64ビットの整数で、上位32ビットにはUNIXタイム(精度は秒)、下位32ビットにはLamport Clockが入っている。 +UNIXタイムが上位に入っているので、Server/Manager同士の時刻が1秒以上ずれていると、Lamport 
Clockに関係なく間違った比較が行われてしまう。 + +**データベースのフォーマット +データベースにkeyを保存するとき、先頭の64ビットにkeyのハッシュを負荷して保存する。 +データベースにvalueを保存するとき、先頭の64ビットにクロックを付加して保存する。またその次の64ビットも予約してあるが、使っていない。 +>|| +Database entry format +Big endian + +key: ++--------+-----------------+ +| 64 | ... | ++--------+-----------------+ +hash + key + +value: ++--------+--------+-----------------+ +| 64 | 64 | ... | ++--------+--------+-----------------+ +clocktime + meta + data +||< + +**レプリケーションでの利用 +Serverから別のServerにデータをコピーするとき、後から来たsetリクエストのレプリケーションが、先に来たsetリクエストのレプリケーションを追い抜いて先行してしまうことが発生し得る。Serverはレプリケーションを受け取ったとき、既に保存されているvalueのクロックと新たに届いたvalueのクロックを比べ、新たに届いた方が新しかった場合のみデータベースを更新する。 +レプリケーションの再配置を行うとき、ほとんどの場合はレプリケーションされたどのServerも同じデータを持っているが、setが失敗していた場合は異なるデータを持っている可能性がある。このときどのServerが持っているデータが最新なのか比べる必要があり、クロックを利用して比較する。 + +**Manager間の協調動作での利用 +Managerが2台動作しているとき、どちらが持っているハッシュ空間が最新なのかを比べる必要がある。ハッシュ空間を更新するときに更新した時のクロックを付与しておき、比較するときにこのクロックを利用する。 + +@footnote + diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..2a2e5b3 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,6 @@ +export ERB +export RUBY +export RAGEL + +SUBDIRS = mp mpsrc log kazuhiki rpc logic command + diff --git a/src/command/Makefile.am b/src/command/Makefile.am new file mode 100644 index 0000000..3b3a41f --- /dev/null +++ b/src/command/Makefile.am @@ -0,0 +1,16 @@ + +AM_CPPFLAGS = -I.. -I../logic +AM_C_CPPFLAGS = -I.. 
-I../logic + +bin_PROGRAMS = kumomergedb +bin_SCRIPTS = kumoctl kumostat kumolog + +EXTRA_DIST = $(bin_SCRIPTS) + +kumomergedb_SOURCES = \ + mergedb.cc + +kumomergedb_LDADD = \ + ../logic/libkumo_storage.a \ + ../log/libkumo_log.a + diff --git a/src/command/kumoctl b/src/command/kumoctl new file mode 100644 index 0000000..7ec1675 --- /dev/null +++ b/src/command/kumoctl @@ -0,0 +1,285 @@ +#!/usr/bin/env ruby + +begin + require 'rubygems' +rescue LoadError +end +require 'msgpack' +require 'socket' + + +class KumoRPC + def initialize(host, port) + @sock = TCPSocket.open(host, port) + @pk = MessagePack::Unpacker.new + @buffer = '' + @nread = 0 + @seqid = rand(1<<16) # FIXME 1 << 32 + @callback = {} + end + + private + def send_request(seq, cmd, param) + @sock.write [true, seq, cmd, param].to_msgpack + @sock.flush + rescue + @sock.close + raise + end + + def receive_message + while true + if @buffer.length > @nread + @nread = @pk.execute(@buffer, @nread) + if @pk.finished? + msg = @pk.data + @pk.reset + @buffer.slice!(0, @nread) + @nread = 0 + if msg[0] + process_request(msg[1], msg[2], msg[3]) + else + process_response(msg[1], msg[3], msg[2]) + end + return msg[1] + end + end + @buffer << @sock.sysread(1024) + end + end + + def process_request(seqid, cmd, param) + raise "request received, excpect response" + end + + def process_response(seqid, res, err) + if cb = @callback[seqid] + cb.call(res, err) + end + end + + def synchronize_response(seqid) + while receive_message != seqid; end + end + + def send_request_async(cmd, param, &callback) + seqid = @seqid + # FIXME 1 << 32 + @seqid += 1; if @seqid >= 1<<16 then @seqid = 0 end + @callback[seqid] = callback if callback + send_request(seqid, cmd, param) + seqid + end + + def send_request_sync(cmd, param) + res = nil + err = nil + seqid = send_request_async(cmd, param) {|rres, rerr| + res = rres + err = rerr + } + synchronize_response(seqid) + return [res, err] + end + + def send_request_sync_ex(cmd, param) + res, err = 
send_request_sync(cmd, param) + raise "error #{err}" if err + res + end + + + def rpc_addr(raw) + if raw.length == 6 + addr = Socket.pack_sockaddr_in(0, '0.0.0.0') + addr[2,6] = raw[0,6] + else + addr = Socket.pack_sockaddr_in(0, '::') + addr[2,2] = raw[0,2] + addr[8,20] = raw[2,20] + end + Socket.unpack_sockaddr_in(addr).reverse + end + + public + def GetStatus + res = send_request_sync_ex(84, []) + form = {} + nodes = res[0] + + clocktime = nodes.slice!(-1) + date = Time.at(clocktime >> 32) + clock = clocktime & ((1<<32)-1) + + nodes.each {|nodes| + nodes.map! {|raw| + active = (raw.slice!(0) == "\1"[0]) + rpc_addr(raw) << active + } + } + + newcomers = res[1] + res[1].map! {|raw| + rpc_addr(raw) + } + + return [nodes, newcomers, date, clock] + end + + def AttachNewServers(replace) + send_request_sync_ex(85, [replace]) + end + + def DetachFaultServers(replace) + send_request_sync_ex(86, [replace]) + end + + def CreateBackup(suffix) + send_request_sync_ex(87, [suffix]) + end + + def SetAutoReplace(enable) + send_request_sync_ex(88, [enable]) + end + + def StartReplace() + send_request_sync_ex(89, []) + end + + module Protocol + ControlGetStatus = 84 + ControlAttachNewServers = 85 + ControlDetachFaultServers = 86 + ControlCreateBackup = 87 + ControlSetAutoReplace = 88 + ControlStartReplace = 89 + GetStatus = 112 + SetConfig = 112 + end + + CONTROL_DEFAULT_PORT = 19750 + MANAGER_DEFAULT_PORT = 19700 + SERVER_DEFAULT_PORT = 19800 +end + +if $0 == __FILE__ + + + +class KumoManager < KumoRPC + def initialize(host, port) + super(host, port) + end + + def AttachNewServers(replace) + send_request_sync_ex(Protocol::ControlAttachNewServers, [replace]) + end + + def DetachFaultServers(replace) + send_request_sync_ex(Protocol::ControlDetachFaultServers, [replace]) + end + + def CreateBackup(suffix) + send_request_sync_ex(Protocol::ControlCreateBackup, [suffix]) + end + + def SetAutoReplace(enable) + send_request_sync_ex(Protocol::ControlSetAutoReplace, [enable]) + end + + def 
StartReplace() + send_request_sync_ex(Protocol::ControlStartReplace, []) + end +end + + +$now = Time.now.strftime("%Y%m%d") + +def usage + puts "Usage: #{File.basename($0)} address[:port=#{KumoRPC::CONTROL_DEFAULT_PORT}] command [options]" + puts "command:" + puts " status get status" + puts " attach attach all new servers and start replace" + puts " attach-noreplace attach all new servers" + puts " detach detach all fault servers and start replace" + puts " detach-noreplace detach all fault servers" + puts " replace start replace without attach/detach" + puts " backup [suffix=#{$now }] create backup with specified suffix" + puts " enable-auto-replace enable auto replace" + puts " disable-auto-replace disable auto replace" + exit 1 +end + +if ARGV.length < 2 + usage +end + +addr = ARGV.shift +host, port = addr.split(':', 2) +port ||= KumoRPC::CONTROL_DEFAULT_PORT + +cmd = ARGV.shift + +case cmd +when "stat", "status" + usage if ARGV.length != 0 + attached, not_attached, date, clock = + KumoManager.new(host, port).GetStatus + puts "hash space timestamp:" + puts " #{date} clock #{clock}" + puts "attached node:" + attached.each {|addr, port, active| + puts " #{addr}:#{port} (#{active ? 
"active":"fault"})" + } + puts "not attached node:" + not_attached.each {|addr, port| + puts " #{addr}:#{port}" + } + +when "attach" + usage if ARGV.length != 0 + p KumoManager.new(host, port).AttachNewServers(true) + +when "attach-noreplace" + usage if ARGV.length != 0 + p KumoManager.new(host, port).AttachNewServers(false) + +when "detach" + usage if ARGV.length != 0 + p KumoManager.new(host, port).DetachFaultServers(true) + +when "detach-noreplace" + usage if ARGV.length != 0 + p KumoManager.new(host, port).DetachFaultServers(false) + +when "enable-auto-replace" + usage if ARGV.length != 0 + p KumoManager.new(host, port).SetAutoReplace(true) + +when "disable-auto-replace" + usage if ARGV.length != 0 + p KumoManager.new(host, port).SetAutoReplace(false) + +when "backup" + if ARGV.length == 0 + suffix = $now + elsif ARGV.length == 1 + suffix = ARGV.shift + else + usage + end + puts "suffix=#{suffix}" + p KumoManager.new(host, port).CreateBackup(suffix) + +when "replace" + usage if ARGV.length != 0 + p KumoManager.new(host, port).StartReplace() + +else + puts "unknown command #{cmd}" + puts "" + usage +end + + +end # if $0 == __FILE__ diff --git a/src/command/kumolog b/src/command/kumolog new file mode 100644 index 0000000..92d0ac3 --- /dev/null +++ b/src/command/kumolog @@ -0,0 +1,212 @@ +#!/usr/bin/env ruby +begin + require 'rubygems' +rescue LoadError +end +require 'msgpack' +require 'yaml' +require 'pp' + +def fixstr(code) + r = "" + 8.times {|i| + c = ((code >> (8*(7-i))) & 0xff) + r << c.chr if c != 0 + } + r +end + +def do_recover(src, off) + puts "recover at #{off}" + sz = src.length + pk = MessagePack::Unpacker.new + + while (sz - off) >= 4 + br = src[off,4].unpack('N')[0] + doff = off + 4 + + failed = false + begin + #noff = pk.execute_limit(src, doff, doff+br) + noff = pk.execute(src, doff) + rescue + failed = true + end + + if !failed && pk.finished? 
&& noff - doff == br + return off + end + + pk.reset + off += 1 + end + return sz +end + +def do_parse(src, count, &block) + off = 0 + noff = 0 + sz = src.length + + pk = MessagePack::Unpacker.new + + while true + return if (sz - off) < 4 + + br = src[off,4].unpack('N')[0] + off += 4 + + if (sz - off) < br + off = do_recover(src, off-3) + next + end + + failed = false + begin + pk.reset + #noff = pk.execute_limit(src, off, off+br) + noff = pk.execute(src, off) + rescue + failed = true + end + + if failed || !pk.finished? || noff - off != br + off = do_recover(src, off-3) + next + end + + obj = pk.data + + name = nil + version = 0 + hash = {} + begin + unless obj.is_a?(Array) && obj.length == 3 && + obj[0].is_a?(Numeric) && obj[1].is_a?(Numeric) && + obj[2].is_a?(Hash) + off = do_recover(src, off-3) + next + end + name = fixstr(obj[0]) + version = obj[1] + obj[2].each_pair {|k,v| + hash[fixstr(k)] = v + } + rescue + off = do_recover(src, off-3) + next + end + + off += br + + block.call(name, version, hash) + end +end + +class Hash + def hmap(&block) + m = {} + each_pair {|k, v| + m[k] = block.call(k, v) + } + m + end +end + + +if ARGV.length == 0 + puts "usage: #{File.basename($0)} " + exit 1 +end + + +conf = YAML.load DATA.read.gsub(/(^\t+)/) { + ' ' * $+.length +} + +msgdb = conf["message"] + +filterdb = conf["filter"].hmap {|name, hash| + hash.hmap {|key, proc| + [ proc[0], eval("lambda{|val|#{proc[1]}}") ] + } +} + +do_parse(File.read(ARGV[0]), 1<<30) {|name, version, hash| + msg = msgdb[name] || "#{name}.#{version}" + + if filter = filterdb["#{name}.#{version}"] + filter.each_pair {|key, proc| + val = hash.delete(key) + hash[proc[0]] = proc[1].call(val) + } + end + + vals = hash.map {|k, v| + pv = v.pretty_inspect.rstrip + pv = v if pv[1..-2] == v + "#{k}=[#{pv}]" + }.sort_by{|kv| kv[0] }.join(' ') + + puts "%s.%s %-15s %s" % [name, version, msg, vals] +} + + +__END__ +proc: + - &addr | + require 'socket' + if val.length == 6 + addr = Socket.pack_sockaddr_in(0, 
'0.0.0.0') + addr[2,6] = val[0,6] + else + addr = Socket.pack_sockaddr_in(0, '::') + addr[2,2] = val[0,2] + addr[8,20] = val[2,20] + end + Socket.unpack_sockaddr_in(addr).reverse.join(':') + + - &time | + Time.at(val).strftime("%Y-%m-%d %H:%M:%S") + + - &clocktime | + Time.at(val>>32).strftime("%Y-%m-%d %H:%M:%S") + " clock #{val & 0xffffffff}" + +message: + SM: Manager start + SS: Server start + SW: Gateway start + eP: unknown partner + nS: new server + lS: lost server + ers: replicate-set failed + erd: replicate-delete failed + eg: Get failed + es: Set failed + ed: Delete failed + +filter: + SM.2: + time: [time, *time] + addr: [address, *addr] + Padd: [partner, *addr] + SS.2: + time: [time, *time] + addr: [address, *addr] + db: [database, val] + mgr1: [manager1, *addr] + mgr2: [manager2, *addr] + sadd: [stream_listen, *addr] + tmpd: [tmp_dir, val] + bkup: [backup_prefix, val] + SW.2: + time: [time, *time] + mgr1: [manager1, *addr] + mgr2: [manager2, *addr] + eP.2: + addr: [address, *addr] + nS.2: + addr: [address, *addr] + lS.2: + addr: [address, *addr] + diff --git a/src/command/kumostat b/src/command/kumostat new file mode 100644 index 0000000..d98a03b --- /dev/null +++ b/src/command/kumostat @@ -0,0 +1,90 @@ +#!/usr/bin/env ruby + +load File.dirname(__FILE__) + "/kumoctl" + + +class KumoServer < KumoRPC + def initialize(host, port) + super(host, port) + end + + def GetStatus(key) + send_request_sync_ex(Protocol::GetStatus, [key]) + end + + def SetConfig(*args) + send_request_sync_ex(Protocol::ControlDetachFaultServers, args) + end + + STAT_PID = 0 + STAT_UPTIME = 1 + STAT_TIME = 2 + STAT_VERSION = 3 + STAT_CMD_GET = 4 + STAT_CMD_SET = 5 + STAT_CMD_DELETE = 6 + STAT_DB_ITEMS = 7 +end + +def usage + puts "Usage: #{File.basename($0)} address[:port=#{KumoRPC::SERVER_DEFAULT_PORT}] command [options]" + puts "command:" + puts " pid get pid of server process" + puts " uptime get uptime" + puts " time get UNIX time" + puts " version get version" + puts " cmd_get get 
number of get requests" + puts " cmd_set get number of set requests" + puts " cmd_delete get number of delete requests" + puts " items get number of stored items" + exit 1 +end + +if ARGV.length < 2 + usage +end + +addr = ARGV.shift +host, port = addr.split(':', 2) +port ||= KumoRPC::SERVER_DEFAULT_PORT + +cmd = ARGV.shift + +case cmd +when "pid" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_PID) + +when "uptime" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_UPTIME) + +when "time" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_TIME) + +when "version" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_VERSION) + +when "cmd_get" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_CMD_GET) + +when "cmd_set" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_CMD_SET) + +when "cmd_delete" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_CMD_DELETE) + +when "items" + usage if ARGV.length != 0 + p KumoServer.new(host, port).GetStatus(KumoServer::STAT_DB_ITEMS) + +else + usage + +end + diff --git a/src/command/mergedb.cc b/src/command/mergedb.cc new file mode 100644 index 0000000..6db2fe7 --- /dev/null +++ b/src/command/mergedb.cc @@ -0,0 +1,86 @@ +#include "log/mlogger.h" +#include "log/mlogger_ostream.h" +#include "server/storage.h" +#include + +template +struct auto_array { + auto_array() : m(NULL) { } + auto_array(T* p) : m(p) { } + ~auto_array() { delete[] m; } + T& operator[] (size_t i) { return m[i]; } +private: + T* m; + auto_array(const auto_array&); +}; + + +using namespace kumo; +using kumo::server::Storage; + +struct for_each_update { + for_each_update(Storage* dstdb, uint64_t* total, uint64_t* merged) : + m_total(total), m_merged(merged), m_dstdb(dstdb) { } + + void operator() (Storage::iterator& kv) + { 
+ ++*m_total; + + if(kv.keylen() < Storage::KEY_META_SIZE) { return; } + if(kv.vallen() < Storage::VALUE_META_SIZE) { return; } + + if( m_dstdb->update(kv.key(), kv.keylen(), kv.val(), kv.vallen()) ) { + ++*m_merged; + } + } + +private: + uint64_t *m_total; + uint64_t *m_merged; + Storage* m_dstdb; +}; + + +int main(int argc, char* argv[]) +{ + if(argc <= 3) { + std::cerr << "usage: "< ..." << std::endl; + return 1; + } + + const char* dst = argv[1]; + unsigned int nsrcs = argc - 2; + char* const* psrcs = argv + 2; + + mlogger::reset(new mlogger_ostream(mlogger::TRACE, std::cout)); + + { + // init src databases + auto_array< std::auto_ptr > srcdbs(new std::auto_ptr[nsrcs]); + for(unsigned int i=0; i < nsrcs; ++i) { + srcdbs[i].reset(new Storage(psrcs[i], 0, 0, 0)); + } + + // init dst database + std::auto_ptr dstdb(new Storage(dst, 0, 0, 0)); + + uint64_t total = 0; + uint64_t merged = 0; + for(unsigned int i=0; i < nsrcs; ++i) { + std::cout << "merging "<for_each( + for_each_update(dstdb.get(), &total, &merged), + ClockTime(0) ); + + //std::cout << srcdbs[i]->error() << std::endl; // FIXME + std::cout << " merged " << merged << " records of " << total << " records" << std::endl; + } + + std::cout << "closing "< +#include +#include +#include +#include +#include +#include + +namespace kazuhiki { + + +struct invalid_argument_real : public invalid_argument { + invalid_argument_real(const std::string& msg) : + invalid_argument(msg) { } + + virtual ~invalid_argument_real() throw() { } + + virtual const char* what() const throw() + { + if(msg.empty()) { + return std::runtime_error::what(); + } else { + return msg.c_str(); + } + } + + void setkey(const std::string& k) throw() + { + msg = std::string("argument error `") + k + "': " + what(); + } + +private: + std::string msg; +}; + + +void parser::raise(const char* fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + + char buf[256]; + vsnprintf(buf, sizeof(buf), fmt, ap); + + va_end(ap); + throw invalid_argument_real(buf); +} + + +class command { +public: + command() { } + ~command() { } + +private: + static int parse_through(int& argc, const char** argv, int i, int s) + { + return i + s; + } + + static int parse_break(int& argc, const char** argv, int i, int s) + { + memmove(argv + i, argv + i + s, (argc - i - s)*sizeof(char*)); + argc -= s; + return i; + } + + static int fail_through(int& argc, const char** argv, int i) + { + return i + 1; + } + + static int fail_exception(int& argc, const char** argv, int i) + { + throw unknown_argument(std::string("unexpected argument: ")+argv[i]); + } + + template < int (*parse_method)(int& argc, const char** argv, int i, int s), + int (*fail_method)(int& argc, const char** argv, int i) > + void parse_real(int& argc, const char** argv) + { + int i = 0; + while(i < argc) { + std::string key(argv[i]); + try { + map_t::iterator it(m_map.find(key)); + if(it != m_map.end()) { + acceptable& ac(it->second.ac); + unsigned int s = (*ac.parse)(ac.data, argc - i - 1, argv + i + 1); + i = (*parse_method)(argc, argv, i, s + 1); + *it->second.shared_required = false; + if(it->second.optional) { + *it->second.optional = true; + } + } else { + i = (*fail_method)(argc, argv, i); + } + } catch (invalid_argument_real& e) { + e.setkey(key); + throw; + } + } + + std::vector missing; + for(map_t::reverse_iterator it(m_map.rbegin()), it_end(m_map.rend()); + it != it_end; ++it) { + entry& e(it->second); + if(*e.shared_required && e.optional == NULL) { + missing.push_back(it->first); + *e.shared_required = false; + } + } + if(!missing.empty()) { + std::string msg("required but not set: "); + std::vector::iterator it(missing.begin()); + msg += *it; + ++it; + for(; it != missing.end(); ++it) { + msg += ", "; + msg += *it; + } + throw invalid_argument_real(msg); + } + } + +public: + void on(const char* short_name, const char* 
long_name, + bool* optional, acceptable ac) + { + entry e; + e.ac = ac; + e.shared_required = s_parser->alloc(ac.required); + e.optional = optional; + if(e.optional) { *e.optional = false; } + if(short_name) { + m_map[std::string(short_name)] = e; + } + if(long_name) { + m_map[std::string(long_name)] = e; + } + } + + void parse(int argc, const char** argv) + { + parse_real<&command::parse_through, &command::fail_through>(argc, argv); + } + + void break_parse(int& argc, const char** argv) + { + parse_real<&command::parse_break, &command::fail_through>(argc, argv); + } + + void order(int argc, const char** argv) + { + parse_real<&command::parse_through, &command::fail_exception>(argc, argv); + } + + void break_order(int& argc, const char** argv) + { + parse_real<&command::parse_break, &command::fail_exception>(argc, argv); + } + +private: + struct entry { + bool* shared_required; + bool* optional; + acceptable ac; + }; + typedef std::map map_t; + map_t m_map; +}; + +static command* cmd; + +std::auto_ptr s_parser; + + +void init() +{ + s_parser.reset( new parser() ); + cmd = s_parser->alloc(); +} + +void on(const char* short_name, const char* long_name, acceptable ac) +{ + cmd->on(short_name, long_name, NULL, ac); +} + +void on(const char* short_name, const char* long_name, bool* optional, acceptable ac) +{ + cmd->on(short_name, long_name, optional, ac); +} + +void parse(int argc, char** argv) +{ + cmd->parse(argc, (const char**)argv); + s_parser.reset(); +} + +void break_parse(int& argc, char** argv) +{ + cmd->break_parse(argc, (const char**)argv); + s_parser.reset(); +} + +void order(int argc, char** argv) +{ + cmd->order(argc, (const char**)argv); + s_parser.reset(); +} + +void break_order(int& argc, char** argv) +{ + cmd->break_order(argc, (const char**)argv); + s_parser.reset(); +} + + +acceptable::acceptable() : + parse(NULL), data(NULL), required(false) { } + +acceptable::acceptable(parse_t p, void* d, bool r) : + parse(p), data(d), required(r) { } + + 
+namespace type { + + +static unsigned int parse_boolean(bool* dst, int argc, const char** argv) +{ + if(argc > 0) { + if( strcmp("true", argv[0]) == 0 || + strcmp("yes", argv[0]) == 0 || + strcmp("on", argv[0]) == 0 ) { + *dst = true; + return 1; + } else if( + strcmp("false", argv[0]) == 0 || + strcmp("no", argv[0]) == 0 || + strcmp("off", argv[0]) == 0 ) { + *dst = false; + return 1; + } else { + *dst = true; + return 0; + } + } else { + *dst = true; + return 0; + } +} + +acceptable boolean(bool* dst) +{ + *dst = false; + return acceptable((parse_t)parse_boolean, (void*)dst, false); +} + + +static unsigned int parse_string(std::string* dst, int argc, const char** argv) +{ + if(argc < 1) { parser::raise("string is required."); } + *dst = argv[0]; + return 1; +} + +acceptable string(std::string* dst) +{ + return acceptable((parse_t)parse_string, dst, true); +} + +acceptable string(std::string* dst, const std::string& d) +{ + *dst = d; + return acceptable((parse_t)parse_string, dst, false); +} + + +template +struct parse_network_base { + parse_network_base(Address* dst) : + m_dst(dst), m_port(0) { } + + parse_network_base(Address* dst, unsigned short port) : + m_dst(dst), m_port(port) { } + +protected: + void resolve_port(const char* port = NULL) + { + if(port) { + if(!convert::numeric(&m_port, port)) { + parser::raise("invalid port number: %s", port); + } + } else if(m_port == 0) { + parser::raise("port number is required."); + } + } + + void resolve_addr(const char* host, const char* port = NULL) + { + memset(m_dst, 0, sizeof(Address)); + resolve_port(port); + + addrinfo hints; + memset(&hints, 0, sizeof(hints)); + hints.ai_family = UseIPv6 ? 
AF_UNSPEC : AF_INET; + hints.ai_socktype = SOCK_STREAM; // FIXME + hints.ai_flags = AI_V4MAPPED | AI_ADDRCONFIG; + + addrinfo *res = NULL; + int err; + if( (err=getaddrinfo(host, NULL, &hints, &res)) != 0 ) { + parser::raise("can't resolve host name: %s", host); + } + + for(addrinfo* rp=res; rp; rp = rp->ai_next) { + if(rp->ai_family == AF_INET || + (UseIPv6 && rp->ai_family == AF_INET6)) { + /* copy only what both the result and *m_dst can hold: + * std::max would over-read ai_addr or overflow m_dst */ + memcpy((void*)m_dst, (const void*)rp->ai_addr, + std::min((size_t)rp->ai_addrlen, (size_t)sizeof(Address))); + ((sockaddr_in*)m_dst)->sin_port = htons(m_port); + freeaddrinfo(res); + return; + } + } + + /* raise() throws, so release the result list first */ + freeaddrinfo(res); + parser::raise("can't resolve host name: %s", host); + } + + void addr_any(unsigned short port, bool dummy) + { + memset(m_dst, 0, sizeof(Address)); + //if(typeid(Address) == typeid(sockaddr_in)) { + sockaddr_in* addr = (sockaddr_in*)m_dst; + addr->sin_family = AF_INET; + addr->sin_port = htons(port); + addr->sin_addr.s_addr = INADDR_ANY; + //} else { + // sockaddr_in6* addr = (sockaddr_in6*)m_dst; + // addr->sin6_family = AF_INET6; + // addr->sin6_port = htons(port); + // addr->sin6_addr = in6addr_any; + //} + } + + void addr_any(const char* port = NULL) + { + resolve_port(port); + addr_any(m_port, true); + } + + /* Store a UNIX-domain socket path into *m_dst; raises if the path + * plus its terminating NUL does not fit in sun_path. */ + void addr_path(const char* path) + { + sockaddr_un* addr = (sockaddr_un*)m_dst; + /* >= : strcpy also writes the terminating NUL */ + if(strlen(path) >= sizeof(addr->sun_path)) { + parser::raise("path too long: %s", path); + } + addr->sun_family = AF_UNIX; + strcpy(addr->sun_path, path); + } + + Address* m_dst; + unsigned short m_port; +}; + + +template +struct parse_connectable : parse_network_base { + typedef parse_network_base net; + + parse_connectable(Address* dst) : net(dst) + { memset(dst, 0, sizeof(Address)); } + + parse_connectable(Address* dst, unsigned short port) : net(dst, port) + { memset(dst, 0, sizeof(Address)); } + + unsigned int operator() (int argc, const char** argv) + { + if(argc < 1) { parser::raise("network address is required."); } + std::string str(argv[0]); + + if(typeid(Address) == 
typeid(sockaddr_un) && + str.find('/') != std::string::npos) { + net::addr_path(argv[0]); + return 1; + } + + std::string::size_type posc = str.rfind(':'); + if(posc != std::string::npos) { + if(UseIPv6 && str.find(':') != posc) { + // IPv6 address + if( posc != 0 && str[posc-1] == ']' && str[0] == '[' ) { + // [ip:add::re:ss]:port + std::string host( str.substr(1,posc-2) ); + std::string port( str.substr(posc+1) ); + net::resolve_addr(host.c_str(), port.c_str()); + return 1; + } else { + // ip:add::re:ss (default port) + net::resolve_addr(str.c_str(), NULL); + return 1; + } + } else { + // host:port + // ip.add.re.ss:port + std::string host( str.substr(0,posc) ); + std::string port( str.substr(posc+1) ); + net::resolve_addr(host.c_str(), port.c_str()); + return 1; + } + } else { + // host (default port) + // ip.add.re.ss (default port) + net::resolve_addr(str.c_str(), NULL); + return 1; + } + } +}; + + +template +struct parse_listenable : parse_network_base { + typedef parse_network_base net; + + parse_listenable(Address* dst) : net(dst) + { net::addr_any(0, true); } + + parse_listenable(Address* dst, unsigned short port) : net(dst, port) + { net::addr_any(port, true); } + + unsigned int operator() (int argc, const char** argv) + { + if(argc < 1) { parser::raise("network address is required."); } + std::string str(argv[0]); + std::string::size_type posc = str.rfind(':'); + if(posc != std::string::npos) { + if(UseIPv6 && str.find(':') != posc) { + // IPv6 address + if( posc != 0 && str[posc-1] == ']' && str[0] == '[' ) { + // [ip:add::re:ss]:port + std::string host( str.substr(1,posc-2) ); + std::string port( str.substr(posc+1) ); + net::resolve_addr(host.c_str(), port.c_str()); + return 1; + } else { + // ip::add::re:ss (default port) + net::resolve_addr(str.c_str(), NULL); + return 1; + } + } else { + std::string host( str.substr(0,posc) ); + std::string port( str.substr(posc+1) ); + if(host.empty()) { + // :port + net::addr_any(port.c_str()); + return 1; + } 
else { + // host:port + net::resolve_addr(host.c_str(), port.c_str()); + return 1; + } + } + } else { + unsigned short tmp; + if(convert::numeric(&tmp, str.c_str())) { + // port + net::addr_any(str.c_str()); + return 1; + } else { + // host (default port) + net::resolve_addr(str.c_str(), NULL); + return 1; + } + } + } +}; + + +acceptable connectable(sockaddr_in* dst) +{ + typedef parse_connectable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst), + true); +} + +acceptable connectable(sockaddr_in6* dst) +{ + typedef parse_connectable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst), + true); +} + +acceptable connectable(sockaddr_in* dst, unsigned short d_port) +{ + typedef parse_connectable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst, d_port), + true); +} + +acceptable connectable(sockaddr_in6* dst, unsigned short d_port) +{ + typedef parse_connectable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst, d_port), + true); +} + + +acceptable listenable(sockaddr_in* dst) +{ + typedef parse_listenable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst), + true); +} + +acceptable listenable(sockaddr_in6* dst) +{ + typedef parse_listenable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst), + true); +} + +acceptable listenable(sockaddr_in* dst, unsigned short d_port) +{ + typedef parse_listenable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst, d_port), + false); +} + +acceptable listenable(sockaddr_in6* dst, unsigned short d_port) +{ + typedef parse_listenable type; + return acceptable( + &parser::object_parse, + (void*)s_parser->alloc(dst, d_port), + false); +} + + +} // namespace type + + +namespace convert { + +static int netif_addr_impl(const char* ifname, unsigned short port, void* dst, int family) +{ + struct ifaddrs* ifap; + 
if(getifaddrs(&ifap)) { return -1; } + int ret = -1; + for(struct ifaddrs* i=ifap; i != NULL; i = i->ifa_next) { + if(i->ifa_addr == NULL) { continue; } + if(strcmp(ifname, i->ifa_name) != 0) { continue; } + int sa_family = i->ifa_addr->sa_family; + if(sa_family == AF_INET && (family == AF_INET || family == AF_UNSPEC)) { + memcpy(dst, i->ifa_addr, sizeof(sockaddr_in)); + ((struct sockaddr_in*)dst)->sin_port = htons(port); + ret = 0; + break; + } else if(sa_family == AF_INET6 && (family == AF_INET6 || family == AF_UNSPEC)) { + memcpy(dst, i->ifa_addr, sizeof(sockaddr_in6)); + ((struct sockaddr_in6*)dst)->sin6_port = htons(port); + ret = 0; + break; + } + } + freeifaddrs(ifap); + return ret; +} + +int netif_addr4(const char* ifname, unsigned short port, sockaddr_in* dst) +{ + memset(dst, 0, sizeof(sockaddr_in)); + return netif_addr_impl(ifname, port, dst, AF_INET); +} + +int netif_addr6(const char* ifname, unsigned short port, sockaddr_in6* dst) +{ + memset(dst, 0, sizeof(sockaddr_in6)); + return netif_addr_impl(ifname, port, dst, AF_INET6); +} + +int netif_addr(const char* ifname, unsigned short port, sockaddr_in6* dst) +{ + memset(dst, 0, sizeof(sockaddr_in6)); + return netif_addr_impl(ifname, port, dst, AF_UNSPEC); +} + +/* kazuhiki.h declares these functions with the parameter order + * (dst, ifname, port). The definitions above take (ifname, port, dst), + * which C++ treats as different overloads, so the declared functions + * would otherwise have no definition. Each overload below zeroes *dst and + * returns 0 when the named interface has a matching address, -1 otherwise. */ +int netif_addr4(sockaddr_in* dst, const char* ifname, unsigned short port) +{ + return netif_addr4(ifname, port, dst); +} + +int netif_addr6(sockaddr_in6* dst, const char* ifname, unsigned short port) +{ + return netif_addr6(ifname, port, dst); +} + +int netif_addr(sockaddr_in6* dst, const char* ifname, unsigned short port) +{ + return netif_addr(ifname, port, dst); +} + +} // namespace convert + + +} // namespace kazuhiki + diff --git a/src/kazuhiki/kazuhiki.h b/src/kazuhiki/kazuhiki.h new file mode 100644 index 0000000..92986c9 --- /dev/null +++ b/src/kazuhiki/kazuhiki.h @@ -0,0 +1,242 @@ +/* + * Kazuhiki 3 + * + * Copyright (C) 2007-2008 FURUHASHI Sadayuki + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * 
+ * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef KAZUHIKI_H__ +#define KAZUHIKI_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef MP_NO_CXX_ABI_H +#include +#include +#endif + +namespace kazuhiki { + +void init(); + +typedef unsigned int (*parse_t)(void* data, int argc, const char** argv); + +struct acceptable { + acceptable(); + acceptable(parse_t, void*, bool); + parse_t parse; + void* data; + bool required; +}; + +void on(const char* short_name, const char* long_name, acceptable ac); +void on(const char* short_name, const char* long_name, bool* optional, acceptable ac); + +void parse(int argc, char** argv); +void break_parse(int& argc, char** argv); +void order(int argc, char** argv); +void break_order(int& argc, char** argv); + + +struct argument_error : public std::runtime_error { + argument_error(const std::string& msg) : + std::runtime_error(msg) { } +}; + +struct invalid_argument : public argument_error { + invalid_argument(const std::string& msg) : + argument_error(msg) { } +}; + +struct unknown_argument : public argument_error { + unknown_argument(const std::string& msg) : + argument_error(msg) { } +}; + + +namespace type { + +acceptable boolean(bool* dst); + +acceptable string(std::string* dst); +acceptable string(std::string* dst, const std::string& d); + +template acceptable numeric(T* dst); +template acceptable 
numeric(T* dst, T d); + +template acceptable action(F f, bool required); + +acceptable connectable(sockaddr_in* dst); +acceptable connectable(sockaddr_in6* dst); +acceptable connectable(sockaddr_storage* dst); + +acceptable connectable(sockaddr_in* dst, unsigned short d_port); +acceptable connectable(sockaddr_in6* dst, unsigned short d_port); +acceptable connectable(sockaddr_storage* dst, unsigned short d_port); + +acceptable connectable(sockaddr_in* dst, unsigned short d_port, const char* d_host); +acceptable connectable(sockaddr_in6* dst, unsigned short d_port, const char* d_host); +acceptable connectable(sockaddr_storage* dst, unsigned short d_port, const char* d_host); +acceptable connectable(sockaddr_storage* dst, const char* d_path); + +acceptable listenable(sockaddr_in* dst); +acceptable listenable(sockaddr_in6* dst); +acceptable listenable(sockaddr_storage* dst); + +acceptable listenable(sockaddr_in* dst, unsigned short d_port); +acceptable listenable(sockaddr_in6* dst, unsigned short d_port); +acceptable listenable(sockaddr_storage* dst, unsigned short d_port); + +acceptable listenable(sockaddr_in* dst, unsigned short d_port, const char* d_host); +acceptable listenable(sockaddr_in6* dst, unsigned short d_port, const char* d_host); +acceptable listenable(sockaddr_storage* dst, const char* d_path); + +} // namespace type + + +class parser { +public: + template + T* alloc() { return alloc_real(new T()); } + + template + T* alloc(A1 a1) { return alloc_real(new T(a1)); } + + template + T* alloc(A1 a1, A2 a2) { return alloc_real(new T(a1, a2)); } + + template + T* alloc(A1 a1, A2 a2, A3 a3) { return alloc_real(new T(a1, a2, a3)); } + + template + static unsigned int object_parse(void* data, int argc, const char** argv) + { + return (*reinterpret_cast(data))(argc, argv); + } + + static void raise(const char* fmt, ...) 
+ __attribute__((noreturn)) + __attribute__((format(printf, 1, 2))); + +private: + typedef std::vector< std::pair > pool_t; + pool_t m_pool; + + template static void object_delete(void* data) + { + delete reinterpret_cast(data); + } + + template T* alloc_real(T* new_data) + { + std::auto_ptr data(new_data); + m_pool.push_back( pool_t::value_type( + &parser::object_delete, + reinterpret_cast(data.get()) + ) ); + return data.release(); + } + +public: + parser() { } + + ~parser() + { + for(pool_t::iterator it(m_pool.begin()); + it != m_pool.end(); ++it) { + (*it->first)(it->second); + } + } +}; + +extern std::auto_ptr s_parser; + + +namespace convert { + +// convert network interface name to IPv4 address +int netif_addr4(sockaddr_in* dst, const char* ifname, unsigned short port); + +// convert network interface name to IPv6 address +int netif_addr6(sockaddr_in6* dst, const char* ifname, unsigned short port); + +// convert network interface name to IPv4 address or IPv6 address +int netif_addr(sockaddr_in6* dst, const char* ifname, unsigned short port); + +template +bool numeric(T* dst, const char* str) +{ + std::istringstream stream(str); + stream >> *dst; + if(stream.fail() || stream.bad() || !stream.eof()) { + return false; + } + return true; +} + +} // namespace convert + + +namespace type { + +namespace detail { + template + static unsigned int parse_numeric(void* dst, int argc, const char** argv) { + if(argc < 1) { parser::raise("numeric value is required."); } + if(!convert::numeric((T*)dst, argv[0])) { +#ifndef MP_NO_CXX_ABI_H + int status; + parser::raise("%s is expected: %s", + abi::__cxa_demangle(typeid(T).name(), 0, 0, &status), + argv[0]); +#else + parser::raise("invalid number: %s", argv[0]); +#endif + } + return 1; + } +} // namespace detail + +template acceptable numeric(T* dst) +{ + return acceptable(&detail::parse_numeric, (void*)dst, true); +} + +template acceptable numeric(T* dst, T d) +{ + *dst = d; + return acceptable(&detail::parse_numeric, 
(void*)dst, false); +} + +} // namespace type + + +} // namespace kazuhiki + +#endif /* kazuhiki.h */ + diff --git a/src/log/Makefile.am b/src/log/Makefile.am new file mode 100644 index 0000000..01d1ab3 --- /dev/null +++ b/src/log/Makefile.am @@ -0,0 +1,38 @@ + +nodist_noinst_DATA = mlogger.h +dist_noinst_DATA = mlogger.h.erb + +noinst_LIBRARIES = libkumo_log.a + +libkumo_log_a_SOURCES = \ + mlogger.cc \ + mlogger_null.cc \ + mlogger_ostream.cc \ + mlogger_syslog.cc \ + mlogger_tty.cc \ + logpack.c \ + logpacker.cc + +# FIXME GNU make extension +%.h: %.h.erb + $(ERB) < $< > $@.tmp + mv $@.tmp $@ + +%.hpp: %.hpp.erb + $(ERB) < $< > $@.tmp + mv $@.tmp $@ + +MOSTLYCLEANFILES = mlogger.h logpack.hpp + +noinst_HEADERS = \ + mlogger.h \ + mlogger_null.h \ + mlogger_ostream.h \ + mlogger_syslog.h \ + mlogger_tty.h \ + logpack.h \ + logpack.hpp \ + logpacker.h + +$(libkumo_log_a_SOURCES): mlogger.h logpack.hpp + diff --git a/src/log/logpack.c b/src/log/logpack.c new file mode 100644 index 0000000..68a09ed --- /dev/null +++ b/src/log/logpack.c @@ -0,0 +1,278 @@ +/* + * MessagePack fast log format + * + * Copyright (C) 2008-2009 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "logpack.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef LOGPACK_ENABLE_PSHARED +typedef struct { + pthread_mutex_t mutex; +#ifdef LOGPACK_ENABLE_PSHARED_ROBUST + volatile int seqnum; +#endif +} logpack_pshared; +#endif + +struct logpack_t { + volatile int logfd; + pthread_mutex_t mutex; + char* fname; +#ifdef LOGPACK_ENABLE_PSHARED + int mapfd; + logpack_pshared* pshared; +#endif +}; + +logpack_t* logpack_new(const char* fname) +{ + logpack_t* lpk = (logpack_t*)calloc(1, sizeof(logpack_t)); + if(!lpk) { + goto err_calloc; + } + + lpk->fname = strdup(fname); + if(!lpk->fname) { + goto err_fname; + } + + if(pthread_mutex_init(&lpk->mutex, NULL) != 0) { + goto err_mutex; + } + + lpk->logfd = open(lpk->fname, O_WRONLY|O_APPEND|O_CREAT, 0640); + if(lpk->logfd < 0) { + goto err_logfd; + } + + return lpk; + +err_logfd: + pthread_mutex_destroy(&lpk->mutex); +err_mutex: + free(lpk->fname); +err_fname: + free(lpk); +err_calloc: + return NULL; +} + +#ifdef LOGPACK_ENABLE_PSHARED +static int logpack_open_pshared(const char* basename) +{ + int mapfd; + size_t flen = strlen(basename); + char* tmpname; + + tmpname = (char*)malloc(flen+8); + if(!tmpname) { + return -1; + } + + memcpy(tmpname, basename, flen); + memcpy(tmpname+flen, "-XXXXXX", 8); // '-XXXXXX' + 1(='\0') + + mapfd = mkstemp(tmpname); + if(mapfd < 0) { + free(tmpname); + return -1; + } + + if(ftruncate(mapfd, sizeof(logpack_t)) < 0 ) { + unlink(tmpname); + free(tmpname); + return -1; + } + + unlink(tmpname); + + return mapfd; +} + +logpack_t* logpack_new(const char* fname) +{ + pthread_mutexattr_t attr; + + logpack_t* lpk = (logpack_t*)calloc(1, sizeof(logpack_t)); + if(!lpk) { + goto err_calloc; + } + + lpk->fname = strdup(fname); + if(!lpk->fname) { + goto err_fname; + } + + lpk->mapfd = logpack_open_pshared(fname); + if(lpk->mapfd < 0) { + goto err_open_pshared; + } + + lpk->pshared = (logpack_pshared*)mmap(NULL, 
sizeof(logpack_pshared), + PROT_READ|PROT_WRITE, MAP_SHARED, mapfd, 0); + if(lpk->pshared == MAP_FAILED) { + goto err_mmap; + } + memset(lpk->pshared, 0, sizeof(logpack_pshared)); + + if(pthread_mutexattr_init(&attr) != 0) { + goto err_mutexattr; + } +#ifdef LOGPACK_ENABLE_PSHARED_ROBUST + if(pthread_mutexattr_setrobust_np(&attr, PTHREAD_MUTEX_ROBUST_NP) != 0) { + goto err_mutexattr_set; + } +#endif + if(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) != 0) { + goto err_mutexattr_set; + } + + memset(&lpk->mutex, 0, sizeof(pthread_mutex_t)); + if(pthread_mutex_init(&lpk->mutex, &attr) != 0) { + goto err_mutexattr_set; + } + + lpk->logfd = open(lpk->fname, O_WRONLY|O_APPEND|O_CREAT, 0640); + if(lpk->logfd < 0) { + goto err_logfd; + } + + return lpk; + +err_logfd: + pthread_mutex_destroy(&lpk->mutex); +err_mutexattr_set: + pthread_mutexattr_destroy(&attr); +err_mutexattr: + munmap(lpk->pshared, sizeof(logpack_pshared)); +err_mmap: + close(lpk->mapfd); +err_open_pshared: + free(lpk->fname); +err_fname: + free(lpk); +err_calloc: + return NULL; +} +#endif + +void logpack_free(logpack_t* lpk) +{ + close(lpk->logfd); + free(lpk->fname); +#ifdef LOGPACK_ENABLE_PSHARED + if(lpk->pshared) { + munmap(lpk->pshared, sizeof(logpack_pshared)); + close(lpk->mapfd); + } else { + pthread_mutex_destroy(&lpk->mutex); + } +#else + pthread_mutex_destroy(&lpk->mutex); +#endif + free(lpk); +} + +static inline int logpack_lock(logpack_t* lpk) +{ +#ifdef LOGPACK_ENABLE_PSHARED +#ifdef LOGPACK_ENABLE_PSHARED_ROBUST +retry: + int seqnum = lpk->pshared->seqnum; +#endif +#endif + +#ifdef LOGPACK_ENABLE_PSHARED + if(lpk->pshared) { + if(pthread_mutex_lock(&lpk->pshared->mutex) != 0) { +#ifdef LOGPACK_ENABLE_PSHARED_ROBUST + if(errno == EOWNERDEAD) { + if(__sync_bool_compare_and_swap(&lpk->pshared->seqnum, + seqnum, seqnum+1)) { + if(pthread_mutex_consistent_np(&lpk->mutex) != 0) { + return -1; + } + } + goto retry; + } +#endif + return -1; + } + return 0; + } +#endif + + 
if(pthread_mutex_lock(&lpk->mutex) != 0) { + return -1; + } + return 0; +} + +static inline void logpack_unlock(logpack_t* lpk) +{ +#ifdef LOGPACK_ENABLE_PSHARED + if(lpk->pshared) { + pthread_mutex_unlock(&lpk->pshared->mutex); + return; + } +#endif + pthread_mutex_unlock(&lpk->mutex); +} + +int logpack_reopen(logpack_t* lpk) +{ + if(logpack_lock(lpk) < 0) { return -1; } + + int tmp = open(lpk->fname, O_WRONLY|O_APPEND|O_CREAT, 0640); + if(tmp < 0) { + return -1; + } + + close(lpk->logfd); + lpk->logfd = tmp; + + logpack_unlock(lpk); + return 0; +} + +int logpack_write_raw(logpack_t* lpk, const char* buf, size_t size) +{ + int ret = 0; + if(logpack_lock(lpk) < 0) { return -1; } + + while(true) { + ssize_t rl = write(lpk->logfd, buf, size); + if(rl <= 0) { ret = -1; break; } + if((size_t)rl >= size) { break; } + size -= rl; + } + + logpack_unlock(lpk); + return ret; +} + + diff --git a/src/log/logpack.h b/src/log/logpack.h new file mode 100644 index 0000000..9c04146 --- /dev/null +++ b/src/log/logpack.h @@ -0,0 +1,55 @@ +/* + * MessagePack fast log format + * + * Copyright (C) 2008-2009 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef LOGPACK_H__ +#define LOGPACK_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct logpack_t logpack_t; + +logpack_t* logpack_new(const char* fname); +logpack_t* logpack_new_pshared(const char* fname); +int logpack_reopen(logpack_t* lpk); +void logpack_free(logpack_t* lpk); + +int logpack_write_raw(logpack_t* lpk, const char* buf, size_t size); + +#if 0 +typedef struct { + msgpack_pack_t packer; + msgpack_sbuffer buffer; +} logpack_log_t; + +int logpack_log_init(logpack_log_t* pac, const char* name, uint16_t version); +void logpack_log_destroy(logpack_log_t* pac); +int logpack_write(logpack_t* lpk, const logpack_log_t* pac); +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* logpack.h */ + diff --git a/src/log/logpack.hpp.erb b/src/log/logpack.hpp.erb new file mode 100644 index 0000000..4aac691 --- /dev/null +++ b/src/log/logpack.hpp.erb @@ -0,0 +1,120 @@ +// +// MessagePack fast log format +// +// Copyright (C) 2008-2009 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef LOGPACK_HPP__ +#define LOGPACK_HPP__ + +#include +#include +#include +#include +#include "logpack.h" + + +class logpack { +public: + struct PSHARED_TAG { }; + logpack(const char* fname); + logpack(const char* fname, PSHARED_TAG); + ~logpack(); + + int reopen(); + +public: +#define MSGPACK_LOG_BEGIN(NUM) \ + msgpack::sbuffer buf; \ + { \ + uint64_t zero = 0; \ + buf.write((const char*)&zero, 4); \ + } \ + msgpack::packer m(buf); \ + m.pack_array(3); \ + m.pack_uint32(fixstr(name)); \ + m.pack_uint16(version); \ + m.pack_map(NUM); +#define MSGPACK_LOG_ADD(k, v) \ + m.pack_uint32(fixstr(k)); \ + m << v; +#define MSGPACK_LOG_END \ + char* p = (char*)buf.data(); \ + size_t sz = buf.size(); \ + *(uint32_t*)p = htonl(sz - 4); \ + return write_raw(p, sz); + +<%1.upto(16) {|i|%> + template , size_t L<%=j%>, typename T<%=j%><%}%>> + int write(const char (&name)[LN], uint16_t version<%1.upto(i) {|j|%>, const char (&k<%=j%>)[L<%=j%>], T<%=j%> v<%=j%><%}%>) + { + MSGPACK_LOG_BEGIN(<%=i%>); +<%1.upto(i) {|j|%> + MSGPACK_LOG_ADD(k<%=j%>, v<%=j%>); +<%}%> + MSGPACK_LOG_END; + } +<%}%> + +private: +<%1.upto(5) {|i|%> + static uint32_t fixstr(const char (&str)[<%=i%>]) + { return 0<%0.upto(i-2) {|j|%> | (((uint32_t)str[<%=j%>])<<(<%=i-2-j%>*8))<%}%>; } +<%}%> + + int write_raw(char* p, size_t sz); + +private: + logpack_t* m_logpack; + +private: + logpack(); + logpack(const logpack&); +}; + + +inline logpack::logpack(const char* fname) +{ + m_logpack = logpack_new(fname); + if(!m_logpack) { + throw std::runtime_error("failed to initialize msgpack::logpack"); + } +} + +inline logpack::logpack(const char* fname, PSHARED_TAG) +{ + m_logpack = logpack_new_pshared(fname); + if(!m_logpack) { + throw std::runtime_error("failed to initialize msgpack::logpack"); + } +} + +inline int logpack::reopen() +{ + return logpack_reopen(m_logpack); +} + +inline logpack::~logpack() +{ + logpack_free(m_logpack); +} + +inline int logpack::write_raw(char* buf, size_t size) +{ + return 
logpack_write_raw(m_logpack, buf, size); +} + + +#endif /* logpack.hpp */ + diff --git a/src/log/logpacker.cc b/src/log/logpacker.cc new file mode 100644 index 0000000..4a83d88 --- /dev/null +++ b/src/log/logpacker.cc @@ -0,0 +1,14 @@ +#include "logpacker.h" + +void logpacker::initialize(const char* fname) +{ + s_instance.reset(new logpack(fname)); +} + +void logpacker::destroy() +{ + s_instance.reset(); +} + +std::auto_ptr logpacker::s_instance; + diff --git a/src/log/logpacker.h b/src/log/logpacker.h new file mode 100644 index 0000000..2d794b4 --- /dev/null +++ b/src/log/logpacker.h @@ -0,0 +1,33 @@ +#ifndef LOGPACKER_H__ +#define LOGPACKER_H__ + +#include "logpack.hpp" +#include + +// Process-wide holder for the optional binary logger; s_instance is empty until initialize() is called. +class logpacker { +public: + static void initialize(const char* fname); + static void destroy(); + // no-op when the binary log is not active, matching the is_active() check in LOGPACK/MLOGPACK + static void reopen() { if(s_instance.get()) { s_instance->reopen(); } } + static bool is_active() { return !!s_instance.get(); } + static logpack& instance() { return *s_instance; } +private: + static std::auto_ptr s_instance; +}; + +#define LOGPACK(name, version, ...) \ + do { \ + if(logpacker::is_active()) { \ + logpacker::instance().write(name, version, __VA_ARGS__); \ + } \ + } while(0) + +#define MLOGPACK(name, version, message, ...)
\ + do { \ + if(logpacker::is_active()) { \ + logpacker::instance().write(name, version, "msg", std::string(message), __VA_ARGS__); \ + } \ + } while(0) + +#endif /* logpacker.h */ + diff --git a/src/log/mlogger.cc b/src/log/mlogger.cc new file mode 100644 index 0000000..a27d23d --- /dev/null +++ b/src/log/mlogger.cc @@ -0,0 +1,21 @@ +#include "mlogger.h" + +mlogger* mlogger::s_logger; + +void mlogger::reset(mlogger* lg) +{ + if(s_logger) { delete s_logger; } + s_logger = lg; +} + +void mlogger::destroy() +{ + delete s_logger; + s_logger = NULL; +} + + +mlogger::mlogger(level runtime_level) : + m_runtime_level(runtime_level) {} + +mlogger::~mlogger() {} diff --git a/src/log/mlogger.h.erb b/src/log/mlogger.h.erb new file mode 100644 index 0000000..87a5763 --- /dev/null +++ b/src/log/mlogger.h.erb @@ -0,0 +1,150 @@ +#ifndef MLOGGER_H__ +#define MLOGGER_H__ + +#include +#include + + +#ifndef MLOGGER_LEVEL + +#ifdef NDEBUG +#define MLOGGER_LEVEL 2 +#else +#define MLOGGER_LEVEL 0 +#endif + +#endif +<% GENERATION_LIMIT = 16 %> + +class mlogger_initializer; + +class mlogger { +public: + static void reset(mlogger* lg); + static void destroy(); + +public: + static mlogger& instance(); + +public: + enum level { + TRACE = 0, + DEBUG = 1, + INFO = 2, + WARN = 3, + ERROR = 4, + FATAL = 5, + }; + + mlogger(level runtime_level); + virtual ~mlogger(); + +#define MLOGGER_IMPL_BEGIN \ + try { \ + if(lv < m_runtime_level) { return; } \ + std::stringstream s; \ + do { \ + char tmbuf[21]; \ + time_t ti = time(NULL); \ + struct tm t; localtime_r(&ti, &t); \ + s.write(tmbuf, strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S ", &t)); \ + } while(0) + +#define MLOGGER_IMPL_END \ + std::string str(s.str()); \ + log_impl(lv, str); \ + } catch (...) 
{ \ + std::cerr << prefix << " log error" << std::endl; \ + } + +<% 0.upto(GENERATION_LIMIT) {|i| %> + template , typename A<%=j%><%}%>> + void log(level lv, const char* prefix, A0 a0<%1.upto(i) {|j|%>, A<%=j%> a<%=j%><%}%>) { + MLOGGER_IMPL_BEGIN; + s << prefix << a0<%1.upto(i) {|j|%> << a<%=j%><%}%>; + MLOGGER_IMPL_END; + } +<%}%> + +private: + virtual void log_impl(level lv, std::string& str) = 0; + +private: + level m_runtime_level; + +private: + friend class mlogger_initializer; + static mlogger* s_logger; + +private: + mlogger(); + mlogger(const mlogger&); +}; + +inline mlogger& mlogger::instance() +{ + return *s_logger; +} + + +#include "mlogger_null.h" + +static unsigned long mlogger_initializer_counter = 0; + +class mlogger_initializer { +public: + mlogger_initializer() + { + if(0 == mlogger_initializer_counter++) { + if(mlogger::s_logger == NULL) { + mlogger::reset(new mlogger_null()); + } + } + } + ~mlogger_initializer() + { + if(0 == --mlogger_initializer_counter) { + mlogger::destroy(); + } + } +private: + void initialize(); +}; + +static mlogger_initializer mlogger_initializer_; + +#define MLOGGER_XSTR(s) #s +#define MLOGGER_XSTR_(x) MLOGGER_XSTR(x) +#define MLOGGER_LINE MLOGGER_XSTR_(__LINE__) + +#ifndef MLOGGER_PREFIX +#define MLOGGER_PREFIX __FILE__ ":" MLOGGER_LINE ": " +#endif + +#ifndef MLOGGER_PREFIX_VERBOSE +#define MLOGGER_PREFIX_VERBOSE __FILE__ ":" MLOGGER_LINE ":", __FUNCTION__, ": " +#endif + +<% LEVELS = ["TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"] %> +<% LEVELS[0, 2].each {|l| %> +#ifndef MLOGGER_PREFIX_<%=l%> +#define MLOGGER_PREFIX_<%=l%> MLOGGER_PREFIX_VERBOSE +#endif +<%}%> +<% LEVELS[2..-1].each {|l| %> +#ifndef MLOGGER_PREFIX_<%=l%> +#define MLOGGER_PREFIX_<%=l%> MLOGGER_PREFIX +#endif +<%}%> + +<% LEVELS.each {|l| %> +#if MLOGGER_LEVEL <= <%=LEVELS.index(l)%> +#define LOG_<%=l%>(...) \ + mlogger::instance().log(mlogger::<%=l%>, MLOGGER_PREFIX_<%=l%>, __VA_ARGS__) +#else +#define LOG_<%=l%>(...) 
((void)0) +#endif +<%}%> + +#endif /* mlogger.h */ + diff --git a/src/log/mlogger_null.cc b/src/log/mlogger_null.cc new file mode 100644 index 0000000..f5482e6 --- /dev/null +++ b/src/log/mlogger_null.cc @@ -0,0 +1,13 @@ +#include "mlogger.h" +#include + +mlogger_null::mlogger_null() : + mlogger((level)((int)FATAL+1)) +{} + +mlogger_null::~mlogger_null() +{} + +void mlogger_null::log_impl(level lv, std::string& str) +{ } + diff --git a/src/log/mlogger_null.h b/src/log/mlogger_null.h new file mode 100644 index 0000000..5532b15 --- /dev/null +++ b/src/log/mlogger_null.h @@ -0,0 +1,15 @@ +#ifndef MLOGGER_NULL_H__ +#define MLOGGER_NULL_H__ + +#include "mlogger.h" + +class mlogger_null : public mlogger { +public: + mlogger_null(); + ~mlogger_null(); + + void log_impl(level lv, std::string& str); +}; + +#endif /* mlogger_null.h */ + diff --git a/src/log/mlogger_ostream.cc b/src/log/mlogger_ostream.cc new file mode 100644 index 0000000..9bb4a60 --- /dev/null +++ b/src/log/mlogger_ostream.cc @@ -0,0 +1,16 @@ +#include "mlogger_ostream.h" +#include + +mlogger_ostream::mlogger_ostream(level runtime_level, std::ostream& stream) : + mlogger(runtime_level), + m_stream(stream) +{} + +mlogger_ostream::~mlogger_ostream() +{} + +void mlogger_ostream::log_impl(level lv, std::string& str) +{ + m_stream << str << std::endl; +} + diff --git a/src/log/mlogger_ostream.h b/src/log/mlogger_ostream.h new file mode 100644 index 0000000..ad3c4ff --- /dev/null +++ b/src/log/mlogger_ostream.h @@ -0,0 +1,18 @@ +#ifndef MLOGGER_OSTREAM_H__ +#define MLOGGER_OSTREAM_H__ + +#include "mlogger.h" + +class mlogger_ostream : public mlogger { +public: + mlogger_ostream(level runtime_level, std::ostream& stream); + ~mlogger_ostream(); + + void log_impl(level lv, std::string& str); + +private: + std::ostream& m_stream; +}; + +#endif /* mlogger_ostream.h */ + diff --git a/src/log/mlogger_syslog.cc b/src/log/mlogger_syslog.cc new file mode 100644 index 0000000..6c31313 --- /dev/null +++ 
b/src/log/mlogger_syslog.cc @@ -0,0 +1,39 @@ +#include "mlogger_syslog.h" +#include + +mlogger_syslog::mlogger_syslog(level runtime_level, const char* ident, int facility, int option) : + mlogger(runtime_level) +{ + ::openlog(ident, option, facility); +} + +mlogger_syslog::~mlogger_syslog() +{ + ::closelog(); +} + +void mlogger_syslog::log_impl(level lv, std::string& str) +{ + int priority = LOG_DEBUG; + switch(lv) { + case TRACE: + case DEBUG: + priority = LOG_DEBUG; + break; + case INFO: + priority = LOG_INFO; + break; + case WARN: + priority = LOG_NOTICE; + break; + case ERROR: + priority = LOG_ERR; + break; + case FATAL: + priority = LOG_CRIT; + break; + } + + ::syslog(priority, "%s", str.c_str()); +} + diff --git a/src/log/mlogger_syslog.h b/src/log/mlogger_syslog.h new file mode 100644 index 0000000..0f60aab --- /dev/null +++ b/src/log/mlogger_syslog.h @@ -0,0 +1,16 @@ +#ifndef MLOGGER_SYSLOG_H__ +#define MLOGGER_SYSLOG_H__ + +#include "mlogger.h" +#include + +class mlogger_syslog : public mlogger { +public: + mlogger_syslog(level runtime_level, const char* ident, int facility = LOG_USER, int option = 0); + ~mlogger_syslog(); + + void log_impl(level lv, std::string& str); +}; + +#endif /* mlogger_syslog.h */ + diff --git a/src/log/mlogger_tty.cc b/src/log/mlogger_tty.cc new file mode 100644 index 0000000..f5b29d4 --- /dev/null +++ b/src/log/mlogger_tty.cc @@ -0,0 +1,62 @@ +#include "mlogger_tty.h" +#include +#include + +#define TTY_COLOR_RESET "\033]R" +#define TTY_COLOR_CRE "\033[K" +#define TTY_COLOR_CLEAR "\033c" +#define TTY_COLOR_NORMAL "\033[0;39m" +#define TTY_COLOR_RED "\033[1;31m" +#define TTY_COLOR_GREEN "\033[1;32m" +#define TTY_COLOR_YELLOW "\033[1;33m" +#define TTY_COLOR_BLUE "\033[1;34m" +#define TTY_COLOR_MAGENTA "\033[1;35m" +#define TTY_COLOR_CYAN "\033[1;36m" +#define TTY_COLOR_WHITE "\033[1;37m" + +static const char* const color_table[] = { + TTY_COLOR_NORMAL, + TTY_COLOR_WHITE, + TTY_COLOR_GREEN, + TTY_COLOR_YELLOW, + TTY_COLOR_MAGENTA, + 
TTY_COLOR_RED, +}; + +mlogger_tty::mlogger_tty(level runtime_level, std::ostream& stream) : + mlogger(runtime_level), + m_stream(stream) +{} + +mlogger_tty::~mlogger_tty() +{} + +void mlogger_tty::log_impl(level lv, std::string& str) +{ + // output atomically + + size_t sz = + strlen(color_table[lv]) + + str.size() + + strlen(TTY_COLOR_NORMAL "\n"); + + char* buf = (char*)::malloc(sz); + if(!buf) { throw std::bad_alloc(); } + + char* p = buf; + memcpy(p, color_table[lv], strlen(color_table[lv])); + p += strlen(color_table[lv]); + memcpy(p, str.data(), str.size()); + p += str.size(); + memcpy(p, TTY_COLOR_NORMAL "\n", strlen(TTY_COLOR_NORMAL "\n")); + + try { + m_stream.write(buf, sz) << std::flush; + } catch (...) { + free(buf); + throw; + } + free(buf); +} + + diff --git a/src/log/mlogger_tty.h b/src/log/mlogger_tty.h new file mode 100644 index 0000000..eb2d9cd --- /dev/null +++ b/src/log/mlogger_tty.h @@ -0,0 +1,18 @@ +#ifndef MLOGGER_TTY_H__ +#define MLOGGER_TTY_H__ + +#include "mlogger.h" + +class mlogger_tty : public mlogger { +public: + mlogger_tty(level runtime_level, std::ostream& stream); + ~mlogger_tty(); + + void log_impl(level lv, std::string& str); + +private: + std::ostream& m_stream; +}; + +#endif /* mlogger_tty.h */ + diff --git a/src/logic/Makefile.am b/src/logic/Makefile.am new file mode 100644 index 0000000..7eb90c8 --- /dev/null +++ b/src/logic/Makefile.am @@ -0,0 +1,152 @@ +export ERB +export RUBY +export RAGEL + +# work around for dependency +SUBDIRS = protogen + +noinst_LIBRARIES = libkumo_logic.a libkumo_storage.a +bin_PROGRAMS = kumo-manager kumo-server kumo-gateway + +AM_CPPFLAGS = -I.. +AM_C_CPPFLAGS = -I.. 
+ + +PROTOGEN_AUTOGEN_FILES = \ + manager/proto.h \ + manager/proto_control.h \ + manager/proto_network.h \ + manager/proto_replace.h \ + server/proto.h \ + server/proto_control.h \ + server/proto_network.h \ + server/proto_replace.h \ + server/proto_replace_stream.h \ + server/proto_store.h \ + gateway/proto.h \ + gateway/proto_network.h + + +libkumo_logic_a_SOURCES = \ + boot.cc \ + hash.cc \ + wavy_server.cc + +libkumo_storage_a_SOURCES = server/storage.cc server/storage/tchdb.cc +#libkumo_storage_a_SOURCES = server/storage.cc server/storage/luxio.cc + + +kumo_manager_SOURCES = \ + manager/control_framework.cc \ + manager/framework.cc \ + manager/main.cc \ + manager/proto_control.cc \ + manager/proto_network.cc \ + manager/proto_replace.cc + +kumo_manager_LDADD = \ + libkumo_logic.a \ + ../kazuhiki/libkazuhiki.a \ + ../rpc/libkumo_cluster.a \ + ../log/libkumo_log.a \ + ../mpsrc/libmpio.a + + +kumo_server_SOURCES = \ + server/framework.cc \ + server/main.cc \ + server/proto_control.cc \ + server/proto_network.cc \ + server/proto_replace.cc \ + server/proto_replace_stream.cc \ + server/proto_store.cc \ + server/storage.cc + +kumo_server_LDADD = \ + libkumo_logic.a \ + libkumo_storage.a \ + ../kazuhiki/libkazuhiki.a \ + ../rpc/libkumo_cluster.a \ + ../log/libkumo_log.a \ + ../mpsrc/libmpio.a + + +kumo_gateway_SOURCES = \ + gateway/framework.cc \ + gateway/interface.cc \ + gateway/main.cc \ + gateway/proto_network.cc \ + gateway/scope_store.cc \ + gateway/gate_cloudy.cc \ + gateway/gate_memproto.cc \ + gateway/gate_memtext.cc \ + gateway/memproto/memproto.c \ + gateway/memproto/memtext.c + +kumo_gateway_LDADD = \ + libkumo_logic.a \ + ../kazuhiki/libkazuhiki.a \ + ../log/libkumo_log.a \ + ../rpc/libkumo_rpc.a \ + ../mpsrc/libmpio.a + + +noinst_HEADERS = \ + server/proto.h \ + gateway/proto.h \ + manager/proto.h \ + boot.h \ + client_logic.h \ + clock.h \ + cluster_logic.h \ + global.h \ + hash.h \ + msgtype.h \ + role.h \ + rpc_server.h \ + wavy_server.h \ + 
manager/control_framework.h \ + manager/framework.h \ + manager/init.h \ + server/framework.h \ + server/init.h \ + server/buffer_queue.h \ + server/storage.h \ + server/storage/interface.h \ + gateway/framework.h \ + gateway/init.h \ + gateway/interface.h \ + gateway/scope_store.h \ + gateway/gate_cloudy.h \ + gateway/gate_memproto.h \ + gateway/gate_memtext.h \ + gateway/memproto/memproto.h \ + gateway/memproto/memtext.h + +nodist_noinst_HEADERS = \ + $(PROTOGEN_AUTOGEN_FILES) + +EXTRA_DIST = \ + gateway.proto.h \ + manager.proto.h \ + server.proto.h \ + gateway/memproto/memtext.rl + +gateway/memproto/memtext.c: gateway/memproto/memtext.rl + $(RAGEL) -C $< -o $@.tmp + mv $@.tmp $@ + +MOSTLYCLEANFILES = \ + $(PROTOGEN_AUTOGEN_FILES) \ + gateway/memproto/memtext.c + + +# work around for duplicated file name +kumo_manager_CFLAGS = $(AM_CFLAGS) +kumo_manager_CXXFLAGS = $(AM_CXXFLAGS) +kumo_server_CFLAGS = $(AM_CFLAGS) +kumo_server_CXXFLAGS = $(AM_CXXFLAGS) +kumo_gateway_CFLAGS = $(AM_CFLAGS) +kumo_gateway_CXXFLAGS = $(AM_CXXFLAGS) + + diff --git a/src/logic/boot.cc b/src/logic/boot.cc new file mode 100644 index 0000000..3a98058 --- /dev/null +++ b/src/logic/boot.cc @@ -0,0 +1,211 @@ +#include "logic/boot.h" +#include +#include +#include +#include +#include + +namespace kumo { + + +scoped_listen_tcp::scoped_listen_tcp(struct sockaddr_in addr) : + m_addr(addr), + m_sock(listen(m_addr)) { } + +scoped_listen_tcp::~scoped_listen_tcp() +{ + ::close(m_sock); +} + + +int scoped_listen_tcp::listen(const rpc::address& addr) +{ + int lsock = socket(PF_INET, SOCK_STREAM, 0); + if(lsock < 0) { + throw std::runtime_error("socket failed"); + } + + int on = 1; + if( ::setsockopt(lsock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0 ) { + ::close(lsock); + throw std::runtime_error("setsockopt failed"); + } + + char addrbuf[addr.addrlen()]; + addr.getaddr((sockaddr*)addrbuf); + + if( ::bind(lsock, (sockaddr*)addrbuf, sizeof(addrbuf)) < 0 ) { + ::close(lsock); + throw 
std::runtime_error("bind failed"); + } + + if( ::listen(lsock, 1024) < 0 ) { + ::close(lsock); + throw std::runtime_error("listen failed"); + } + + mp::set_nonblock(lsock); + + return lsock; +} + + +// Detach from the controlling terminal with the fork / setsid / fork sequence, +// optionally write the final pid to `pidfile`, and, when `close_stdio` is set, +// point stdin/stdout/stderr at /dev/null. Any failure prints a message and exits. +void do_daemonize(bool close_stdio, const char* pidfile) +{ + pid_t pid; + pid = fork(); + if(pid < 0) { perror("fork"); exit(1); } + if(pid != 0) { exit(0); } + if(setsid() == -1) { perror("setsid"); exit(1); } + pid = fork(); + if(pid < 0) { perror("fork"); exit(1); } + if(pid != 0) { exit(0); } + if(pidfile) { + FILE* f = fopen(pidfile, "w"); + if(!f) { perror("can't open pid file"); exit(1); } + fprintf(f, "%d", getpid()); + fclose(f); + } + if(close_stdio) { + int devnull_r = open("/dev/null", O_RDONLY); + if(devnull_r < 0) { perror("open(\"/dev/null\", \"r\")"); exit(1); } + // O_APPEND alone carries no write access mode; stdout/stderr need a writable descriptor + int devnull_a = open("/dev/null", O_WRONLY|O_APPEND); + if(devnull_a < 0) { perror("open(\"/dev/null\", \"a\")"); exit(1); } + close(0); + close(1); + close(2); + if(dup2(devnull_r, 0) < 0) { perror("dup2"); exit(1); } + if(dup2(devnull_a, 1) < 0) { perror("dup2"); exit(1); } + if(dup2(devnull_a, 2) < 0) { perror("dup2"); exit(1); } + close(devnull_r); + close(devnull_a); + } +} + + +void init_mlogger(const std::string& logfile, bool use_tty, mlogger::level level) +{ + if(!logfile.empty()) { + // log to file + if(logfile == "-") { + mlogger::reset(new mlogger_ostream(level, std::cout)); + } else { + std::ostream* logstream = new std::ofstream(logfile.c_str(), std::ios::app); + mlogger::reset(new mlogger_ostream(level, *logstream)); + } + } else if(use_tty) { + // log to tty + mlogger::reset(new mlogger_tty(level, std::cout)); + } else { + // log to stdout + mlogger::reset(new mlogger_ostream(level, std::cout)); + } +} + + +rpc_server_args::rpc_server_args() : + keepalive_interval(2.0), + clock_interval(2.0), + connect_timeout_sec(1.0), + connect_retry_limit(4), + wthreads(2), + rthreads(4) +{ + kazuhiki::init(); +} + +rpc_server_args::~rpc_server_args() { } + +rpc_cluster_args::rpc_cluster_args() : +
cluster_lsock(-1) { } + +rpc_cluster_args::~rpc_cluster_args() +{ + ::close(cluster_lsock); +} + + +void rpc_server_args::set_basic_args() +{ + using namespace kazuhiki; + on("-v", "--verbose", + type::boolean(&verbose)); + on("-o", "--log", &logfile_set, + type::string(&logfile)); + on("-g", "--binary-log", &logpack_path_set, + type::string(&logpack_path)); + on("-d", "--daemon", &pidfile_set, + type::string(&pidfile)); + on("-Ci", "--clock-interval", + type::numeric(&clock_interval, clock_interval)); + on("-Ys", "--connect-timeout", + type::numeric(&connect_timeout_sec, connect_timeout_sec)); + on("-Yn", "--connect-retry-limit", + type::numeric(&connect_retry_limit, connect_retry_limit)); + on("-TW", "--write-threads", + type::numeric(&wthreads, wthreads)); + on("-TR", "--read-threads", + type::numeric(&rthreads, rthreads)); +} + +void rpc_server_args::show_usage() +{ +std::cout << +" -Ys " "--connect-timeout connect timeout time in seconds\n" +" -Yn " "--connect-retry-limit connect retry limit\n" +" -Ci " "--clock-interval clock interval in seconds\n" +" -TW " "--write-threads number of threads for asynchronous writing\n" +" -TR " "--read-threads number of threads for asynchronous reading\n" +" -o " "--log output logs to the file\n" +" -g " "--binary-log enable binary log\n" +" -v " "--verbose\n" +" -d " "--daemon\n" +<< std::endl; +} + +void rpc_cluster_args::set_basic_args() +{ + using namespace kazuhiki; + on("-k", "--keepalive-interval", + type::numeric(&keepalive_interval, keepalive_interval)); + rpc_server_args::set_basic_args(); +} + +void rpc_cluster_args::show_usage() +{ +std::cout << +" -k " "--keepalive-interval keepalive interval in seconds\n" +; +rpc_server_args::show_usage(); +} + +void rpc_server_args::parse(int argc, char** argv) +try { + prog = argv[0]; + --argc; + ++argv; + kazuhiki::break_order(argc, argv); + + convert(); + +} catch (std::runtime_error& e) { + show_usage(); + std::cerr << "error: " << e.what() << std::endl; + exit(1); +} + 
+// Derive the integer fields marked "// convert" in boot.h from the parsed floating-point options. +void rpc_server_args::convert() +{ + clock_interval_usec = clock_interval * 1000 * 1000; + // seconds -> milliseconds; the parsed option is connect_timeout_sec, the derived field is connect_timeout_msec + connect_timeout_msec = connect_timeout_sec * 1000; +} + +void rpc_cluster_args::convert() +{ + keepalive_interval_usec = keepalive_interval *1000 *1000; + rpc_server_args::convert(); +} + + +} // namespace kumo + diff --git a/src/logic/boot.h b/src/logic/boot.h new file mode 100644 index 0000000..62bcd8b --- /dev/null +++ b/src/logic/boot.h @@ -0,0 +1,108 @@ +#ifndef LOGIC_BOOT_H__ +#define LOGIC_BOOT_H__ + +#include "kazuhiki/kazuhiki.h" +#include "rpc/address.h" +#include "log/mlogger_tty.h" +#include "log/mlogger_ostream.h" +#include "log/logpacker.h" +#include "logic/global.h" + +namespace kumo { + + +class scoped_listen_tcp { +public: + scoped_listen_tcp(struct sockaddr_in addr); + ~scoped_listen_tcp(); + +public: + static int listen(const rpc::address& addr); + +public: + int sock() const + { + return m_sock; + } + + rpc::address addr() const + { + return rpc::address(m_addr); + } + +private: + rpc::address m_addr; + int m_sock; + +private: + scoped_listen_tcp(); + scoped_listen_tcp(const scoped_listen_tcp&); +}; + + +void do_daemonize(bool close_stdio, const char* pidfile); + +void init_mlogger(const std::string& logfile, bool use_tty, mlogger::level level); + +struct rpc_server_args { + rpc_server_args(); + ~rpc_server_args(); + + bool verbose; + + bool logfile_set; + std::string logfile; + + bool logpack_path_set; + std::string logpack_path; + + bool pidfile_set; + std::string pidfile; + + const char* prog; + + double keepalive_interval; // sec + unsigned long keepalive_interval_usec; // convert + + double clock_interval; // sec + unsigned long clock_interval_usec; // convert + + double connect_timeout_sec; + unsigned int connect_timeout_msec; // convert + + unsigned short connect_retry_limit; + + unsigned short wthreads; + unsigned short rthreads; + +public: + virtual void set_basic_args(); + virtual void show_usage(); + + void parse(int argc, char** argv); +
+protected: + virtual void convert(); +}; + + +struct rpc_cluster_args : rpc_server_args { + rpc_cluster_args(); + ~rpc_cluster_args(); + + virtual void set_basic_args(); + virtual void show_usage(); + + struct sockaddr_in cluster_addr_in; + rpc::address cluster_addr; // convert + int cluster_lsock; // convert + +protected: + virtual void convert(); +}; + + +} // namespace kumo + +#endif /* logic/boot.h */ + diff --git a/src/logic/client_logic.h b/src/logic/client_logic.h new file mode 100644 index 0000000..b131dfc --- /dev/null +++ b/src/logic/client_logic.h @@ -0,0 +1,31 @@ +#ifndef LOGIC_CLIENT_LOGIC__ +#define LOGIC_CLIENT_LOGIC__ + +#include "rpc/client.h" +#include "logic/rpc_server.h" +#include "logic/hash.h" +#include "logic/clock.h" +#include "logic/global.h" + +namespace kumo { + + +template +class client_logic : public rpc_server, public rpc::client<> { +public: + client_logic(unsigned short rthreads, unsigned short wthreads, + unsigned int connect_timeout_msec, + unsigned short connect_retry_limit) : + rpc_server(rthreads, wthreads), + rpc::client<>(connect_timeout_msec, connect_retry_limit) { } +}; + + +typedef mp::shared_ptr shared_session; +typedef mp::weak_ptr weak_session; + + +} // namespace kumo + +#endif /* logic/client_logic.h */ + diff --git a/src/logic/clock.h b/src/logic/clock.h new file mode 100644 index 0000000..783ae4c --- /dev/null +++ b/src/logic/clock.h @@ -0,0 +1,155 @@ +#ifndef LOGIC_CLOCK_H__ +#define LOGIC_CLOCK_H__ + +#include +#include +#include +#include + +// FIXME 5 sec. 
+#ifndef TIME_ERROR_MARGIN +#define TIME_ERROR_MARGIN 5 +#endif + +namespace kumo { + + +class ClockTime; + + +class Clock { +public: + Clock(uint32_t n = 0) : m(n) {} + ~Clock() {} + +public: + ClockTime now(); + ClockTime now_incr(); + + uint32_t get_incr() + { + //return m++; + return __sync_fetch_and_add(&m, 1); + } + + uint32_t get() const + { + return m; + } + + void update(uint32_t o) + { + while(true) { + uint32_t x = m; + if(!clock_less(x, o)) { return; } + if(__sync_bool_compare_and_swap(&m, x, o)) { + return; + } + } + //if(clock_less(m, o)) { + // m = o; + //} + } + + void increment() + { + //++m; + __sync_add_and_fetch(&m, 1); + } + + bool operator< (const Clock& o) const + { + return clock_less(m, o.m); + } + +private: + static bool clock_less(uint32_t x, uint32_t y) + { + if((x < (((uint32_t)1)<<10) && (((uint32_t)1)<<22) < y) || + (y < (((uint32_t)1)<<10) && (((uint32_t)1)<<22) < x)) { + return x > y; + } else { + return x < y; + } + } + + friend class ClockTime; + +private: + volatile uint32_t m; +}; + + +class ClockTime { +public: + ClockTime(uint32_t c, uint32_t t) : + m( (((uint64_t)t) << 32) | c ) {} + + ClockTime(uint64_t n) : m(n) {} + + ~ClockTime() {} + +public: + uint64_t get() const { return m; } + + Clock clock() const { + return Clock(m&0xffffffff); + } + + ClockTime before_sec(uint32_t sec) + { + return ClockTime( m - (((uint64_t)sec) << 32) ); + } + + bool operator== (const ClockTime& o) const + { + return m == o.m; + } + + bool operator!= (const ClockTime& o) const + { + return !(*this == o); + } + + bool operator< (const ClockTime& o) const + { + return clocktime_less(m, o.m); + } + + bool operator<= (const ClockTime& o) const + { + return (*this == o) || (*this < o); + } + +private: + static bool clocktime_less(uint64_t x, uint64_t y) + { + uint32_t xt = x>>32; + uint32_t yt = y>>32; + if( std::abs((int)(xt - yt)) < TIME_ERROR_MARGIN ) { + return Clock::clock_less(x&0xffffffff, y&0xffffffff); + } else { + return xt < yt; + } + } + 
+private: + volatile uint64_t m; +}; + + +inline ClockTime Clock::now() +{ + return ClockTime(get(), time(NULL)); +} + +inline ClockTime Clock::now_incr() +{ + return ClockTime(get_incr(), time(NULL)); +} + + +} // namespace kumo + +#endif /* logic/clock.h */ + diff --git a/src/logic/cluster_logic.h b/src/logic/cluster_logic.h new file mode 100644 index 0000000..398e8f4 --- /dev/null +++ b/src/logic/cluster_logic.h @@ -0,0 +1,76 @@ +#ifndef LOGIC_CLUSTER_LOGIC__ +#define LOGIC_CLUSTER_LOGIC__ + +#include "rpc/cluster.h" +#include "logic/rpc_server.h" +#include "logic/hash.h" +#include "logic/clock.h" +#include "logic/global.h" +#include "logic/role.h" + +namespace kumo { + + +using rpc::role_type; +using rpc::weak_node; +using rpc::shared_node; +using rpc::shared_peer; + + +template +class cluster_logic : public rpc_server, public rpc::cluster { +public: + cluster_logic(unsigned short rthreads, unsigned short wthreads, + role_type self_id, + const address& self_addr, + unsigned int connect_timeout_msec, + unsigned short connect_retry_limit) : + rpc_server(rthreads, wthreads), + rpc::cluster( + self_id, + self_addr, + connect_timeout_msec, + connect_retry_limit) { } + +protected: + void start_keepalive(unsigned long interval) + { + struct timespec ts = {interval / 1000000, interval % 1000000 * 1000}; + wavy::timer(&ts, mp::bind(&Framework::keep_alive, + static_cast(this))); + LOG_TRACE("start keepalive interval = ",interval," usec"); + } + +protected: + void listen_cluster(int fd) + { + using namespace mp::placeholders; + wavy::listen(fd, mp::bind( + &Framework::cluster_accepted, this, + _1, _2)); + } + +private: + void cluster_accepted(int fd, int err) + { + if(fd < 0) { + LOG_FATAL("accept failed: ",strerror(err)); + static_cast(this)->signal_end(); + return; + } + LOG_DEBUG("accept cluster fd=",fd); + static_cast(this)->rpc::cluster::accepted(fd); + } +}; + + +#define REQUIRE_SSLK const pthread_scoped_lock& sslk +#define REQUIRE_HSLK const pthread_scoped_lock& 
hslk +#define REQUIRE_RELK const pthread_scoped_lock& relk +#define REQUIRE_STLK const pthread_scoped_lock& stlk + + +} // namespace kumo + +#endif /* logic/cluster_logic.h */ + diff --git a/src/logic/gateway.proto.h b/src/logic/gateway.proto.h new file mode 100644 index 0000000..56de490 --- /dev/null +++ b/src/logic/gateway.proto.h @@ -0,0 +1,31 @@ +#include "gateway/proto.h" +#include "logic/msgtype.h" +#include "logic/client_logic.h" +#include +#include +#include + +namespace kumo { +namespace gateway { + + +@message proto_network::HashSpacePush = 48 + + +@rpc proto_network + message HashSpacePush.1 { + msgtype::HSSeed wseed; + msgtype::HSSeed rseed; + // acknowledge: true + }; + +public: + void renew_hash_space(); + void renew_hash_space_for(const address& addr); + RPC_REPLY_DECL(HashSpaceRequest_1, from, res, err, life); +@end + + +} // namespace gateway +} // namespace kumo + diff --git a/src/logic/gateway/framework.cc b/src/logic/gateway/framework.cc new file mode 100644 index 0000000..b717e33 --- /dev/null +++ b/src/logic/gateway/framework.cc @@ -0,0 +1,80 @@ +#include "gateway/framework.h" + +namespace kumo { +namespace gateway { + + +std::auto_ptr net; +std::auto_ptr share; + + +void framework::dispatch( + shared_session from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z) +{ + switch(method.get()) { + RPC_DISPATCH(proto_network, HashSpacePush_1); + default: + // FIXME exception class + throw std::runtime_error("unknown method"); + } +} + + +void framework::step_timeout() +{ + rpc::client<>::step_timeout(); +} + + +void framework::session_lost(const address& addr, shared_session& s) +{ + LOG_INFO("lost session ",addr); + if(addr == share->manager1() || addr == share->manager2()) { + m_proto_network.renew_hash_space_for(addr); + } +} + + +inline void framework::submit(get_request& req) +{ + m_scope_store.Get(req.callback, req.user, req.life, + req.key, req.keylen, req.hash); +} + +inline void 
framework::submit(set_request& req) +{ + m_scope_store.Set(req.callback, req.user, req.life, + req.key, req.keylen, req.hash, + req.val, req.vallen); +} + +inline void framework::submit(delete_request& req) +{ + m_scope_store.Delete(req.callback, req.user, req.life, + req.key, req.keylen, req.hash); +} + + +// interface.h: +void submit(get_request& req) +{ + net->submit(req); +} + +// interface.h: +void submit(set_request& req) +{ + net->submit(req); +} + +// interface.h: +void submit(delete_request& req) +{ + net->submit(req); +} + + +} // namespace gateway +} // namespace kumo + diff --git a/src/logic/gateway/framework.h b/src/logic/gateway/framework.h new file mode 100644 index 0000000..f661dbf --- /dev/null +++ b/src/logic/gateway/framework.h @@ -0,0 +1,102 @@ +#ifndef GATEWAY_FRAMEWORK_H__ +#define GATEWAY_FRAMEWORK_H__ + +#include "logic/client_logic.h" +#include "gateway/proto_network.h" +#include "gateway/scope_store.h" + +namespace kumo { +namespace gateway { + + +class framework : public client_logic { +public: + template + framework(const Config& cfg); + + void dispatch( + shared_session from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z); + + void session_lost(const address& addr, shared_session& s); + + void step_timeout(); + +public: + void submit(get_request& req); + void submit(set_request& req); + void submit(delete_request& req); + +private: + proto_network m_proto_network; + scope_store m_scope_store; + +public: + proto_network& scope_proto_network() { return m_proto_network; } + +public: + shared_session get_server(const address& addr) + { + return get_session(addr); + } + +private: + framework(); + framework(const framework&); +}; + + +class resource { +public: + template + resource(const Config& cfg); + +private: + mp::pthread_rwlock m_hs_rwlock; + HashSpace m_rhs; + HashSpace m_whs; + + const address m_manager1; + const address m_manager2; + + const bool m_cfg_async_replicate_set; + const bool 
m_cfg_async_replicate_delete; + + const unsigned short m_cfg_get_retry_num; + const unsigned short m_cfg_set_retry_num; + const unsigned short m_cfg_delete_retry_num; + + const unsigned short m_cfg_renew_threshold; + +public: + RESOURCE_ACCESSOR(mp::pthread_rwlock, hs_rwlock); + RESOURCE_ACCESSOR(HashSpace, rhs); + RESOURCE_ACCESSOR(HashSpace, whs); + + RESOURCE_CONST_ACCESSOR(address, manager1); + RESOURCE_CONST_ACCESSOR(address, manager2); + + RESOURCE_CONST_ACCESSOR(bool, cfg_async_replicate_set); + RESOURCE_CONST_ACCESSOR(bool, cfg_async_replicate_delete); + + RESOURCE_CONST_ACCESSOR(unsigned short, cfg_get_retry_num); + RESOURCE_CONST_ACCESSOR(unsigned short, cfg_set_retry_num); + RESOURCE_CONST_ACCESSOR(unsigned short, cfg_delete_retry_num); + + RESOURCE_CONST_ACCESSOR(unsigned short, cfg_renew_threshold); + +private: + resource(); + resource(const resource&); +}; + + +extern std::auto_ptr net; +extern std::auto_ptr share; + + +} // namespace gateway +} // namespace kumo + +#endif /* gateway/framework.h */ + diff --git a/src/logic/gateway/gate_cloudy.cc b/src/logic/gateway/gate_cloudy.cc new file mode 100644 index 0000000..ca11191 --- /dev/null +++ b/src/logic/gateway/gate_cloudy.cc @@ -0,0 +1,488 @@ +#include "gateway/gate_cloudy.h" +#include "gateway/memproto/memproto.h" +#include "log/mlogger.h" +#include "gateway/framework.h" // FIXME net->signal_end() +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace kumo { + + +static const size_t CLOUDY_INITIAL_ALLOCATION_SIZE = 16*1024; +static const size_t CLOUDY_RESERVE_SIZE = 1024; + +Cloudy::Cloudy(int lsock) : + m_lsock(lsock) { } + +Cloudy::~Cloudy() {} + + +void Cloudy::accepted(int fd, int err) +{ + if(fd < 0) { + LOG_FATAL("accept failed: ",strerror(err)); + gateway::net->signal_end(); // FIXME gateway::fatal_end() + return; + } + LOG_DEBUG("accept memproto text user fd=",fd); + wavy::add(fd); +} + +void Cloudy::listen() +{ + using namespace 
mp::placeholders; + wavy::listen(m_lsock, + mp::bind(&Cloudy::accepted, _1, _2)); +} + + +class Cloudy::Connection : public wavy::handler { +public: + Connection(int fd); + ~Connection(); + +public: + void read_event(); + +private: + // get, getq, getk, getkq + inline void memproto_getx(memproto_header* h, const char* key, uint16_t keylen); + + // set + inline void memproto_set(memproto_header* h, const char* key, uint16_t keylen, + const char* val, uint32_t vallen, + uint32_t flags, uint32_t expiration); + + // delete + inline void memproto_delete(memproto_header* h, const char* key, uint16_t keylen, + uint32_t expiration); + +private: + memproto_parser m_memproto; + mp::stream_buffer m_buffer; + + typedef gateway::get_request get_request; + typedef gateway::set_request set_request; + typedef gateway::delete_request delete_request; + + typedef gateway::get_response get_response; + typedef gateway::set_response set_response; + typedef gateway::delete_response delete_response; + + typedef rpc::shared_zone shared_zone; + + shared_zone m_zone; + + struct Queue { + Queue() : m_valid(true) { } + ~Queue() { } + int is_valid() const { return m_valid; } + void invalidate() { m_valid = false; } + private: + bool m_valid; + }; + + typedef mp::shared_ptr SharedQueue; + SharedQueue m_queue; + + + struct Responder { + Responder(memproto_header* h, int fd, SharedQueue& queue) : + m_fd(fd), m_h(*h), m_queue(queue) { } + ~Responder() { } + + bool is_valid() const { return m_queue->is_valid(); } + + int fd() const { return m_fd; } + + protected: + void send_response_nodata( + uint8_t status, uint64_t cas); + + void send_response( + shared_zone& life, + uint8_t status, + const char* key, uint16_t keylen, + const void* val, uint16_t vallen, + const char* extra, uint16_t extralen, + uint64_t cas); + + private: + static inline void pack_header( + char* hbuf, uint16_t status, uint8_t op, + uint16_t keylen, uint32_t vallen, uint8_t extralen, + uint32_t opaque, uint64_t cas); + + private: 
+ int m_fd; + memproto_header m_h; + SharedQueue m_queue; + }; + + struct ResGet : Responder { + ResGet(memproto_header* h, int fd, SharedQueue& queue) : + Responder(h, fd, queue) { } + ~ResGet() { } + void response(get_response& res); + public: + void set_req_key() { m_req_key = true; } + void set_req_quiet() { m_req_quiet = true; } + private: + bool m_req_key; + bool m_req_quiet; + }; + + struct ResSet : Responder { + ResSet(memproto_header* h, int fd, SharedQueue& queue) : + Responder(h, fd, queue) { } + ~ResSet() { } + void response(set_response& res); + void no_response(set_response& res); + }; + + struct ResDelete : Responder { + ResDelete(memproto_header* h, int fd, SharedQueue& queue) : + Responder(h, fd, queue) { } + ~ResDelete() { } + void response(delete_response& res); + void no_response(delete_response& res); + }; + +private: + Connection(); + Connection(const Connection&); +}; + + +Cloudy::Connection::Connection(int fd) : + wavy::handler(fd), + m_buffer(CLOUDY_INITIAL_ALLOCATION_SIZE), + m_zone(new msgpack::zone()), + m_queue(new Queue()) +{ + void (*cmd_getx)(void*, memproto_header*, + const char*, uint16_t) = &mp::object_callback + ::mem_fun; + + void (*cmd_set)(void*, memproto_header*, + const char*, uint16_t, + const char*, uint32_t, + uint32_t, uint32_t) = &mp::object_callback + ::mem_fun; + + void (*cmd_delete)(void*, memproto_header*, + const char*, uint16_t, + uint32_t) = &mp::object_callback + ::mem_fun; + + memproto_callback cb = { + cmd_getx, // get + cmd_set, // set + NULL, // add + NULL, // replace + cmd_delete, // delete + NULL, // increment + NULL, // decrement + NULL, // quit + NULL, // flush + cmd_getx, // getq + NULL, // noop + NULL, // version + cmd_getx, // getk + cmd_getx, // getkq + NULL, // append + NULL, // prepend + }; + + memproto_parser_init(&m_memproto, &cb, this); +} + +Cloudy::Connection::~Connection() +{ + m_queue->invalidate(); +} + + +void Cloudy::Connection::read_event() +try { + 
m_buffer.reserve_buffer(CLOUDY_RESERVE_SIZE); + + size_t rl = ::read(fd(), m_buffer.buffer(), m_buffer.buffer_capacity()); + if(rl < 0) { + if(errno == EAGAIN || errno == EINTR) { + return; + } else { + throw std::runtime_error("read error"); + } + } else if(rl == 0) { + LOG_DEBUG("connection closed: ",strerror(errno)); + throw std::runtime_error("connection closed"); + } + + m_buffer.buffer_consumed(rl); + + do { + size_t off = 0; + int ret = memproto_parser_execute(&m_memproto, + (char*)m_buffer.data(), m_buffer.data_size(), &off); + + if(ret == 0) { + break; + } + + if(ret < 0) { + //std::cout << "parse error " << ret << std::endl; + throw std::runtime_error("parse error"); + } + + m_buffer.data_used(off); + + m_zone->push_finalizer( + &mp::object_delete, + m_buffer.release()); + + ret = memproto_dispatch(&m_memproto); + if(ret <= 0) { + LOG_DEBUG("unknown command ",(uint16_t)-ret); + throw std::runtime_error("unknown command"); + } + + m_zone.reset(new msgpack::zone()); + + } while(m_buffer.data_size() > 0); + +} catch (std::exception& e) { + LOG_DEBUG("memcached binary protocol error: ",e.what()); + throw; +} catch (...) 
{ + LOG_DEBUG("memcached binary protocol error: unknown error"); + throw; +} + + +void Cloudy::Connection::memproto_getx(memproto_header* h, const char* key, uint16_t keylen) +{ + LOG_TRACE("getx"); + + bool cmd_k = (h->opcode == MEMPROTO_CMD_GETK || h->opcode == MEMPROTO_CMD_GETKQ); + bool cmd_q = (h->opcode == MEMPROTO_CMD_GETQ || h->opcode == MEMPROTO_CMD_GETKQ); + + ResGet* ctx = m_zone->allocate(h, fd(), m_queue); + if(cmd_k) { ctx->set_req_key(); } + if(cmd_q) { ctx->set_req_quiet(); } + + get_request req; + req.keylen = keylen; + req.key = key; + req.hash = gateway::stdhash(req.key, req.keylen); + req.user = (void*)ctx; + req.callback = &mp::object_callback + ::mem_fun; + req.life = m_zone; + + gateway::submit(req); +} + +void Cloudy::Connection::memproto_set(memproto_header* h, const char* key, uint16_t keylen, + const char* val, uint32_t vallen, + uint32_t flags, uint32_t expiration) +{ + LOG_TRACE("set"); + + if(h->cas || flags || expiration) { + // FIXME error response + throw std::runtime_error("memcached binary protocol: invalid argument"); + } + + ResSet* ctx = m_zone->allocate(h, fd(), m_queue); + set_request req; + req.keylen = keylen; + req.key = key; + req.vallen = vallen; + req.hash = gateway::stdhash(req.key, req.keylen); + req.val = val; + req.user = (void*)ctx; + req.callback = &mp::object_callback + ::mem_fun; + req.life = m_zone; + + gateway::submit(req); +} + +void Cloudy::Connection::memproto_delete(memproto_header* h, const char* key, uint16_t keylen, + uint32_t expiration) +{ + LOG_TRACE("delete"); + + if(expiration) { + // FIXME error response + throw std::runtime_error("memcached binary protocol: invalid argument"); + } + + ResDelete* ctx = m_zone->allocate(h, fd(), m_queue); + delete_request req; + req.key = key; + req.keylen = keylen; + req.hash = gateway::stdhash(req.key, req.keylen); + req.user = (void*)ctx; + req.callback = &mp::object_callback + ::mem_fun; + req.life = m_zone; + + gateway::submit(req); +} + + +namespace { + 
static const uint32_t ZERO_FLAG = 0; +} // noname namespace + +void Cloudy::Connection::ResGet::response(get_response& res) +{ + if(!is_valid()) { return; } + LOG_TRACE("get response"); + + if(res.error) { + // error + if(m_req_quiet) { return; } + LOG_TRACE("getx res err"); + send_response_nodata(MEMPROTO_RES_INVALID_ARGUMENTS, 0); + return; + } + + if(!res.val) { + // not found + if(m_req_quiet) { return; } + send_response_nodata(MEMPROTO_RES_KEY_NOT_FOUND, 0); + return; + } + + // found + send_response(res.life, MEMPROTO_RES_NO_ERROR, + res.key, (m_req_key ? res.keylen : 0), + res.val, res.vallen, + (char*)&ZERO_FLAG, 4, + 0); +} + +void Cloudy::Connection::ResSet::response(set_response& res) +{ + if(!is_valid()) { return; } + LOG_TRACE("set response"); + + if(res.error) { + // error + send_response_nodata(MEMPROTO_RES_OUT_OF_MEMORY, 0); + return; + } + + // stored + send_response_nodata(MEMPROTO_RES_NO_ERROR, 0); +} + +void Cloudy::Connection::ResDelete::response(delete_response& res) +{ + if(!is_valid()) { return; } + LOG_TRACE("delete response"); + + if(res.error) { + // error + send_response_nodata(MEMPROTO_RES_INVALID_ARGUMENTS, 0); + return; + } + + if(res.deleted) { + send_response_nodata(MEMPROTO_RES_NO_ERROR, 0); + } else { + send_response_nodata(MEMPROTO_RES_OUT_OF_MEMORY, 0); + } +} + + +void Cloudy::Connection::Responder::pack_header( + char* hbuf, uint16_t status, uint8_t op, + uint16_t keylen, uint32_t vallen, uint8_t extralen, + uint32_t opaque, uint64_t cas) +{ + hbuf[0] = 0x81; + hbuf[1] = op; + *(uint16_t*)&hbuf[2] = htons(keylen); + hbuf[4] = extralen; + hbuf[5] = 0x00; + *(uint16_t*)&hbuf[6] = htons(status); + *(uint32_t*)&hbuf[8] = htonl(vallen + keylen + extralen); + *(uint32_t*)&hbuf[12] = htonl(opaque); + *(uint32_t*)&hbuf[16] = htonl((uint32_t)(cas>>32)); + *(uint32_t*)&hbuf[20] = htonl((uint32_t)(cas&0xffffffff)); +} + +void Cloudy::Connection::Responder::send_response_nodata( + uint8_t status, uint64_t cas) +{ + char* header = 
(char*)::malloc(MEMPROTO_HEADER_SIZE); + if(!header) { throw std::bad_alloc(); } + pack_header(header, status, m_h.opcode, + 0, 0, 0, + m_h.opaque, cas); + wavy::request req(&::free, header); + wavy::write(m_fd, header, MEMPROTO_HEADER_SIZE, req); +} + +inline void Cloudy::Connection::Responder::send_response( + shared_zone& life, + uint8_t status, + const char* key, uint16_t keylen, + const void* val, uint16_t vallen, + const char* extra, uint16_t extralen, + uint64_t cas) +{ + char* header = (char*)life->malloc(24); + pack_header(header, status, m_h.opcode, + keylen, vallen, extralen, + m_h.opaque, cas); + + struct iovec vb[4]; + + vb[0].iov_base = header; + vb[0].iov_len = 24; + size_t cnt = 1; + + if(extralen > 0) { + vb[cnt].iov_base = const_cast(extra); + vb[cnt].iov_len = extralen; + ++cnt; + } + + if(keylen > 0) { + vb[cnt].iov_base = const_cast(key); + vb[cnt].iov_len = keylen; + ++cnt; + } + + if(vallen > 0) { + vb[cnt].iov_base = const_cast(val); + vb[cnt].iov_len = vallen; + ++cnt; + } + + wavy::request req(&mp::object_delete, new shared_zone(life)); + wavy::writev(m_fd, vb, cnt, req); +} + + +} // namespace kumo + diff --git a/src/logic/gateway/gate_cloudy.h b/src/logic/gateway/gate_cloudy.h new file mode 100644 index 0000000..9423196 --- /dev/null +++ b/src/logic/gateway/gate_cloudy.h @@ -0,0 +1,30 @@ +#ifndef GATEWAY_GATE_CLOUDY_H__ +#define GATEWAY_GATE_CLOUDY_H__ + +#include "gateway/interface.h" + +namespace kumo { + + +class Cloudy : public gateway::gate { +public: + Cloudy(int lsock); + ~Cloudy(); + + static void accepted(int fd, int err); + void listen(); + +private: + class Connection; + int m_lsock; + +private: + Cloudy(); + Cloudy(const Cloudy&); +}; + + +} // namespace kumo + +#endif /* gateway/gate_cloudy.h */ + diff --git a/src/logic/gateway/gate_memproto.cc b/src/logic/gateway/gate_memproto.cc new file mode 100644 index 0000000..de8e6d9 --- /dev/null +++ b/src/logic/gateway/gate_memproto.cc @@ -0,0 +1,697 @@ +#include 
"gateway/gate_memproto.h" +#include "gateway/memproto/memproto.h" +#include "log/mlogger.h" +#include "gateway/framework.h" // FIXME net->signal_end() +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace kumo { + + +static const size_t MEMPROTO_INITIAL_ALLOCATION_SIZE = 2048; +static const size_t MEMPROTO_RESERVE_SIZE = 1024; + + +Memproto::Memproto(int lsock) : + m_lsock(lsock) { } + +Memproto::~Memproto() {} + + +void Memproto::accepted(int fd, int err) +{ + if(fd < 0) { + LOG_FATAL("accept failed: ",strerror(err)); + gateway::net->signal_end(); // FIXME gateway::fatal_end() + return; + } + LOG_DEBUG("accept memproto text user fd=",fd); + wavy::add(fd); +} + +void Memproto::listen() +{ + using namespace mp::placeholders; + wavy::listen(m_lsock, + mp::bind(&Memproto::accepted, _1, _2)); +} + + + +class Memproto::Connection : public wavy::handler { +public: + Connection(int fd); + ~Connection(); + +public: + void read_event(); + +private: + // get, getq, getk, getkq + inline void request_getx(memproto_header* h, + const char* key, uint16_t keylen); + + // set + inline void request_set(memproto_header* h, + const char* key, uint16_t keylen, + const char* val, uint32_t vallen, + uint32_t flags, uint32_t expiration); + + // delete + inline void request_delete(memproto_header* h, + const char* key, uint16_t keylen, + uint32_t expiration); + + // noop + inline void request_noop(memproto_header* h); + + inline void request_flush(memproto_header* h, + uint32_t expiration); + +private: + memproto_parser m_memproto; + mp::stream_buffer m_buffer; + + typedef gateway::get_request gw_get_request; + typedef gateway::set_request gw_set_request; + typedef gateway::delete_request gw_delete_request; + + typedef gateway::get_response gw_get_response; + typedef gateway::set_response gw_set_response; + typedef gateway::delete_response gw_delete_response; + + typedef rpc::shared_zone shared_zone; + + shared_zone 
m_zone; + + + struct entry; + + + class response_queue { + public: + response_queue(int fd); + ~response_queue(); + + void push_entry(entry* e, shared_zone& life); + void reached_try_send(entry* e, shared_zone& life, + struct iovec* vec, size_t veclen); + + int is_valid() const; + void invalidate(); + + private: + bool m_valid; + int m_fd; + + struct element_t { + entry* e; + shared_zone life; + struct iovec* vec; + size_t veclen; + }; + + mp::pthread_mutex m_queue_mutex; + + typedef std::deque queue_t; + queue_t m_queue; + + struct find_entry_compare; + + private: + response_queue(); + response_queue(const response_queue&); + }; + + typedef mp::shared_ptr shared_entry_queue; + shared_entry_queue m_queue; + + + struct entry { + shared_entry_queue queue; + memproto_header header; + }; + + + // get, getq, getk, getkq + struct get_entry : entry { + bool flag_key; + bool flag_quiet; + }; + static void response_getx(void* user, gw_get_response& res); + + + // set + struct set_entry : entry { + }; + static void response_set(void* user, gw_set_response& res); + + + // delete + struct delete_entry : entry { + }; + static void response_delete(void* user, gw_delete_response& res); + + + static void send_response_nosend(entry* e, shared_zone& life); + + static void send_response_nodata(entry* e, shared_zone& life, + uint8_t status); + + static void send_response(entry* e, shared_zone& life, + uint8_t status, + const char* key, uint16_t keylen, + const void* val, uint16_t vallen, + const char* extra, uint16_t extralen); + + static inline void pack_header( + char* hbuf, uint16_t status, uint8_t op, + uint16_t keylen, uint32_t vallen, uint8_t extralen, + uint32_t opaque, uint64_t cas); + +private: + Connection(); + Connection(const Connection&); +}; + + +Memproto::Connection::Connection(int fd) : + wavy::handler(fd), + m_buffer(MEMPROTO_INITIAL_ALLOCATION_SIZE), + m_zone(new msgpack::zone()), + m_queue(new response_queue(fd)) +{ + void (*cmd_getx)(void*, memproto_header*, + 
const char*, uint16_t) = &mp::object_callback + ::mem_fun; + + void (*cmd_set)(void*, memproto_header*, + const char*, uint16_t, + const char*, uint32_t, + uint32_t, uint32_t) = &mp::object_callback + ::mem_fun; + + void (*cmd_delete)(void*, memproto_header*, + const char*, uint16_t, + uint32_t) = &mp::object_callback + ::mem_fun; + + void (*cmd_noop)(void*, memproto_header*) = + &mp::object_callback + ::mem_fun; + + void (*cmd_flush)(void*, memproto_header*, + uint32_t) = &mp::object_callback + ::mem_fun; + + memproto_callback cb = { + cmd_getx, // get + cmd_set, // set + NULL, // add + NULL, // replace + cmd_delete, // delete + NULL, // increment + NULL, // decrement + NULL, // quit + cmd_flush, // flush + cmd_getx, // getq + cmd_noop, // noop + NULL, // version + cmd_getx, // getk + cmd_getx, // getkq + NULL, // append + NULL, // prepend + }; + + memproto_parser_init(&m_memproto, &cb, this); +} + +Memproto::Connection::~Connection() +{ + m_queue->invalidate(); +} + + +struct Memproto::Connection::response_queue::find_entry_compare { + find_entry_compare(entry* key) : m_key(key) { } + + bool operator() (const element_t& elem) + { + return elem.e == m_key; + } + + entry* m_key; +}; + +Memproto::Connection::response_queue::response_queue(int fd) : + m_valid(true), m_fd(fd) { } + +Memproto::Connection::response_queue::~response_queue() { } + +inline void Memproto::Connection::response_queue::push_entry( + entry* e, shared_zone& life) +{ + element_t m = {e, life}; + + mp::pthread_scoped_lock mqlk(m_queue_mutex); + m_queue.push_back(m); +} + +inline void Memproto::Connection::response_queue::reached_try_send( + entry* e, shared_zone& life, + struct iovec* vec, size_t veclen) +{ + mp::pthread_scoped_lock mqlk(m_queue_mutex); + + queue_t::iterator found = std::find_if(m_queue.begin(), m_queue.end(), + find_entry_compare(e)); + + if(found == m_queue.end()) { + // FIXME log? 
+ return; + } + + found->e = NULL; + found->life = life; + found->vec = vec; + found->veclen = veclen; + + do { + element_t& elem(m_queue.front()); + + if(elem.e) { + break; + } + + if(elem.veclen > 0) { + wavy::request req(&mp::object_delete, new shared_zone(elem.life)); + wavy::writev(m_fd, elem.vec, elem.veclen, req); + } + + m_queue.pop_front(); + } while(!m_queue.empty()); + +#if 0 + size_t reqlen = 0; + + queue_t::iterator qlast = m_queue.begin(); + for(queue_t::const_iterator qend = m_queue.end(); qlast != qend; ++qlast) { + if(qlast->e) { break; } + reqlen += qlast->veclen; + } + + if(reqlen == 0) { return; } + + // optimize + //if(m_queue.begin() + 1 == qlast) { + //} + + typedef mp::wavy::output::request wavy_request; + + struct iovec* const vb = (struct iovec*)found->life->malloc( + sizeof(struct iovec) * reqlen); + + wavy_request* const vr = (wavy_request*)found->life->malloc( + sizeof(wavy_request) * reqlen); + + memset(vr, 0, sizeof(wavy_request) * reqlen); + + + struct iovec* vbp = vb; + wavy_request* vrp = vr; + + for(queue_t::const_iterator q(m_queue.begin()); q != qlast; ++q) { + memcpy(vbp, q->vec, sizeof(struct iovec) * q->veclen); + + vrp[q->veclen-1] = wavy_request( + &mp::object_delete, + new shared_zone(q->life)); + + vbp += q->veclen; + vrp += q->veclen; + } + + m_queue.erase(m_queue.begin(), qlast); + + mqlk.unlock(); + + wavy::writev(m_fd, vb, vr, reqlen); +#endif +} + +inline int Memproto::Connection::response_queue::is_valid() const +{ + return m_valid; +} + +inline void Memproto::Connection::response_queue::invalidate() +{ + m_valid = false; +} + + +void Memproto::Connection::read_event() +try { + m_buffer.reserve_buffer(MEMPROTO_RESERVE_SIZE); + + size_t rl = ::read(fd(), m_buffer.buffer(), m_buffer.buffer_capacity()); + if(rl < 0) { + if(errno == EAGAIN || errno == EINTR) { + return; + } else { + throw std::runtime_error("read error"); + } + } else if(rl == 0) { + LOG_DEBUG("connection closed: ",strerror(errno)); + throw 
std::runtime_error("connection closed"); + } + + m_buffer.buffer_consumed(rl); + + do { + size_t off = 0; + int ret = memproto_parser_execute(&m_memproto, + (char*)m_buffer.data(), m_buffer.data_size(), &off); + + if(ret == 0) { + break; + } + + if(ret < 0) { + //std::cout << "parse error " << ret << std::endl; + throw std::runtime_error("parse error"); + } + + m_buffer.data_used(off); + + m_zone->push_finalizer( + &mp::object_delete, + m_buffer.release()); + + ret = memproto_dispatch(&m_memproto); + if(ret <= 0) { + LOG_DEBUG("unknown command ",(uint16_t)-ret); + throw std::runtime_error("unknown command"); + } + + m_zone.reset(new msgpack::zone()); + + } while(m_buffer.data_size() > 0); + +} catch (std::exception& e) { + LOG_DEBUG("memcached binary protocol error: ",e.what()); + throw; +} catch (...) { + LOG_DEBUG("memcached binary protocol error: unknown error"); + throw; +} + +void Memproto::Connection::request_getx(memproto_header* h, + const char* key, uint16_t keylen) +{ + LOG_TRACE("getx"); + + get_entry* e = m_zone->allocate(); + e->queue = m_queue; + e->header = *h; + e->flag_key = (h->opcode == MEMPROTO_CMD_GETK || h->opcode == MEMPROTO_CMD_GETKQ); + e->flag_quiet = (h->opcode == MEMPROTO_CMD_GETQ || h->opcode == MEMPROTO_CMD_GETKQ); + + gw_get_request req; + req.keylen = keylen; + req.key = key; + req.hash = gateway::stdhash(req.key, req.keylen); + req.life = m_zone; + req.user = reinterpret_cast(e); + req.callback = &Connection::response_getx; + + m_queue->push_entry(e, m_zone); + + gateway::submit(req); +} + +void Memproto::Connection::request_set(memproto_header* h, + const char* key, uint16_t keylen, + const char* val, uint32_t vallen, + uint32_t flags, uint32_t expiration) +{ + LOG_TRACE("set"); + + if(h->cas || flags || expiration) { + // FIXME error response + throw std::runtime_error("memcached binary protocol: invalid argument"); + } + + set_entry* e = m_zone->allocate(); + e->queue = m_queue; + e->header = *h; + + gw_set_request req; + 
req.keylen = keylen; + req.key = key; + req.vallen = vallen; + req.hash = gateway::stdhash(req.key, req.keylen); + req.val = val; + req.life = m_zone; + req.user = reinterpret_cast(e); + req.callback = &Connection::response_set; + + m_queue->push_entry(e, m_zone); + + gateway::submit(req); +} + +void Memproto::Connection::request_delete(memproto_header* h, + const char* key, uint16_t keylen, + uint32_t expiration) +{ + LOG_TRACE("delete"); + + if(expiration) { + // FIXME error response + throw std::runtime_error("memcached binary protocol: invalid argument"); + } + + delete_entry* e = m_zone->allocate(); + e->queue = m_queue; + e->header = *h; + + gw_delete_request req; + req.key = key; + req.keylen = keylen; + req.hash = gateway::stdhash(req.key, req.keylen); + req.life = m_zone; + req.user = reinterpret_cast(e); + req.callback = &Connection::response_delete; + + m_queue->push_entry(e, m_zone); + + gateway::submit(req); +} + +void Memproto::Connection::request_noop(memproto_header* h) +{ + LOG_TRACE("noop"); + + entry* e = m_zone->allocate(); + e->queue = m_queue; + e->header = *h; + + m_queue->push_entry(e, m_zone); + send_response_nodata(e, m_zone, MEMPROTO_RES_NO_ERROR); +} + +void Memproto::Connection::request_flush(memproto_header* h, + uint32_t expiration) +{ + LOG_TRACE("flush"); + + if(expiration) { + // FIXME error response + throw std::runtime_error("memcached binary protocol: invalid argument"); + } + + entry* e = m_zone->allocate(); + e->queue = m_queue; + e->header = *h; + + m_queue->push_entry(e, m_zone); + send_response_nodata(e, m_zone, MEMPROTO_RES_NO_ERROR); +} + +namespace { + static const uint32_t ZERO_FLAG = 0; +} // noname namespace + +void Memproto::Connection::response_getx(void* user, gw_get_response& res) +{ + get_entry* e = reinterpret_cast(user); + if(!e->queue->is_valid()) { return; } + + LOG_TRACE("get response"); + + if(res.error) { + // error + if(e->flag_quiet) { + send_response_nosend(e, res.life); + return; + } + LOG_TRACE("getx 
res err"); + send_response_nodata(e, res.life, MEMPROTO_RES_INVALID_ARGUMENTS); + return; + } + + if(!res.val) { + // not found + if(e->flag_quiet) { + send_response_nosend(e, res.life); + return; + } + send_response_nodata(e, res.life, MEMPROTO_RES_KEY_NOT_FOUND); + return; + } + + // found + send_response(e, res.life, MEMPROTO_RES_NO_ERROR, + res.key, (e->flag_key ? res.keylen : 0), + res.val, res.vallen, + (char*)&ZERO_FLAG, 4); +} + +void Memproto::Connection::response_set(void* user, gw_set_response& res) +{ + set_entry* e = reinterpret_cast(user); + if(!e->queue->is_valid()) { return; } + + LOG_TRACE("set response"); + + if(res.error) { + // error + send_response_nodata(e, res.life, MEMPROTO_RES_OUT_OF_MEMORY); + return; + } + + // stored + send_response_nodata(e, res.life, MEMPROTO_RES_NO_ERROR); +} + +void Memproto::Connection::response_delete(void* user, gw_delete_response& res) +{ + delete_entry* e = reinterpret_cast(user); + if(!e->queue->is_valid()) { return; } + + LOG_TRACE("delete response"); + + if(res.error) { + // error + send_response_nodata(e, res.life, MEMPROTO_RES_INVALID_ARGUMENTS); + return; + } + + if(res.deleted) { + send_response_nodata(e, res.life, MEMPROTO_RES_NO_ERROR); + } else { + send_response_nodata(e, res.life, MEMPROTO_RES_OUT_OF_MEMORY); + } +} + + +void Memproto::Connection::pack_header( + char* hbuf, uint16_t status, uint8_t op, + uint16_t keylen, uint32_t vallen, uint8_t extralen, + uint32_t opaque, uint64_t cas) +{ + hbuf[0] = 0x81; + hbuf[1] = op; + *(uint16_t*)&hbuf[2] = htons(keylen); + hbuf[4] = extralen; + hbuf[5] = 0x00; + *(uint16_t*)&hbuf[6] = htons(status); + *(uint32_t*)&hbuf[8] = htonl(vallen + keylen + extralen); + *(uint32_t*)&hbuf[12] = htonl(opaque); + *(uint32_t*)&hbuf[16] = htonl((uint32_t)(cas>>32)); + *(uint32_t*)&hbuf[20] = htonl((uint32_t)(cas&0xffffffff)); +} + +void Memproto::Connection::send_response_nosend(entry* e, shared_zone& life) +{ + e->queue->reached_try_send(e, life, NULL, 0); +} + +void 
Memproto::Connection::send_response_nodata( + entry* e, shared_zone& life, + uint8_t status) +{ + char* header = (char*)life->malloc(MEMPROTO_HEADER_SIZE); + + pack_header(header, status, e->header.opcode, + 0, 0, 0, + e->header.opaque, 0); // cas = 0 + + struct iovec* vec = (struct iovec*)life->malloc( + sizeof(struct iovec) * 1); + vec[0].iov_base = header; + vec[0].iov_len = MEMPROTO_HEADER_SIZE; + + e->queue->reached_try_send(e, life, vec, 1); +} + +inline void Memproto::Connection::send_response( + entry* e, shared_zone& life, + uint8_t status, + const char* key, uint16_t keylen, + const void* val, uint16_t vallen, + const char* extra, uint16_t extralen) +{ + char* header = (char*)life->malloc(MEMPROTO_HEADER_SIZE); + pack_header(header, status, e->header.opcode, + keylen, vallen, extralen, + e->header.opaque, 0); // cas = 0 + + struct iovec* vec = (struct iovec*)life->malloc( + sizeof(struct iovec) * 4); + + vec[0].iov_base = header; + vec[0].iov_len = MEMPROTO_HEADER_SIZE; + size_t cnt = 1; + + if(extralen > 0) { + vec[cnt].iov_base = const_cast(extra); + vec[cnt].iov_len = extralen; + ++cnt; + } + + if(keylen > 0) { + vec[cnt].iov_base = const_cast(key); + vec[cnt].iov_len = keylen; + ++cnt; + } + + if(vallen > 0) { + vec[cnt].iov_base = const_cast(val); + vec[cnt].iov_len = vallen; + ++cnt; + } + + e->queue->reached_try_send(e, life, vec, cnt); +} + + +} // namespace kumo + diff --git a/src/logic/gateway/gate_memproto.h b/src/logic/gateway/gate_memproto.h new file mode 100644 index 0000000..7b430e8 --- /dev/null +++ b/src/logic/gateway/gate_memproto.h @@ -0,0 +1,30 @@ +#ifndef GATEWAY_GATE_MEMPROTO_H__ +#define GATEWAY_GATE_MEMPROTO_H__ + +#include "gateway/interface.h" + +namespace kumo { + + +class Memproto : public gateway::gate { +public: + Memproto(int lsock); + ~Memproto(); + + static void accepted(int fd, int err); + void listen(); + +private: + class Connection; + int m_lsock; + +private: + Memproto(); + Memproto(const Memproto&); +}; + + +} // 
namespace kumo + +#endif /* gateway/gate_memproto.h */ + diff --git a/src/logic/gateway/gate_memtext.cc b/src/logic/gateway/gate_memtext.cc new file mode 100644 index 0000000..457adf1 --- /dev/null +++ b/src/logic/gateway/gate_memtext.cc @@ -0,0 +1,487 @@ +#include "gateway/gate_memtext.h" +#include "gateway/memproto/memtext.h" +#include "log/mlogger.h" +#include "gateway/framework.h" // FIXME net->signal_end() +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace kumo { + + +static const size_t MEMTEXT_INITIAL_ALLOCATION_SIZE = 16*1024; +static const size_t MEMTEXT_RESERVE_SIZE = 1024; + + +Memtext::Memtext(int lsock) : + m_lsock(lsock) { } + +Memtext::~Memtext() {} + + +void Memtext::accepted(int fd, int err) +{ + if(fd < 0) { + LOG_FATAL("accept failed: ",strerror(err)); + gateway::net->signal_end(); // FIXME gateway::fatal_end() + return; + } + LOG_DEBUG("accept memproto text user fd=",fd); + wavy::add(fd); +} + +void Memtext::listen() +{ + using namespace mp::placeholders; + wavy::listen(m_lsock, + mp::bind(&Memtext::accepted, _1, _2)); +} + + +class Memtext::Connection : public wavy::handler { +public: + Connection(int fd); + ~Connection(); + +public: + void read_event(); + +private: + inline int memproto_get( + memtext_command cmd, + memtext_request_retrieval* r); + + inline int memproto_set( + memtext_command cmd, + memtext_request_storage* r); + + inline int memproto_delete( + memtext_command cmd, + memtext_request_delete* r); + +private: + memtext_parser m_memproto; + mp::stream_buffer m_buffer; + size_t m_off; + + typedef mp::shared_ptr SharedValid; + SharedValid m_valid; + + typedef gateway::get_request get_request; + typedef gateway::set_request set_request; + typedef gateway::delete_request delete_request; + + typedef gateway::get_response get_response; + typedef gateway::set_response set_response; + typedef gateway::delete_response delete_response; + + typedef rpc::shared_zone 
shared_zone; + + + struct Responder { + Responder(int fd, SharedValid& valid) : + m_fd(fd), m_valid(valid) { } + ~Responder() { } + + bool is_valid() const { return *m_valid; } + + int fd() const { return m_fd; } + + protected: + inline void send_data(const char* buf, size_t buflen); + inline void send_datav(struct iovec* vb, size_t count, shared_zone& life); + + private: + int m_fd; + SharedValid m_valid; + }; + + struct ResGet : Responder { + ResGet(int fd, SharedValid& valid) : + Responder(fd, valid) { } + ~ResGet() { } + void response(get_response& res); + private: + char m_numbuf[3+10+3]; // " 0 " + uint32 + "\r\n\0" + ResGet(); + ResGet(const ResGet&); + }; + + struct ResMultiGet : Responder { + ResMultiGet(int fd, SharedValid& valid, + struct iovec* vec, unsigned* count, + struct iovec* qhead, unsigned qlen) : + Responder(fd, valid), + m_vec(vec), m_count(count), + m_qhead(qhead), m_qlen(qlen) { } + ~ResMultiGet() { } + void response(get_response& res); + private: + struct iovec* m_vec; + unsigned *m_count; + struct iovec* m_qhead; + size_t m_qlen; + char m_numbuf[3+10+3]; // " 0 " + uint32 + "\r\n\0" + private: + ResMultiGet(); + ResMultiGet(const ResMultiGet&); + }; + + struct ResSet : Responder { + ResSet(int fd, SharedValid& valid) : + Responder(fd, valid) { } + ~ResSet() { } + void response(set_response& res); + void no_response(set_response& res); + }; + + struct ResDelete : Responder { + ResDelete(int fd, SharedValid& valid) : + Responder(fd, valid) { } + ~ResDelete() { } + void response(delete_response& res); + void no_response(delete_response& res); + }; + +private: + Connection(); + Connection(const Connection&); +}; + + +Memtext::Connection::Connection(int fd) : + mp::wavy::handler(fd), + m_buffer(MEMTEXT_INITIAL_ALLOCATION_SIZE), + m_off(0), + m_valid(new bool(true)) +{ + int (*cmd_get)(void*, memtext_command, memtext_request_retrieval*) = + &mp::object_callback:: + mem_fun; + + int (*cmd_set)(void*, memtext_command, memtext_request_storage*) = + 
&mp::object_callback:: + mem_fun; + + int (*cmd_delete)(void*, memtext_command, memtext_request_delete*) = + &mp::object_callback:: + mem_fun; + + memtext_callback cb = { + cmd_get, // get + cmd_set, // set + NULL, // add + NULL, // replace + NULL, // append + NULL, // prepend + NULL, // cas + cmd_delete, // delete + NULL, // incr + NULL, // decr + }; + + memtext_init(&m_memproto, &cb, this); +} + +Memtext::Connection::~Connection() +{ + *m_valid = false; +} + + +void Memtext::Connection::read_event() +try { + m_buffer.reserve_buffer(MEMTEXT_RESERVE_SIZE); + + ssize_t rl = ::read(fd(), m_buffer.buffer(), m_buffer.buffer_capacity()); + if(rl < 0) { + if(errno == EAGAIN || errno == EINTR) { + return; + } else { + throw std::runtime_error("read error"); + } + } else if(rl == 0) { + throw std::runtime_error("connection closed"); + } + + m_buffer.buffer_consumed(rl); + + do { + int ret = memtext_execute(&m_memproto, + (char*)m_buffer.data(), m_buffer.data_size(), &m_off); + if(ret < 0) { + throw std::runtime_error("parse error"); + } + if(ret == 0) { return; } + m_buffer.data_used(m_off); + m_off = 0; + } while(m_buffer.data_size() > 0); + +} catch (std::exception& e) { + LOG_DEBUG("memcached text protocol error: ",e.what()); + throw; +} catch (...) 
// Canned replies written verbatim to memcached text-protocol clients.
namespace {
// Sent when a request uses an option this gateway rejects
// (non-zero flags or exptime on set, non-zero exptime on delete).
// The message previously read "CLIENT_ERROR supported", dropping the "not".
static const char* const NOT_SUPPORTED_REPLY = "CLIENT_ERROR not supported\r\n";
// Sent when the gateway reports an error for a get / set / delete request.
static const char* const GET_FAILED_REPLY    = "SERVER_ERROR get failed\r\n";
static const char* const STORE_FAILED_REPLY  = "SERVER_ERROR store failed\r\n";
static const char* const DELETE_FAILED_REPLY = "SERVER_ERROR delete failed\r\n";
}  // noname namespace
r->key_num; ++i) { + // don't use shared zone. msgpack::allocate is not thread-safe. + req.user = (void*)ctxs[i]; + req.key = r->key[i]; + req.keylen = r->key_len[i]; + req.hash = gateway::stdhash(req.key, req.keylen); + gateway::submit(req); + } + } + + return 0; +} + + +int Memtext::Connection::memproto_set( + memtext_command cmd, + memtext_request_storage* r) +{ + LOG_TRACE("set"); + RELEASE_REFERENCE(life); + + if(r->flags || r->exptime) { + wavy::write(fd(), NOT_SUPPORTED_REPLY, strlen(NOT_SUPPORTED_REPLY)); + return 0; + } + + ResSet* ctx = life->allocate(fd(), m_valid); + set_request req; + req.key = r->key; + req.keylen = r->key_len; + req.hash = gateway::stdhash(req.key, req.keylen); + req.val = r->data; + req.vallen = r->data_len; + req.life = life; + + if(r->noreply) { + req.callback = &mp::object_callback + ::mem_fun; + } else { + req.callback = &mp::object_callback + ::mem_fun; + } + req.user = ctx; + + gateway::submit(req); + + return 0; +} + + +int Memtext::Connection::memproto_delete( + memtext_command cmd, + memtext_request_delete* r) +{ + LOG_TRACE("delete"); + RELEASE_REFERENCE(life); + + if(r->exptime) { + wavy::write(fd(), NOT_SUPPORTED_REPLY, strlen(NOT_SUPPORTED_REPLY)); + return 0; + } + + ResDelete* ctx = life->allocate(fd(), m_valid); + delete_request req; + req.key = r->key; + req.keylen = r->key_len; + req.hash = gateway::stdhash(req.key, req.keylen); + req.life = life; + + if(r->noreply) { + req.callback = &mp::object_callback + ::mem_fun; + } else { + req.callback = &mp::object_callback + ::mem_fun; + } + req.user = ctx; + + gateway::submit(req); + + return 0; +} + + + +void Memtext::Connection::ResGet::response(get_response& res) +{ + if(!is_valid()) { return; } + LOG_TRACE("get response"); + + if(res.error) { + send_data(GET_FAILED_REPLY, strlen(GET_FAILED_REPLY)); + return; + } + + if(!res.val) { + send_data("END\r\n", 5); + return; + } + + struct iovec vb[5]; + vb[0].iov_base = const_cast("VALUE "); + vb[0].iov_len = 6; + 
vb[1].iov_base = const_cast(res.key); + vb[1].iov_len = res.keylen; + vb[2].iov_base = m_numbuf; + vb[2].iov_len = sprintf(m_numbuf, " 0 %u\r\n", res.vallen); + vb[3].iov_base = const_cast(res.val); + vb[3].iov_len = res.vallen; + vb[4].iov_base = const_cast("\r\nEND\r\n"); + vb[4].iov_len = 7; + send_datav(vb, 5, res.life); +} + + +void Memtext::Connection::ResMultiGet::response(get_response& res) +{ + if(!is_valid()) { return; } + LOG_TRACE("get multi response ",m_count); + + if(res.error || !res.val) { + memset(m_vec, 0, sizeof(struct iovec)*5); + goto filled; + } + + // don't use shared zone. msgpack::allocate is not thread-safe. + m_vec[0].iov_base = const_cast("VALUE "); + m_vec[0].iov_len = 6; + m_vec[1].iov_base = const_cast(res.key); + m_vec[1].iov_len = res.keylen; + m_vec[2].iov_base = m_numbuf; + m_vec[2].iov_len = sprintf(m_numbuf, " 0 %u\r\n", res.vallen); + m_vec[3].iov_base = const_cast(res.val); + m_vec[3].iov_len = res.vallen; + m_vec[4].iov_base = const_cast("\r\n"); + m_vec[4].iov_len = 2; + +filled: + if(__sync_sub_and_fetch(m_count, 1) == 0) { + send_datav(m_qhead, m_qlen, res.life); + } +} + + +void Memtext::Connection::ResSet::response(set_response& res) +{ + if(!is_valid()) { return; } + LOG_TRACE("set response"); + + if(res.error) { + send_data(STORE_FAILED_REPLY, strlen(STORE_FAILED_REPLY)); + return; + } + + send_data("STORED\r\n", 8); +} + +void Memtext::Connection::ResSet::no_response(set_response& res) +{ } + + +void Memtext::Connection::ResDelete::response(delete_response& res) +{ + if(!is_valid()) { return; } + LOG_TRACE("delete response"); + + if(res.error) { + send_data(DELETE_FAILED_REPLY, strlen(DELETE_FAILED_REPLY)); + return; + } + if(res.deleted) { + send_data("DELETED\r\n", 9); + } else { + send_data("NOT FOUND\r\n", 11); + } +} + +void Memtext::Connection::ResDelete::no_response(delete_response& res) +{ } + + +} // namespace kumo + diff --git a/src/logic/gateway/gate_memtext.h b/src/logic/gateway/gate_memtext.h new file 
mode 100644 index 0000000..d54dfb4 --- /dev/null +++ b/src/logic/gateway/gate_memtext.h @@ -0,0 +1,30 @@ +#ifndef GATEWAY_GATE_MEMTEXT_H__ +#define GATEWAY_GATE_MEMTEXT_H__ + +#include "gateway/interface.h" + +namespace kumo { + + +class Memtext : public gateway::gate { +public: + Memtext(int lsock); + ~Memtext(); + + static void accepted(int fd, int err); + void listen(); + +private: + class Connection; + int m_lsock; + +private: + Memtext(); + Memtext(const Memtext&); +}; + + +} // namespace kumo + +#endif /* gateway/gate_memtext.h */ + diff --git a/src/logic/gateway/init.h b/src/logic/gateway/init.h new file mode 100644 index 0000000..72233b6 --- /dev/null +++ b/src/logic/gateway/init.h @@ -0,0 +1,49 @@ +#ifndef GATEWAY_INIT_H__ +#define GATEWAY_INIT_H__ + +#include "gateway/framework.h" + +namespace kumo { +namespace gateway { + + +template +framework::framework(const Config& cfg) : + client_logic( + cfg.rthreads, cfg.wthreads, + cfg.connect_timeout_msec, + cfg.connect_retry_limit) +{ + LOGPACK("SW",2, + "time", time(NULL), + "mgr1", share->manager1(), + "mgr2", share->manager2()); + start_timeout_step(cfg.clock_interval_usec); // rpc_server + scope_proto_network().renew_hash_space(); +} + +template +resource::resource(const Config& cfg) : + m_manager1(cfg.manager1), + m_manager2(cfg.manager2), + m_cfg_async_replicate_set(cfg.async_replicate_set), + m_cfg_async_replicate_delete(cfg.async_replicate_delete), + m_cfg_get_retry_num(cfg.get_retry_num), + m_cfg_set_retry_num(cfg.set_retry_num), + m_cfg_delete_retry_num(cfg.delete_retry_num), + m_cfg_renew_threshold(cfg.renew_threshold) +{ } + +template +static void init(const Config& cfg) +{ + share.reset(new resource(cfg)); + net.reset(new framework(cfg)); +} + + +} // namespace kumo +} // namespace gateway + +#endif /* gateway/init.h */ + diff --git a/src/logic/gateway/interface.cc b/src/logic/gateway/interface.cc new file mode 100644 index 0000000..22aa237 --- /dev/null +++ b/src/logic/gateway/interface.cc @@ -0,0 
+1,23 @@ +#include "gateway/framework.h" + +namespace kumo { +namespace gateway { + + +void add_gate(gate* it) +{ + it->listen(); +} + + +uint64_t stdhash(const char* key, size_t keylen) +{ + return HashSpace::hash(key, keylen); +} + +// submit() is in framework.cc + + +} // namespace gateway +} // namespace kumo + diff --git a/src/logic/gateway/interface.h b/src/logic/gateway/interface.h new file mode 100644 index 0000000..af280f8 --- /dev/null +++ b/src/logic/gateway/interface.h @@ -0,0 +1,90 @@ +#ifndef GATEWAY_INTERFACE_H__ +#define GATEWAY_INTERFACE_H__ + +#include "rpc/wavy.h" +#include "rpc/types.h" +#include +#include +#include + +namespace kumo { +namespace gateway { + + +typedef rpc::wavy wavy; +using rpc::shared_zone; + + +class gate { +public: + gate() { } + virtual ~gate() { } + virtual void listen() = 0; +}; + + +void add_gate(gate* it); + + + +struct basic_response { + uint64_t hash; + const char* key; + uint32_t keylen; + shared_zone life; + int error; +}; + +struct basic_request { + uint64_t hash; + const char* key; + uint32_t keylen; + shared_zone life; +}; + +struct get_response : basic_response { + char* val; + uint32_t vallen; + uint64_t clocktime; +}; + +struct set_response : basic_response { + const char* val; + uint32_t vallen; + uint64_t clocktime; +}; + +struct delete_response : basic_response { + bool deleted; +}; + +struct get_request : basic_request { + void (*callback)(void* user, get_response& res); + void* user; +}; + +struct set_request : basic_request { + void (*callback)(void* user, set_response& res); + void* user; + const char* val; + uint32_t vallen; +}; + +struct delete_request : basic_request { + void (*callback)(void* user, delete_response& res); + void* user; +}; + + +uint64_t stdhash(const char* key, size_t keylen); + +void submit(get_request& req); +void submit(set_request& req); +void submit(delete_request& req); + + +} // namespace gateway +} // namespace kumo + +#endif /* gateway/interface.h */ + diff --git 
a/src/logic/gateway/main.cc b/src/logic/gateway/main.cc new file mode 100644 index 0000000..2420d50 --- /dev/null +++ b/src/logic/gateway/main.cc @@ -0,0 +1,157 @@ +#include "logic/boot.h" +#include "gateway/framework.h" +#include "gateway/init.h" +#include "gateway/gate_memtext.h" +#include "gateway/gate_memproto.h" +#include "gateway/gate_cloudy.h" + +using namespace kumo; + +#define MEMTEXT_DEFAULT_PORT 11411 +#define MEMPROTO_DEFAULT_PORT 11511 +#define CLOUDY_DEFAULT_PORT 11611 + +struct arg_t : rpc_server_args { + + sockaddr_in manager1_in; + sockaddr_in manager2_in; + bool manager2_set; + rpc::address manager1; // convert + rpc::address manager2; // convert + + unsigned short get_retry_num; + unsigned short set_retry_num; + unsigned short delete_retry_num; + + unsigned short renew_threshold; + + bool async_replicate_set; + bool async_replicate_delete; + + bool memtext_set; + sockaddr_in memtext_addr_in; + int memtext_lsock; // convert + + bool memproto_set; + sockaddr_in memproto_addr_in; + int memproto_lsock; // convert + + bool cloudy_set; + sockaddr_in cloudy_addr_in; + int cloudy_lsock; // convert + + virtual void convert() + { + manager1 = rpc::address(manager1_in); + manager2 = rpc::address(manager2_in); + + if(!memtext_set && !memproto_set && !cloudy_set) { + throw std::runtime_error("-t, -b or -c is required"); + } + if(memtext_set) { + memtext_lsock = scoped_listen_tcp::listen(memtext_addr_in); + } + if(memproto_set) { + memproto_lsock = scoped_listen_tcp::listen(memproto_addr_in); + } + if(cloudy_set) { + cloudy_lsock = scoped_listen_tcp::listen(cloudy_addr_in); + } + + rpc_server_args::convert(); + } + + arg_t(int& argc, char* argv[]) : + get_retry_num(5), + set_retry_num(20), + delete_retry_num(20), + renew_threshold(1) + { + using namespace kazuhiki; + set_basic_args(); + on("-m", "--manager1", + type::connectable(&manager1_in, MANAGER_DEFAULT_PORT)); + on("-p", "--manager2", &manager2_set, + type::connectable(&manager2_in, 
MANAGER_DEFAULT_PORT)); + on("-t", "--memproto-text", &memtext_set, + type::listenable(&memtext_addr_in, MEMTEXT_DEFAULT_PORT)); + on("-b", "--memproto-binary", &memproto_set, + type::listenable(&memproto_addr_in, MEMPROTO_DEFAULT_PORT)); + on("-c", "--cloudy", &cloudy_set, + type::listenable(&cloudy_addr_in, CLOUDY_DEFAULT_PORT)); + on("-G", "--get-retry", + type::numeric(&get_retry_num, get_retry_num)); + on("-S", "--set-retry", + type::numeric(&set_retry_num, set_retry_num)); + on("-D", "--delete-retry", + type::numeric(&delete_retry_num, delete_retry_num)); + on("-rn", "--renew-threshold", + type::numeric(&renew_threshold, renew_threshold)); + on("-As", "--async-replicate-set", + type::boolean(&async_replicate_set)); + on("-Ad", "--async-replicate-delete", + type::boolean(&async_replicate_delete)); + parse(argc, argv); + } + + void show_usage() + { +std::cout << +"usage: "< -p [-t port="< " "--manager1 address of manager 1\n" +" -p " "--manager2 address of manager 2\n" +" -t <[addr:]port="< " "--memproto-text memcached text protocol listen port\n" +" -b <[addr:]port="< " "--memprpto-binary memcached binary protocol listen port\n" +" -c <[addr:]port="< " "--cloudy asynchronous memcached binary protocol listen port\n" +" -As " "--async-replicate-set send response without waiting replication on set\n" +" -Ad " "--async-replicate-delete send response without waiting replication on delete\n" +" -G " "--get-retry get retry limit\n" +" -S " "--set-retry set retry limit\n" +" -D " "--delete-retry delete retry limit\n" +" -rn " "--renew-threshold hash space renew threshold\n" +; +rpc_server_args::show_usage(); + } + +}; + + + +int main(int argc, char* argv[]) +{ + arg_t arg(argc, argv); + + // initialize logger first + mlogger::level loglevel = (arg.verbose ? 
mlogger::TRACE : mlogger::WARN); + init_mlogger(arg.logfile, arg.pidfile.empty(), loglevel); + + // initialize gate + std::auto_ptr mpt; + std::auto_ptr mpb; + std::auto_ptr cl; + if(arg.memtext_set) { mpt.reset(new Memtext(arg.memtext_lsock)); } + if(arg.memproto_set) { mpb.reset(new Memproto(arg.memproto_lsock)); } + if(arg.cloudy_set) { cl.reset(new Cloudy(arg.cloudy_lsock)); } + + // daemonize + if(!arg.pidfile.empty()) { + do_daemonize(!arg.logfile.empty(), arg.pidfile.c_str()); + } + + // initialize binary logger + if(arg.logpack_path_set) { + logpacker::initialize(arg.logpack_path.c_str()); + } + + // run server + gateway::init(arg); + + if(mpt.get()) { gateway::add_gate(mpt.get()); } + if(mpb.get()) { gateway::add_gate(mpb.get()); } + if(cl.get()) { gateway::add_gate(cl.get()); } + + gateway::net->run(); + gateway::net->join(); +} + diff --git a/src/logic/gateway/memproto/memproto.c b/src/logic/gateway/memproto/memproto.c new file mode 100644 index 0000000..57fbdee --- /dev/null +++ b/src/logic/gateway/memproto/memproto.c @@ -0,0 +1,247 @@ +/* + * memproto memcached binary protocol parser + * + * Copyright (C) 2008 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/* Reverse the byte order of a 64-bit value (big-endian <-> host order on a
 * little-endian machine). */
static inline uint64_t memproto_be64h(uint64_t x) {
	uint64_t swapped = 0;
	/* Peel bytes off the low end of x and push them onto the low end of
	 * the result, so the first byte taken ends up most significant. */
	for (unsigned i = 0; i < 8; ++i) {
		swapped = (swapped << 8) | (x & 0xffU);
		x >>= 8;
	}
	return swapped;
}
((void (*)(__VA_ARGS__))cb) +#define MEMPROTO_EXTRA_4_EXPIRATION(extra) htonl(*((uint32_t*)&extra[0])) +#define MEMPROTO_EXTRA_8_FLAGS(extra) htonl(*((uint32_t*)&extra[0])) +#define MEMPROTO_EXTRA_8_EXPIRATION(extra) htonl(*((uint32_t*)&extra[4])) +#define MEMPROTO_EXTRA_20_AMOUNT(extra) memproto_be64h(*((uint64_t*)&extra[0])) +#define MEMPROTO_EXTRA_20_INITIAL(extra) memproto_be64h(*((uint64_t*)&extra[8])) +#define MEMPROTO_EXTRA_20_EXPIRATION(extra) memproto_be64h(*((uint32_t*)&extra[16])) + + +void memproto_parser_init(memproto_parser* ctx, memproto_callback* cb, void* user) +{ + memset(ctx, 0, sizeof(memproto_parser)); + ctx->callback[0x00] = (void*)cb->cb_get; + ctx->callback[0x01] = (void*)cb->cb_set; + ctx->callback[0x02] = (void*)cb->cb_add; + ctx->callback[0x03] = (void*)cb->cb_replace; + ctx->callback[0x04] = (void*)cb->cb_delete; + ctx->callback[0x05] = (void*)cb->cb_increment; + ctx->callback[0x06] = (void*)cb->cb_decrement; + ctx->callback[0x07] = (void*)cb->cb_quit; + ctx->callback[0x08] = (void*)cb->cb_flush; + ctx->callback[0x09] = (void*)cb->cb_getq; + ctx->callback[0x0a] = (void*)cb->cb_noop; + ctx->callback[0x0b] = (void*)cb->cb_version; + ctx->callback[0x0c] = (void*)cb->cb_getk; + ctx->callback[0x0d] = (void*)cb->cb_getkq; + ctx->callback[0x0e] = (void*)cb->cb_append; + ctx->callback[0x0f] = (void*)cb->cb_prepend; + ctx->user = user; +} + + +int memproto_parser_execute(memproto_parser* ctx, const char* data, size_t datalen, size_t* off) +{ + size_t region = datalen - *off; + + if(region < MEMPROTO_HEADER_SIZE) { return 0; } + + ctx->header = data + *off; + uint32_t bodylen = MEMPROTO_BODY_LENGTH(ctx->header); + + region -= MEMPROTO_HEADER_SIZE; + if(region < bodylen) { return 0; } + + if( MEMPROTO_MAGIC(ctx->header) != MEMPROTO_REQUEST ) { return -1; } + + *off += MEMPROTO_HEADER_SIZE + bodylen; + + return 1; +} + + +int memproto_dispatch(memproto_parser* ctx) +{ + memproto_header h; + + h.magic = MEMPROTO_MAGIC(ctx->header); + h.opcode = 
MEMPROTO_OPCODE(ctx->header); + const uint16_t keylen = MEMPROTO_KEY_LENGTH(ctx->header); + const uint16_t extralen = MEMPROTO_EXTRA_LENGTH(ctx->header); + h.data_type = MEMPROTO_DATA_TYPE(ctx->header); + h.reserved = MEMPROTO_STATUS(ctx->header); + const uint32_t bodylen = MEMPROTO_BODY_LENGTH(ctx->header); + h.opaque = MEMPROTO_OPAQUE(ctx->header); + h.cas = MEMPROTO_CAS(ctx->header); + const char* const extra = ctx->header + MEMPROTO_HEADER_SIZE; + const char* const key = extra + extralen; + const char* const val = key + keylen; + const uint32_t vallen = bodylen - extralen - keylen; + + memproto_command cmd = (memproto_command)h.opcode; + void* cb = ctx->callback[cmd]; + if(!cb) { return -cmd; } + + switch(cmd) { + case MEMPROTO_CMD_GET: + case MEMPROTO_CMD_GETQ: + case MEMPROTO_CMD_GETK: + case MEMPROTO_CMD_GETKQ: + if(extralen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(vallen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(keylen == 0) { return MEMPROTO_INVALID_ARGUMENT; } + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + const char*, uint16_t)(ctx->user, &h, + key, keylen); + return 1; + + case MEMPROTO_CMD_DELETE: + if(keylen == 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(vallen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(extralen == 0) { + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + const char*, uint16_t, + uint32_t)(ctx->user, &h, + key, keylen, + 0); + return 1; + } else if(extralen == 4) { + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + const char*, uint16_t, + uint32_t)(ctx->user, &h, + key, keylen, + MEMPROTO_EXTRA_4_EXPIRATION(extra)); + return 1; + } else { return MEMPROTO_INVALID_ARGUMENT; } + + case MEMPROTO_CMD_FLUSH: + if(keylen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(vallen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(extralen == 0) { + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + uint32_t)(ctx->user, &h, + 0); + return 1; + } else if(extralen == 4) { + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + 
uint32_t)(ctx->user, &h, + MEMPROTO_EXTRA_4_EXPIRATION(extra)); + return 1; + } else { return MEMPROTO_INVALID_ARGUMENT; } + + + case MEMPROTO_CMD_SET: + case MEMPROTO_CMD_ADD: + case MEMPROTO_CMD_REPLACE: + if(keylen == 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(extralen != 8) { return MEMPROTO_INVALID_ARGUMENT; } + /*if(vallen == 0) { return MEMPROTO_INVALID_ARGUMENT; }*/ + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + const char*, uint16_t, + const char*, uint32_t, + uint32_t, uint32_t)(ctx->user, &h, + key, keylen, + val, vallen, + MEMPROTO_EXTRA_8_FLAGS(extra), + MEMPROTO_EXTRA_8_EXPIRATION(extra)); + return 1; + + case MEMPROTO_CMD_INCREMENT: + case MEMPROTO_CMD_DECREMENT: + if(extralen != 20) { return MEMPROTO_INVALID_ARGUMENT; } + if(keylen == 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(vallen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + const char*, uint16_t, + uint64_t, uint64_t, uint32_t)(ctx->user, &h, + key, keylen, + MEMPROTO_EXTRA_20_AMOUNT(extra), + MEMPROTO_EXTRA_20_INITIAL(extra), + MEMPROTO_EXTRA_20_EXPIRATION(extra)); + return 1; + + case MEMPROTO_CMD_QUIT: + case MEMPROTO_CMD_NOOP: + case MEMPROTO_CMD_VERSION: + if(keylen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(extralen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(vallen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + MEMPROTO_CALLBACK(cb, void*, memproto_header*)(ctx->user, &h); + return 1; + + case MEMPROTO_CMD_APPEND: + case MEMPROTO_CMD_PREPEND: + if(keylen == 0) { return MEMPROTO_INVALID_ARGUMENT; } + if(extralen != 0) { return MEMPROTO_INVALID_ARGUMENT; } + /*if(vallen == 0) { return MEMPROTO_INVALID_ARGUMENT; }*/ + MEMPROTO_CALLBACK(cb, void*, memproto_header*, + const char*, uint16_t, + const char*, uint32_t)(ctx->user, &h, + key, keylen, + val, vallen); + return 1; + } + + return -cmd; +} + + +#ifdef __cplusplus +} +#endif + diff --git a/src/logic/gateway/memproto/memproto.h b/src/logic/gateway/memproto/memproto.h new 
file mode 100644 index 0000000..fdb29d8 --- /dev/null +++ b/src/logic/gateway/memproto/memproto.h @@ -0,0 +1,183 @@ +/* + * memproto memcached binary protocol parser + * + * Copyright (C) 2008 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MEMPROTO_H__ +#define MEMPROTO_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + MEMPROTO_REQUEST = 0x80, + MEMPROTO_RESPONSE = 0x81, +} memproto_magic; + + +typedef enum { + MEMPROTO_RES_NO_ERROR = 0x0000, + MEMPROTO_RES_KEY_NOT_FOUND = 0x0001, + MEMPROTO_RES_KEY_EXISTS = 0x0002, + MEMPROTO_RES_VALUE_TOO_BIG = 0x0003, + MEMPROTO_RES_INVALID_ARGUMENTS = 0x0004, + MEMPROTO_RES_ITEM_NOT_STORED = 0x0005, + MEMPROTO_RES_UNKNOWN_COMMAND = 0x0081, + MEMPROTO_RES_OUT_OF_MEMORY = 0x0082, +} memproto_response_status; + + +typedef enum { + MEMPROTO_CMD_GET = 0x00, + MEMPROTO_CMD_SET = 0x01, + MEMPROTO_CMD_ADD = 0x02, + MEMPROTO_CMD_REPLACE = 0x03, + MEMPROTO_CMD_DELETE = 0x04, + MEMPROTO_CMD_INCREMENT = 0x05, + MEMPROTO_CMD_DECREMENT = 0x06, + MEMPROTO_CMD_QUIT = 0x07, + MEMPROTO_CMD_FLUSH = 0x08, + MEMPROTO_CMD_GETQ = 0x09, + MEMPROTO_CMD_NOOP = 0x0a, + MEMPROTO_CMD_VERSION = 0x0b, + MEMPROTO_CMD_GETK = 0x0c, + MEMPROTO_CMD_GETKQ = 0x0d, + MEMPROTO_CMD_APPEND = 0x0e, + MEMPROTO_CMD_PREPEND = 0x0f, +} memproto_command; + + +typedef enum { + MEMPROTO_TYPE_RAW_BYTES = 0x00, +} memproto_datatype; + + +typedef struct memproto_header_ { + uint8_t magic; + 
uint8_t opcode; + uint8_t data_type; + uint16_t reserved; + uint32_t opaque; + uint64_t cas; +} memproto_header; + + +#define MEMPROTO_HEADER_SIZE 24 + + +typedef struct memproto_callback_ { + void (*cb_get )(void* user, memproto_header* h, + const char* key, uint16_t keylen); + + void (*cb_set )(void* user, memproto_header* h, + const char* key, uint16_t keylen, + const char* val, uint32_t vallen, + uint32_t flags, uint32_t expiration); + + void (*cb_add )(void* user, memproto_header* h, + const char* key, uint16_t keylen, + const char* val, uint32_t vallen, + uint32_t flags, uint32_t expiration); + + void (*cb_replace )(void* user, memproto_header* h, + const char* key, uint16_t keylen, + const char* val, uint32_t vallen, + uint32_t flags, uint32_t expiration); + + void (*cb_delete )(void* user, memproto_header* h, + const char* key, uint16_t keylen, + uint32_t expiration); + + void (*cb_increment)(void* user, memproto_header* h, + const char* key, uint16_t keylen, + uint64_t amount, uint64_t initial, uint32_t expiration); + + void (*cb_decrement)(void* user, memproto_header* h, + const char* key, uint16_t keylen, + uint64_t amount, uint64_t initial, uint32_t expiration); + + void (*cb_quit )(void* user, memproto_header* h); + + void (*cb_flush )(void* user, memproto_header* h, + uint32_t expiration); + + void (*cb_getq )(void* user, memproto_header* h, + const char* key, uint16_t keylen); + + void (*cb_noop )(void* user, memproto_header* h); + + void (*cb_version )(void* user, memproto_header* h); + + + void (*cb_getk )(void* user, memproto_header* h, + const char* key, uint16_t keylen); + + void (*cb_getkq )(void* user, memproto_header* h, + const char* key, uint16_t keylen); + + void (*cb_append )(void* user, memproto_header* h, + const char* key, uint16_t keylen, + const char* val, uint32_t vallen); + + void (*cb_prepend )(void* user, memproto_header* h, + const char* key, uint16_t keylen, + const char* val, uint32_t vallen); + +} memproto_callback; + + 
+typedef struct memproto_parser_ { + const char* header; + void* callback[16]; + void* user; +} memproto_parser; + + +/** + * initialize parser context. + * @param ctx uninitialized parser context structure + * @param cb initialized callback structure. contents of it will be copied + * @param user this parameter will be passed to the callback function. + */ +void memproto_parser_init(memproto_parser* ctx, memproto_callback* cb, void* user); + +/** + * parse data. + * @return 0 if parsing is not completed, >0 if parsing is completed, <0 if failed + * @param ctx initialized parser context structure + * @param data buffer. parsing range is from (buffer + off) up to (buffer + len - off) + * @param len size of buffer + * @param off this parameter will be changed + */ +int memproto_parser_execute(memproto_parser* ctx, const char* data, size_t len, size_t* off); + +/** + * dispatch. + */ +#define MEMPROTO_INVALID_ARGUMENT (-128) +int memproto_dispatch(memproto_parser* ctx); + + +#ifdef __cplusplus +} +#endif + +#endif /* memproto.h */ + diff --git a/src/logic/gateway/memproto/memtext.h b/src/logic/gateway/memproto/memtext.h new file mode 100644 index 0000000..da33f9e --- /dev/null +++ b/src/logic/gateway/memproto/memtext.h @@ -0,0 +1,164 @@ +/* + * memtext memcached text protocol parser + * + * Copyright (C) 2008 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MEMTEXT_H__ +#define MEMTEXT_H__ + +#include +#include +#include + +#define MEMTEXT_MAX_MULTI_GET 256 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + /* retrieval */ + MEMTEXT_CMD_GET, + + /* storage */ + MEMTEXT_CMD_SET, + MEMTEXT_CMD_ADD, + MEMTEXT_CMD_REPLACE, + MEMTEXT_CMD_APPEND, + MEMTEXT_CMD_PREPEND, + + /* cas */ + MEMTEXT_CMD_CAS, + + /* delete */ + MEMTEXT_CMD_DELETE, + + /* numeric */ + MEMTEXT_CMD_INCR, + MEMTEXT_CMD_DECR, +} memtext_command; + + +typedef struct { + const char** key; + unsigned* key_len; + unsigned key_num; +} memtext_request_retrieval; + +typedef struct { + const char* key; + unsigned key_len; + const char* data; + unsigned data_len; + unsigned short flags; + uint32_t exptime; + bool noreply; +} memtext_request_storage; + +typedef struct { + const char* key; + unsigned key_len; + const char* data; + unsigned data_len; + unsigned short flags; + uint32_t exptime; + bool noreply; + uint64_t cas_unique; +} memtext_request_cas; + +typedef struct { + const char* key; + unsigned key_len; + uint32_t exptime; + bool noreply; +} memtext_request_delete; + +typedef struct { + const char* key; + unsigned key_len; + uint64_t value; + bool noreply; +} memtext_request_numeric; + +typedef int (*memtext_callback_retrieval)( + void* user, memtext_command cmd, + memtext_request_retrieval* req); + +typedef int (*memtext_callback_storage)( + void* user, memtext_command cmd, + memtext_request_storage* req); + +typedef int (*memtext_callback_cas)( + void* user, memtext_command cmd, + memtext_request_cas* req); + +typedef int (*memtext_callback_delete)( + void* user, memtext_command cmd, + memtext_request_delete* req); + +typedef int (*memtext_callback_numeric)( + void* user, memtext_command cmd, + memtext_request_numeric* req); + +typedef struct { + memtext_callback_retrieval cmd_get; + memtext_callback_storage cmd_set; + memtext_callback_storage cmd_add; + memtext_callback_storage cmd_replace; + memtext_callback_storage 
cmd_append; + memtext_callback_storage cmd_prepend; + memtext_callback_cas cmd_cas; + memtext_callback_delete cmd_delete; + memtext_callback_numeric cmd_incr; + memtext_callback_numeric cmd_decr; +} memtext_callback; + +typedef struct { + size_t data_count; + + int cs; + int top; + int stack[1]; + + memtext_command command; + + size_t key_pos[MEMTEXT_MAX_MULTI_GET]; + unsigned int key_len[MEMTEXT_MAX_MULTI_GET]; + unsigned int keys; + + size_t flags; + uint32_t exptime; + size_t bytes; + bool noreply; + uint64_t cas_unique; + + size_t data_pos; + unsigned int data_len; + + memtext_callback callback; + + void* user; +} memtext_parser; + +void memtext_init(memtext_parser* ctx, memtext_callback* callback, void* user); +int memtext_execute(memtext_parser* ctx, const char* data, size_t len, size_t* off); + +#ifdef __cplusplus +} +#endif + +#endif /* memtext.h */ + diff --git a/src/logic/gateway/memproto/memtext.rl b/src/logic/gateway/memproto/memtext.rl new file mode 100644 index 0000000..cbdacd6 --- /dev/null +++ b/src/logic/gateway/memproto/memtext.rl @@ -0,0 +1,342 @@ +/* + * memtext + * + * Copyright (C) 2008 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "memtext.h" +#include +#include +#include + +#define MARK(M, FPC) (ctx->M = FPC - data) +#define MARK_LEN(M, FPC) (FPC - (ctx->M + data)) +#define MARK_PTR(M) (ctx->M + data) + +#define NUM_BUF_MAX 20 + +#define SET_INTEGER(DST, M, FPC, STRFUNC) \ + do { \ + pos = MARK_PTR(M); \ + if(pos[0] == '0') { ctx->DST = 0; } \ + else { \ + len = MARK_LEN(M, FPC); \ + if(len > NUM_BUF_MAX) { goto convert_error; } \ + memcpy(numbuf, pos, len); \ + numbuf[len] = '\0'; \ + ctx->DST = STRFUNC(numbuf, NULL, 10); \ + if(ctx->DST == 0) { goto convert_error; } \ + } \ + } while(0) + +#define SET_UINT(DST, M, FPC) \ + SET_INTEGER(DST, M, FPC, strtoul) + +#define SET_ULL(DST, M, FPC) \ + SET_INTEGER(DST, M, FPC, strtoull) + +#define SET_MARK_LEN(DST, M, FPC) \ + ctx->DST = MARK_LEN(M, FPC); + +#define CALLBACK(NAME, TYPE) \ + TYPE NAME = ((TYPE*)(&ctx->callback))[ctx->command] + +%%{ + machine memtext; + + action reset { + ctx->keys = 0; + ctx->noreply = false; + ctx->exptime = 0; + } + + action mark_key { + MARK(key_pos[ctx->keys], fpc); + } + action key { + SET_MARK_LEN(key_len[ctx->keys], key_pos[ctx->keys], fpc); + } + action incr_key { + ++ctx->keys; + if(ctx->keys > MEMTEXT_MAX_MULTI_GET) { + goto convert_error; + } + } + + action mark_flags { + MARK(flags, fpc); + } + action flags { + SET_UINT(flags, flags, fpc); + } + + action mark_exptime { + MARK(exptime, fpc); + } + action exptime { + SET_UINT(exptime, exptime, fpc); + } + + action mark_bytes { + MARK(bytes, fpc); + } + action bytes { + SET_UINT(bytes, bytes, fpc); + } + + action noreply { + ctx->noreply = true; + } + + action mark_cas_unique { + MARK(cas_unique, fpc); + } + action cas_unique { + SET_ULL(cas_unique, cas_unique, fpc); + } + + action data_start { + MARK(data_pos, fpc+1); + ctx->data_count = ctx->bytes; + fcall data; + } + action data { + if(--ctx->data_count == 0) { + //printf("mark %d\n", ctx->data_pos); + //printf("fpc %p\n", fpc); + //printf("data %p\n", data); + SET_MARK_LEN(data_len, 
data_pos, fpc+1); + fret; + } + } + + + action cmd_get { ctx->command = MEMTEXT_CMD_GET; } + action cmd_set { ctx->command = MEMTEXT_CMD_SET; } + action cmd_add { ctx->command = MEMTEXT_CMD_ADD; } + action cmd_replace { ctx->command = MEMTEXT_CMD_REPLACE; } + action cmd_append { ctx->command = MEMTEXT_CMD_APPEND; } + action cmd_prepend { ctx->command = MEMTEXT_CMD_PREPEND; } + action cmd_cas { ctx->command = MEMTEXT_CMD_CAS; } + action cmd_delete { ctx->command = MEMTEXT_CMD_DELETE; } + action cmd_incr { ctx->command = MEMTEXT_CMD_INCR; } + action cmd_decr { ctx->command = MEMTEXT_CMD_DECR; } + + + action do_retrieval { + unsigned int i; + ++ctx->keys; + for(i=0; i < ctx->keys; ++i) { + ctx->key_pos[i] = (size_t)MARK_PTR(key_pos[i]); + } + CALLBACK(cb, memtext_callback_retrieval); + if(cb) { + memtext_request_retrieval req = { + (const char**)ctx->key_pos, + ctx->key_len, + ctx->keys + }; + if((*cb)(ctx->user, ctx->command, &req) < 0) { + goto convert_error; + } + } else { goto convert_error; } + } + + action do_storage { + CALLBACK(cb, memtext_callback_storage); + if(cb) { + memtext_request_storage req = { + MARK_PTR(key_pos[0]), ctx->key_len[0], + MARK_PTR(data_pos), ctx->data_len, + ctx->flags, + ctx->exptime, + ctx->noreply + }; + if((*cb)(ctx->user, ctx->command, &req) < 0) { + goto convert_error; + } + } else { goto convert_error; } + } + + action do_cas { + CALLBACK(cb, memtext_callback_cas); + if(cb) { + memtext_request_cas req = { + MARK_PTR(key_pos[0]), ctx->key_len[0], + MARK_PTR(data_pos), ctx->data_len, + ctx->flags, + ctx->exptime, + ctx->cas_unique, + ctx->noreply + }; + if((*cb)(ctx->user, ctx->command, &req) < 0) { + goto convert_error; + } + } else { goto convert_error; } + } + + action do_delete { + CALLBACK(cb, memtext_callback_delete); + if(cb) { + memtext_request_delete req = { + MARK_PTR(key_pos[0]), ctx->key_len[0], + ctx->exptime, ctx->noreply + }; + if((*cb)(ctx->user, ctx->command, &req) < 0) { + goto convert_error; + } + } else { goto 
convert_error; } + } + + action do_numeric { + CALLBACK(cb, memtext_callback_numeric); + if(cb) { + memtext_request_numeric req = { + MARK_PTR(key_pos[0]), ctx->key_len[0], + ctx->cas_unique, ctx->noreply + }; + if((*cb)(ctx->user, ctx->command, &req) < 0) { + goto convert_error; + } + } else { goto convert_error; } + } + + key = ([^\r \0\n]+) >mark_key %key; + #key = ([\!-\~]+) >mark_key %key; + flags = ('0' | [1-9][0-9]*) >mark_flags %flags; + exptime = ('0' | [1-9][0-9]*) >mark_exptime %exptime; + bytes = ([1-9][0-9]*) >mark_bytes %bytes; + noreply = ('noreply') %noreply; + cas_unique = ('0' | [1-9][0-9]*) >mark_cas_unique %cas_unique; + + + retrieval_command = ('get' 's'?) @cmd_get; + + storage_command = ('set' ) @cmd_set + | ('add' ) @cmd_add + | ('replace' ) @cmd_replace + | ('append' ) @cmd_append + | ('prepend' ) @cmd_prepend + ; + + cas_command = ('cas') @cmd_cas; + + delete_command = ('delete') @cmd_delete; + + numeric_command = ('incr') @cmd_incr + | ('decr') @cmd_decr + ; + + retrieval = retrieval_command ' ' key (' ' key >incr_key)* + ' '? # workaraound for libmemcached + '\r\n'; + + storage = storage_command ' ' key + ' ' flags ' ' exptime ' ' bytes + (' ' noreply)? + ' '? # workaraound for apr_memcache + '\r\n' + @data_start + '\r\n' + ; + + cas = cas_command ' ' key + ' ' flags ' ' exptime ' ' bytes + ' ' cas_unique + (' ' noreply)? + '\r\n' + @data_start + '\r\n' + ; + + delete = delete_command ' ' key + (' ' exptime)? (' ' noreply)? + '\r\n' + ; + + numeric = numeric_command ' ' key + ' ' cas_unique # cas_unique => value + (' ' noreply)? 
+ '\r\n' + ; + + command = retrieval @do_retrieval + | storage @do_storage + | cas @do_cas + | delete @do_delete + | numeric @do_numeric + ; + +main := (command >reset)+; + +data := (any @data)*; +}%% + + +%% write data; + +void memtext_init(memtext_parser* ctx, memtext_callback* callback, void* user) +{ + int cs = 0; + int top = 0; + %% write init; + memset(ctx, 0, sizeof(memtext_parser)); + ctx->cs = cs; + ctx->callback = *callback; + ctx->user = user; +} + +int memtext_execute(memtext_parser* ctx, const char* data, size_t len, size_t* off) +{ + if(len <= *off) { return 0; } + + const char* p = data + *off; + const char* pe = data + len; + const char* eof = pe; + int cs = ctx->cs; + int top = ctx->top; + int* stack = ctx->stack; + const char* pos; + char numbuf[NUM_BUF_MAX+1]; + + //printf("execute, len:%lu, off:%lu\n", len, *off); + //printf("%s\n", data); + //printf("data: "); + //int i; + //for(i=0; i < len; ++i) { + // printf("0x%x ", (int)data[i]); + //} + //printf("\n"); + + %% write exec; + +ret: + ctx->cs = cs; + ctx->top = top; + *off = p - data; + + if(cs == memtext_error) { + return -1; + } else if(cs == memtext_first_final) { + return 1; + } else { + return 0; + } + +convert_error: + cs = memtext_error; + goto ret; +} + diff --git a/src/logic/gateway/proto_network.cc b/src/logic/gateway/proto_network.cc new file mode 100644 index 0000000..7dc5b8c --- /dev/null +++ b/src/logic/gateway/proto_network.cc @@ -0,0 +1,84 @@ +#include "gateway/framework.h" +#include "gateway/proto_network.h" +#include "manager/proto_network.h" + +namespace kumo { +namespace gateway { + + +RPC_IMPL(proto_network, HashSpacePush_1, req, z, response) +try { + LOG_DEBUG("HashSpacePush"); + + pthread_scoped_wrlock hslk(share->hs_rwlock()); + + if(share->whs().empty() || + share->whs().clocktime() <= req.param().wseed.clocktime()) { + share->whs() = HashSpace(req.param().wseed); + } + + if(share->rhs().empty() || + share->rhs().clocktime() <= req.param().rseed.clocktime()) { + 
share->rhs() = HashSpace(req.param().rseed); + } + + response.result(true); +} +RPC_CATCH(HashSpacePush, response) + + +void proto_network::renew_hash_space() +{ + shared_zone nullz; + manager::proto_network::HashSpaceRequest_1 param; + + rpc::callback_t callback( BIND_RESPONSE(proto_network, HashSpaceRequest_1) ); + + net->get_server(share->manager1())->call( + param, nullz, callback, 10); + + if(share->manager2().connectable()) { + net->get_server(share->manager2())->call( + param, nullz, callback, 10); + } +} + +void proto_network::renew_hash_space_for(const address& addr) +{ + shared_session ns(net->get_server(addr)); + shared_zone nullz; + manager::proto_network::HashSpaceRequest_1 param; + ns->call(param, nullz, + BIND_RESPONSE(proto_network, HashSpaceRequest_1), 10); +} + +RPC_REPLY_IMPL(proto_network, HashSpaceRequest_1, from, res, err, life) +{ + if(!err.is_nil()) { + LOG_DEBUG("HashSpaceRequest failed ",err); + if(SESSION_IS_ACTIVE(from)) { + shared_zone nullz; + manager::proto_network::HashSpaceRequest_1 param; + + from->call(param, nullz, + BIND_RESPONSE(proto_network, HashSpaceRequest_1), 10); + } // retry on Gateway::session_lost() if the node is lost + } else { + gateway::proto_network::HashSpacePush_1 st(res.convert()); + + pthread_scoped_wrlock hslk(share->hs_rwlock()); + if(share->whs().empty() || + share->whs().clocktime() <= st.wseed.clocktime()) { + share->whs() = HashSpace(st.wseed); + } + if(share->rhs().empty() || + share->rhs().clocktime() <= st.rseed.clocktime()) { + share->rhs() = HashSpace(st.rseed); + } + } +} + + +} // namespace gateway +} // namespace kumo + diff --git a/src/logic/gateway/scope_store.cc b/src/logic/gateway/scope_store.cc new file mode 100644 index 0000000..ff55ed0 --- /dev/null +++ b/src/logic/gateway/scope_store.cc @@ -0,0 +1,332 @@ +#include "gateway/framework.h" +#include + +namespace kumo { +namespace gateway { + + +scope_store::scope_store() : m_error_count(0) +{ } + +scope_store::~scope_store() { } + + +template 
+framework::shared_session scope_store::server_for(uint64_t h, unsigned int offset) +{ +#if NUM_REPLICATION != 2 +#error fix following code +#endif + assert(offset == 0 || offset == 1 || offset == 2); + + pthread_scoped_rdlock hslk(share->hs_rwlock()); + + if((Hs == HS_WRITE ? share->whs() : share->rhs()).empty()) { + net->scope_proto_network().renew_hash_space(); // FIXME may burst + throw std::runtime_error("No server"); + } + HashSpace::iterator it = + (Hs == HS_WRITE ? share->whs() : share->rhs()).find(h); + + { + if(offset == 0) { + if(it->is_active()) { goto node_found; } + } else { --offset; } + + HashSpace::iterator origin(it); + ++it; + for(; it != origin; ++it) { + if(*it == *origin) { continue; } + + if(offset == 0) { + if(it->is_active()) { goto node_found; } + } else { --offset; } + + HashSpace::node rep1 = *it; + ++it; + for(; it != origin; ++it) { + if(*it == *origin || *it == rep1) { continue; } + HashSpace::node _rep2_ = *it; + + if(offset == 0) { + if(it->is_active()) { goto node_found; } + } else { --offset; } + + break; + } + break; + } + } + +node_found: + address addr = it->addr(); + hslk.unlock(); + return net->get_server(addr); +} + + +// FIXME submit callback? +#define GATEWAY_CATCH(NAME, response_type) \ +catch (msgpack::type_error& e) { \ + LOG_WARN(#NAME " FAILED: type error"); \ + response_type res; \ + res.life = life; \ + res.error = 1; \ + wavy::submit(*callback, user, res); \ +} catch (std::exception& e) { \ + LOG_WARN(#NAME " FAILED: ",e.what()); \ + response_type res; \ + res.life = life; \ + res.error = 1; \ + wavy::submit(*callback, user, res); \ +} catch (...) 
{ \ + LOG_WARN(#NAME " FAILED: unknown error"); \ + response_type res; \ + res.life = life; \ + res.error = 1; \ + wavy::submit(*callback, user, res); \ +} + + +void scope_store::Get(void (*callback)(void*, get_response&), void* user, + shared_zone life, + const char* key, uint32_t keylen, uint64_t hash) +try { + if(!life) { life.reset(new msgpack::zone()); } + rpc::retry* retry = + life->allocate< rpc::retry >( + server::proto_store::Get_1( + msgtype::DBKey(key, keylen, hash) + )); + + retry->set_callback( BIND_RESPONSE(scope_store, Get_1, retry, callback, user) ); + retry->call(server_for(hash), life, 10); +} +GATEWAY_CATCH(Get, get_response) + + +void scope_store::Set(void (*callback)(void*, set_response&), void* user, + shared_zone life, + const char* key, uint32_t keylen, uint64_t hash, + const char* val, uint32_t vallen) +try { + uint64_t meta = 0; + if(!life) { life.reset(new msgpack::zone()); } + rpc::retry* retry = + life->allocate< rpc::retry >( + server::proto_store::Set_1( + ( share->cfg_async_replicate_set() ? + static_cast(server::store_flags_async()) : + static_cast(server::store_flags_none() ) ), + msgtype::DBKey(key, keylen, hash), + msgtype::DBValue(val, vallen, meta)) + ); + + retry->set_callback( BIND_RESPONSE(scope_store, Set_1, retry, callback, user) ); + retry->call(server_for(hash), life, 10); +} +GATEWAY_CATCH(Set, set_response) + + +void scope_store::Delete(void (*callback)(void*, delete_response&), void* user, + shared_zone life, + const char* key, uint32_t keylen, uint64_t hash) +try { + if(!life) { life.reset(new msgpack::zone()); } + rpc::retry* retry = + life->allocate< rpc::retry >( + server::proto_store::Delete_1( + (share->cfg_async_replicate_delete() ? 
+ static_cast(server::store_flags_async()) : + static_cast(server::store_flags_none() ) ), + msgtype::DBKey(key, keylen, hash)) + ); + + retry->set_callback( BIND_RESPONSE(scope_store, Delete_1, retry, callback, user) ); + retry->call(server_for(hash), life, 10); +} +GATEWAY_CATCH(Delete, delete_response) + + +RPC_REPLY_IMPL(scope_store, Get_1, from, res, err, life, + rpc::retry* retry, + void (*callback)(void*, get_response&), void* user) +try { + msgtype::DBKey key(retry->param().dbkey); + LOG_TRACE("ResGet ",err); + + if(err.is_nil()) { + get_response ret; + ret.error = 0; + ret.life = life; + ret.key = key.data(); + ret.keylen = key.size(); + ret.hash = key.hash(); + if(res.is_nil()) { + ret.val = NULL; + ret.vallen = 0; + ret.clocktime = 0; + } else { + msgtype::DBValue st(res.convert()); + ret.val = (char*)st.data(); + ret.vallen = st.size(); + ret.clocktime = st.clocktime().get(); + } + try { (*callback)(user, ret); } catch (...) { } + + } else if( retry->retry_incr((NUM_REPLICATION+1) * share->cfg_get_retry_num() - 1) ) { + incr_error_count(); + unsigned short offset = retry->num_retried() % (NUM_REPLICATION+1); + retry->call(server_for(key.hash(), offset), life, 10); + LOG_INFO("Get error: ",err,", fallback to offset +",offset," node"); + + } else { + if(err.via.u64 == (uint64_t)rpc::protocol::TRANSPORT_LOST_ERROR || + err.via.u64 == (uint64_t)rpc::protocol::SERVER_ERROR) { + net->scope_proto_network().renew_hash_space(); // FIXME + } + get_response ret; + ret.error = 1; // ERROR + ret.life = life; + ret.key = key.data(); + ret.keylen = key.size(); + ret.hash = key.hash(); + ret.val = NULL; + ret.vallen = 0; + ret.clocktime = 0; + try { (*callback)(user, ret); } catch (...) 
{ } + LOGPACK("eg",2, + "key",msgtype::raw_ref(key.data(),key.size()), + "err",err.via.u64); + LOG_ERROR("Get error: ", err); + } +} +GATEWAY_CATCH(ResGet, get_response) + + +RPC_REPLY_IMPL(scope_store, Set_1, from, res, err, life, + rpc::retry* retry, + void (*callback)(void*, set_response&), void* user) +try { + msgtype::DBKey key(retry->param().dbkey); + msgtype::DBValue val(retry->param().dbval); + LOG_TRACE("ResSet ",err); + + if(!res.is_nil()) { + msgpack::type::tuple st(res); + set_response ret; + ret.error = 0; + ret.life = life; + ret.key = key.data(); + ret.keylen = key.size(); + ret.hash = key.hash(); + ret.val = val.data(); + ret.vallen = val.size(); + ret.clocktime = st.get<0>(); + try { (*callback)(user, ret); } catch (...) { } + + } else if( retry->retry_incr(share->cfg_set_retry_num()) ) { + incr_error_count(); + if(!SESSION_IS_ACTIVE(from)) { + //FIXME this check is not atomic. it may throw "session not bound" error + //FIXME XXX noew rpc::basic_session::call does'nt throw "session not bound" error. + // FIXME renew hash space? + // FIXME delayed retry + from = server_for(key.hash()); + } + retry->call(from, life, 10); + LOG_WARN("Set error: ",err,", retry ",retry->num_retried()); + + } else { + if(err.via.u64 == (uint64_t)rpc::protocol::TRANSPORT_LOST_ERROR || + err.via.u64 == (uint64_t)rpc::protocol::SERVER_ERROR) { + net->scope_proto_network().renew_hash_space(); // FIXME + } + set_response ret; + ret.error = 1; // ERROR + ret.life = life; + ret.key = key.data(); + ret.keylen = key.size(); + ret.hash = key.hash(); + ret.val = val.data(); + ret.vallen = val.size(); + ret.clocktime = 0; + try { (*callback)(user, ret); } catch (...) 
{ } + LOGPACK("es",2, + "key",msgtype::raw_ref(key.data(),key.size()), + "val",msgtype::raw_ref(val.data(),val.size()), + "err",err.via.u64); + LOG_ERROR("Set error: ",err); + } +} +GATEWAY_CATCH(ResSet, set_response) + + +RPC_REPLY_IMPL(scope_store, Delete_1, from, res, err, life, + rpc::retry* retry, + void (*callback)(void*, delete_response&), void* user) +try { + msgtype::DBKey key(retry->param().dbkey); + LOG_TRACE("ResDelete ",err); + + if(!res.is_nil()) { + bool st(res.convert()); + delete_response ret; + ret.error = 0; + ret.life = life; + ret.key = key.data(); + ret.keylen = key.size(); + ret.hash = key.hash(); + ret.deleted = st; + try { (*callback)(user, ret); } catch (...) { } + + } else if( retry->retry_incr(share->cfg_delete_retry_num()) ) { + incr_error_count(); + if(!SESSION_IS_ACTIVE(from)) { + //FIXME this check is not atomic. it may throw "session not bound" error + //FIXME XXX noew rpc::basic_session::call does'nt throw "session not bound" error. + // FIXME renew hash space? + // FIXME delayed retry + from = server_for(key.hash()); + } + retry->call(from, life, 10); + LOG_WARN("Delete error: ",err,", retry ",retry->num_retried()); + + } else { + if(err.via.u64 == (uint64_t)rpc::protocol::TRANSPORT_LOST_ERROR || + err.via.u64 == (uint64_t)rpc::protocol::SERVER_ERROR) { + net->scope_proto_network().renew_hash_space(); // FIXME + } + delete_response ret; + ret.error = 1; // ERROR + ret.life = life; + ret.key = key.data(); + ret.keylen = key.size(); + ret.hash = key.hash(); + ret.deleted = false; + try { (*callback)(user, ret); } catch (...) 
{ } + LOGPACK("ed",2, + "key",msgtype::raw_ref(key.data(),key.size()), + "err",err.via.u64); + LOG_ERROR("Delete error: ",err); + } +} +GATEWAY_CATCH(ResDelete, delete_response) + + +void scope_store::incr_error_count() +{ + LOG_DEBUG("increment error count ",m_error_count); + if(m_error_count >= share->cfg_renew_threshold()) { + m_error_count = 0; + net->scope_proto_network().renew_hash_space(); + sleep(1); // FIXME ad-hoc delay + } else { + ++m_error_count; + } +} + + +} // namespace gateway +} // namespace kumo + diff --git a/src/logic/gateway/scope_store.h b/src/logic/gateway/scope_store.h new file mode 100644 index 0000000..24c7c69 --- /dev/null +++ b/src/logic/gateway/scope_store.h @@ -0,0 +1,59 @@ +#ifndef GATEWAY_SCOPE_STORE_H__ +#define GATEWAY_SCOPE_STORE_H__ + +#include "gateway/interface.h" +#include "server/proto_store.h" + +namespace kumo { +namespace gateway { + + +class scope_store { +public: + scope_store(); + ~scope_store(); + +public: + void Get(void (*callback)(void*, get_response&), void* user, + shared_zone life, + const char* key, uint32_t keylen, uint64_t hash); + + void Set(void (*callback)(void*, set_response&), void* user, + shared_zone life, + const char* key, uint32_t keylen, uint64_t hash, + const char* val, uint32_t vallen); + + void Delete(void (*callback)(void*, delete_response&), void* user, + shared_zone life, + const char* key, uint32_t keylen, uint64_t hash); + +private: + RPC_REPLY_DECL(Get_1, from, res, err, life, + rpc::retry* retry, + void (*callback)(void*, get_response&), void* user); + + RPC_REPLY_DECL(Set_1, from, res, err, life, + rpc::retry* retry, + void (*callback)(void*, set_response&), void* user); + + RPC_REPLY_DECL(Delete_1, from, res, err, life, + rpc::retry* retry, + void (*callback)(void*, delete_response&), void* user); + + enum hash_space_type { + HS_WRITE, + HS_READ, + }; + template + shared_session server_for(uint64_t h, unsigned int offset = 0); + + void incr_error_count(); + unsigned short m_error_count; 
+}; + + +} // namespace gateway +} // namespace kumo + +#endif /* gateway/scope_store.h */ + diff --git a/src/logic/global.h b/src/logic/global.h new file mode 100644 index 0000000..b2d630a --- /dev/null +++ b/src/logic/global.h @@ -0,0 +1,28 @@ +#ifndef LOGIC_GLOBAL_H__ +#define LOGIC_GLOBAL_H__ + +#include "log/mlogger.h" + +#define NUM_REPLICATION 2 + +#ifndef MANAGER_DEFAULT_PORT +#define MANAGER_DEFAULT_PORT 19700 +#endif + +#ifndef SERVER_DEFAULT_PORT +#define SERVER_DEFAULT_PORT 19800 +#endif + +#ifndef SERVER_STREAM_DEFAULT_PORT +#define SERVER_STREAM_DEFAULT_PORT 19900 +#endif + +#ifndef CONTROL_DEFAULT_PORT +#define CONTROL_DEFAULT_PORT 19750 +#endif + +// FIXME VERSION +#define VERSION "0.1.0" + +#endif /* logic/global.h */ + diff --git a/src/logic/hash.cc b/src/logic/hash.cc new file mode 100644 index 0000000..60f1f15 --- /dev/null +++ b/src/logic/hash.cc @@ -0,0 +1,137 @@ +#include "logic/hash.h" +#include "log/mlogger.h" +#include + +namespace kumo { + +static const size_t HASHSPACE_VIRTUAL_NODE_NUMBER = 128; + + +HashSpace::HashSpace(ClockTime clocktime) : + m_timestamp(clocktime) {} + +HashSpace::~HashSpace() {} + + +/* +class HashFunction { +public: + HashFunction() { SHA1_Init(&m_ctx); } + uint64_t operator() (const char* data, unsigned long datalen) + { + SHA1_Update(&m_ctx, data, datalen); + SHA1_Final(m_buf, &m_ctx); + return *(uint64_t*)m_buf; // FIXME endian + } +private: + SHA_CTX m_ctx; + unsigned char m_buf[SHA_DIGEST_LENGTH]; +}; +static HashFunction HashFunction_; +*/ + +uint64_t HashSpace::hash(const char* data, unsigned long len) +{ + // FIXME thread-safety with thread local storage + //return HashFunction_(data, len); + unsigned char buf[SHA_DIGEST_LENGTH]; + SHA1((unsigned const char*)data, len, buf); + return *(uint64_t*)buf; // FIXME endian? 
+} + +void HashSpace::add_server(ClockTime clocktime, const address& addr) +{ + m_timestamp = clocktime; + m_nodes.push_back( node(addr,true) ); + add_virtual_nodes(m_nodes.back()); + std::sort(m_hashspace.begin(), m_hashspace.end()); +} + +bool HashSpace::remove_server(ClockTime clocktime, const address& addr) +{ + nodes_t::iterator it = + std::find_if(m_nodes.begin(), m_nodes.end(), + node_address_equal(addr)); + if(it != m_nodes.end()) { + m_nodes.erase(it); + m_timestamp = clocktime; + rehash(); + return true; + } + return false; +} + +bool HashSpace::fault_server(ClockTime clocktime, const address& addr) +{ + nodes_t::iterator it = + std::find_if(m_nodes.begin(), m_nodes.end(), + node_address_equal(addr)); + if(it != m_nodes.end()) { + it->fault(); + m_timestamp = clocktime; + return true; + } + return false; +} + +bool HashSpace::recover_server(ClockTime clocktime, const address& addr) +{ + nodes_t::iterator it = + std::find_if(m_nodes.begin(), m_nodes.end(), + node_address_equal(addr)); + if(it != m_nodes.end()) { + it->recover(); + m_timestamp = clocktime; + return true; + } + return false; +} + +bool HashSpace::remove_fault_servers(ClockTime clocktime) +{ + bool ret = false; + for(nodes_t::iterator it(m_nodes.begin()); it != m_nodes.end(); ) { + if(!it->is_active()) { + ret = true; + it = m_nodes.erase(it); + } else { + ++it; + } + } + if(ret) { + m_timestamp = clocktime; + rehash(); + return true; + } + return false; +} + +void HashSpace::add_virtual_nodes(const node& n) +{ + uint64_t x = HashSpace::hash(n.addr().dump(), n.addr().dump_size()); + m_hashspace.push_back( virtual_node(x, n) ); + for(size_t i=1; i < HASHSPACE_VIRTUAL_NODE_NUMBER; ++i) { + // FIXME use another hash function? 
+ x = HashSpace::hash((const char*)&x, sizeof(uint64_t)); + m_hashspace.push_back( virtual_node(x, n) ); + } +} + +void HashSpace::rehash() +{ + m_hashspace.clear(); + for(nodes_t::const_iterator it(m_nodes.begin()), it_end(m_nodes.end()); + it != it_end; ++it) { + add_virtual_nodes(*it); + } + std::sort(m_hashspace.begin(), m_hashspace.end()); + + for(hashspace_t::const_iterator x(m_hashspace.begin()), x_end(m_hashspace.end()); + x != x_end; ++x) { + LOG_TRACE("virtual node dump: ",x->hash(),":",x->real()); + } +} + + +} // namespace kumo + diff --git a/src/logic/hash.h b/src/logic/hash.h new file mode 100644 index 0000000..7a6915f --- /dev/null +++ b/src/logic/hash.h @@ -0,0 +1,299 @@ +#ifndef LOGIC_HASH_H__ +#define LOGIC_HASH_H__ + +#include "rpc/address.h" +#include "logic/clock.h" +#include +#include +#include + +namespace kumo { + + +using rpc::address; + + +class HashSpace { +public: + class Seed; + + HashSpace(ClockTime clocktime = ClockTime(0,0)); + HashSpace(Seed& seed); + ~HashSpace(); + +public: + class node { + public: + node() {} + node(const address& addr, bool active) : m_addr(addr), m_active(active) {} + public: + const address& addr() const { return m_addr; } + bool is_active() const { return m_active; } + void fault() { m_active = false; } + void recover() { m_active = true; } + bool operator== (const node& other) const; + private: + address m_addr; + bool m_active; + }; + + struct node_address_equal; + +private: + struct virtual_node { + virtual_node(uint64_t h) : m_hash(h) {} + virtual_node(uint64_t h, const node& r) : m_hash(h), m_real(r) {} + public: + uint64_t hash() const { return m_hash; } + const node& real() const { return m_real; } + bool operator< (const virtual_node& other) const + { + return m_hash < other.m_hash; + } + private: + uint64_t m_hash; + node m_real; + }; + + // sorted vector + typedef std::vector hashspace_t; + hashspace_t m_hashspace; + + typedef std::vector nodes_t; + nodes_t m_nodes; + + ClockTime m_timestamp; + 
+public: + class iterator; + + iterator find(uint64_t h) const; + + size_t active_node_count() const; + void get_active_nodes(std::vector
& result) const; + +public: + void add_server(ClockTime clocktime, const address& addr); + bool remove_server(ClockTime clocktime, const address& addr); + bool fault_server(ClockTime clocktime, const address& addr); + bool recover_server(ClockTime clocktime, const address& addr); + bool remove_fault_servers(ClockTime clocktime); + + bool empty() const; + + const ClockTime& clocktime() const + { return m_timestamp; } + + bool operator== (const HashSpace& other) const + { return m_nodes == other.m_nodes; } + + void nodes_diff(const HashSpace& other, std::vector
& result) const; + + bool server_is_include(const address& addr) const; + bool server_is_active(const address& addr) const; + bool server_is_fault(const address& addr) const; + +private: + void add_virtual_nodes(const node& n); + void rehash(); + +public: + static uint64_t hash(const char* data, unsigned long len); + +public: + friend class Seed; + bool operator== (const Seed& other) const; +}; + + +class HashSpace::iterator { +public: + iterator(const hashspace_t& hs, hashspace_t::const_iterator it) : + m_it(it), m_hashspace(hs) {} + ~iterator() {} +public: + iterator& operator++ () + { + ++m_it; + if(m_it == m_hashspace.end()) { + m_it = m_hashspace.begin(); + } + return *this; + } + + bool operator== (const iterator& it) const + { + return m_it == it.m_it; + } + + bool operator!= (const iterator& it) const + { + return !(*this == it); + } + + const node& operator* () const + { + return m_it->real(); + } + + const node* operator-> () const + { + return &m_it->real(); + } + +private: + hashspace_t::const_iterator m_it; + const hashspace_t& m_hashspace; +}; + + +inline bool HashSpace::node::operator== (const node& other) const +{ + return m_active == other.m_active && m_addr == other.m_addr; +} + +struct HashSpace::node_address_equal { + node_address_equal(const address& a) : m(a) {} + bool operator() (const HashSpace::node& other) const + { + return m == other.addr(); + } +private: + const address& m; +}; + + +inline std::ostream& operator<< (std::ostream& stream, const HashSpace::node& n) +{ + return stream << n.addr() << '(' << (n.is_active() ?
"active" : "fault") << ')'; +} + +inline HashSpace::node& operator>> (msgpack::object o, HashSpace::node& v) +{ + using namespace msgpack; + if(o.type != type::RAW || o.via.raw.size < 1) { throw type_error(); } // need at least the 1-byte active flag + address addr(o.via.raw.ptr+1, o.via.raw.size-1); // size is checked in address::address + bool active = o.via.raw.ptr[0] != 0; + v = HashSpace::node(addr, active); + return v; +} + +template +inline msgpack::packer& operator<< (msgpack::packer& o, const HashSpace::node& v) +{ + using namespace msgpack; + o.pack_raw(1 + v.addr().dump_size()); + char a = v.is_active() ? 1 : 0; + o.pack_raw_body(&a, 1); + o.pack_raw_body(v.addr().dump(), v.addr().dump_size()); + return o; +} + + +class HashSpace::Seed : public msgpack::define< + msgpack::type::tuple > { +public: + Seed() { } + Seed(HashSpace& hs) : + define_type(msgpack_type( hs.m_nodes, hs.m_timestamp.get() )) {} + const nodes_t& nodes() const { return get<0>(); } + uint64_t clocktime() const { return get<1>(); } + bool empty() const { return get<0>().empty(); } +}; + +inline HashSpace::HashSpace(Seed& seed) : + m_nodes(seed.nodes()), m_timestamp(seed.clocktime()) +{ + rehash(); +} + +inline bool HashSpace::operator== (const Seed& other) const +{ + return m_timestamp.get() == other.clocktime() && m_nodes == other.nodes(); +} + + +inline HashSpace::iterator HashSpace::find(uint64_t h) const +{ + hashspace_t::const_iterator it( + std::lower_bound(m_hashspace.begin(), m_hashspace.end(), virtual_node(h)) + ); + if(it == m_hashspace.end()) { + return iterator(m_hashspace, m_hashspace.begin()); + } else { + return iterator(m_hashspace, it); + } +} + +inline bool HashSpace::empty() const +{ + for(nodes_t::const_iterator it(m_nodes.begin()), it_end(m_nodes.end()); + it != it_end; ++it) { + if(it->is_active()) { return false; } + } + return true; +} + +inline size_t HashSpace::active_node_count() const +{ + size_t n = 0; + for(nodes_t::const_iterator it(m_nodes.begin()), it_end(m_nodes.end()); + it != it_end; ++it) { + if(it->is_active())
{ ++n; } + } + return n; +} + +inline void HashSpace::get_active_nodes(std::vector
& result) const +{ + for(nodes_t::const_iterator it(m_nodes.begin()), it_end(m_nodes.end()); + it != it_end; ++it) { + if(it->is_active()) { result.push_back(it->addr()); } + } +} + +inline void HashSpace::nodes_diff(const HashSpace& other, std::vector
& result) const +{ + for(nodes_t::const_iterator it(m_nodes.begin()), it_end(m_nodes.end()); + it != it_end; ++it) { + if(std::find_if(other.m_nodes.begin(), other.m_nodes.end(), + node_address_equal(it->addr())) == other.m_nodes.end()) { + result.push_back(it->addr()); + } + } +} + +inline bool HashSpace::server_is_include(const address& addr) const +{ + return std::find_if( + m_nodes.begin(), m_nodes.end(), + node_address_equal(addr)) != m_nodes.end(); +} + +inline bool HashSpace::server_is_active(const address& addr) const +{ + nodes_t::const_iterator it = std::find_if( + m_nodes.begin(), m_nodes.end(), + node_address_equal(addr)); + if(it != m_nodes.end() && it->is_active()) { + return true; + } + return false; +} + +inline bool HashSpace::server_is_fault(const address& addr) const +{ + nodes_t::const_iterator it = std::find_if( + m_nodes.begin(), m_nodes.end(), + node_address_equal(addr)); + if(it != m_nodes.end() && !it->is_active()) { + return true; + } + return false; +} + + +} // namespace kumo + +#endif /* logic/hash.h */ + diff --git a/src/logic/manager.proto.h b/src/logic/manager.proto.h new file mode 100644 index 0000000..7f501ca --- /dev/null +++ b/src/logic/manager.proto.h @@ -0,0 +1,189 @@ +#include "manager/proto.h" +#include "logic/msgtype.h" +#include "logic/cluster_logic.h" +#include +#include +#include + +namespace kumo { +namespace manager { + + +@message proto_network::KeepAlive = 0 +@message proto_network::HashSpaceRequest = 1 +@message proto_network::WHashSpaceRequest = 34 +@message proto_network::RHashSpaceRequest = 35 +@message proto_network::HashSpaceSync = 81 +@message proto_replace::ReplaceCopyEnd = 32 +@message proto_replace::ReplaceDeleteEnd = 33 +@message proto_replace::ReplaceElection = 80 + +@message proto_control::GetStatus = 84 +@message proto_control::AttachNewServers = 85 +@message proto_control::DetachFaultServers = 86 +@message proto_control::CreateBackup = 87 +@message proto_control::SetAutoReplace = 88 +@message 
proto_control::StartReplace = 89 + + +@rpc proto_network + message KeepAlive.1 +cluster { + uint32_t clock; + // ok: UNDEFINED + }; + + message HashSpaceRequest.1 { + // success: gateway::proto_network::HashSpacePush_1 + }; + + message WHashSpaceRequest.1 { + // success: hash_space:tuple,uint64_t> + }; + + message RHashSpaceRequest.1 { + // success: hash_space:tuple,uint64_t> + }; + + message HashSpaceSync.1 +cluster { + msgtype::HSSeed wseed; + msgtype::HSSeed rseed; + uint32_t clock; + // success: true + // obsolete: nil + }; + +public: + void keep_alive(); + + void sync_hash_space_servers(REQUIRE_HSLK, REQUIRE_SSLK); + void sync_hash_space_partner(REQUIRE_HSLK); + void push_hash_space_clients(REQUIRE_HSLK); + +private: + RPC_REPLY_DECL(KeepAlive_1, from, res, err, life); + + RPC_REPLY_DECL(HashSpaceSync_1, from, res, err, life); + RPC_REPLY_DECL(HashSpacePush_1, from, res, err, life); +@end + + +@code proto_replace +class framework; +@end + +@rpc proto_replace + message ReplaceCopyEnd.1 { + uint64_t clocktime; // FIXME ClockTime? + uint32_t clock; + // acknowledge: true + }; + + message ReplaceDeleteEnd.1 { + uint64_t clocktime; // FIXME ClockTime? 
+ uint32_t clock; + // acknowledge: true + }; + + message ReplaceElection.1 +cluster { + msgtype::HSSeed hsseed; + uint32_t clock; + // sender of ReplaceElection is responsible for replacing: true + // receiver of ReplaceElection is responsible for replacing: nil + }; + +public: + proto_replace(); + ~proto_replace(); + +public: + void attach_new_servers(REQUIRE_HSLK); + void detach_fault_servers(REQUIRE_HSLK); + + void start_replace(REQUIRE_HSLK); + + void add_server(const address& addr, shared_node& s); + void remove_server(const address& addr); + +private: + void replace_election(); + + RPC_REPLY_DECL(ReplaceElection_1, from, res, err, life); + RPC_REPLY_DECL(ReplaceCopyStart_1, from, res, err, life); + RPC_REPLY_DECL(ReplaceDeleteStart_1, from, res, err, life); + + void finish_replace_copy(REQUIRE_RELK); + void finish_replace(REQUIRE_RELK); + +private: + class ReplaceContext { + public: + ReplaceContext(); + ~ReplaceContext(); + public: + ClockTime clocktime() const; + void reset(ClockTime ct, unsigned int num); + bool pop(ClockTime ct); + void invalidate(); + private: + unsigned int m_num; + ClockTime m_clocktime; + }; + + mp::pthread_mutex m_replace_mutex; + ReplaceContext m_copying; + ReplaceContext m_deleting; + + short m_delayed_replace_clock; + + void delayed_replace_election(); + void delayed_replace_election_step(); // called from frame + friend class framework; +@end + + +@rpc proto_control + message GetStatus.1 { + }; + + message AttachNewServers.1 { + bool replace; + }; + + message DetachFaultServers.1 { + bool replace; + }; + + message CreateBackup.1 { + std::string suffix; + }; + + message SetAutoReplace.1 { + bool enable; + }; + + message StartReplace.1 { + }; + + +public: + proto_control(); + ~proto_control(); + +public: + void listen_control(int lsock); + +private: + struct Status : msgpack::define< msgpack::type::tuple< + msgtype::HSSeed, std::vector
> > { + Status() { } + msgtype::HSSeed& hsseed() { return get<0>(); } + std::vector
& newcomers() { return get<1>(); } + }; + + RPC_REPLY_DECL(CreateBackup_1, from, res, err, life); +@end + + +} // namespace manager +} // namespace kumo + diff --git a/src/logic/manager/control_framework.cc b/src/logic/manager/control_framework.cc new file mode 100644 index 0000000..41857b9 --- /dev/null +++ b/src/logic/manager/control_framework.cc @@ -0,0 +1,51 @@ +#include "manager/framework.h" +#include "manager/control_framework.h" + +namespace kumo { +namespace manager { + + +control_framework::control_framework() { } +control_framework::~control_framework() { } + +void control_framework::dispatch( + rpc::shared_peer from, rpc::weak_responder response, + rpc::method_id method, rpc::msgobj param, rpc::auto_zone z) +{ + // FIXME try & catch + switch(method.get()) { + RPC_DISPATCH(proto_control, GetStatus_1); + RPC_DISPATCH(proto_control, AttachNewServers_1); + RPC_DISPATCH(proto_control, DetachFaultServers_1); + RPC_DISPATCH(proto_control, CreateBackup_1); + RPC_DISPATCH(proto_control, SetAutoReplace_1); + RPC_DISPATCH(proto_control, StartReplace_1); + default: + // FIXME exception class + throw std::runtime_error("unknown method"); + } +} + + +void control_framework::listen_control(int lsock) +{ + using namespace mp::placeholders; + wavy::listen(lsock, mp::bind( + &control_framework::control_checked_accepted, this, + _1, _2)); +} + +void control_framework::control_checked_accepted(int fd, int err) +{ + if(fd < 0) { + LOG_FATAL("accept failed: ",strerror(err)); + net->signal_end(); + return; + } + accepted(fd); +} + + +} // namespace manager +} // namespace kumo + diff --git a/src/logic/manager/control_framework.h b/src/logic/manager/control_framework.h new file mode 100644 index 0000000..59e1a52 --- /dev/null +++ b/src/logic/manager/control_framework.h @@ -0,0 +1,37 @@ +#ifndef MANAGER_CONTROL_FRAMEWORK_H__ +#define MANAGER_CONTROL_FRAMEWORK_H__ + +#include "logic/rpc_server.h" +#include "manager/proto_control.h" + +namespace kumo { +namespace manager { + + 
+class control_framework : public rpc::server { +public: + control_framework(); + ~control_framework(); + + void dispatch( + rpc::shared_peer from, rpc::weak_responder response, + rpc::method_id method, rpc::msgobj param, rpc::auto_zone z); + + void listen_control(int lsock); + +private: + proto_control m_proto_control; + +private: + void control_checked_accepted(int fd, int err); + +private: + control_framework(const control_framework&); +}; + + +} // namespace manager +} // namespace kumo + +#endif /* manager/framework.h */ + diff --git a/src/logic/manager/framework.cc b/src/logic/manager/framework.cc new file mode 100644 index 0000000..53a3eb2 --- /dev/null +++ b/src/logic/manager/framework.cc @@ -0,0 +1,96 @@ +#include "manager/framework.h" + +namespace kumo { +namespace manager { + + +std::auto_ptr net; +std::auto_ptr share; + + +void framework::cluster_dispatch( + shared_node from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z) +{ + // FIXME try & catch + switch(method.get()) { + RPC_DISPATCH(proto_network, KeepAlive_1); + RPC_DISPATCH(proto_network, WHashSpaceRequest_1); + RPC_DISPATCH(proto_network, RHashSpaceRequest_1); + RPC_DISPATCH(proto_network, HashSpaceSync_1); + RPC_DISPATCH(proto_replace, ReplaceCopyEnd_1); + RPC_DISPATCH(proto_replace, ReplaceDeleteEnd_1); + RPC_DISPATCH(proto_replace, ReplaceElection_1); + default: + // FIXME exception class + throw std::runtime_error("unknown method"); + } +} + +void framework::subsystem_dispatch( + shared_peer from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z) +{ + // FIXME try & catch + switch(method.get()) { + RPC_DISPATCH(proto_network, HashSpaceRequest_1); + default: + // FIXME exception class + throw std::runtime_error("unknown method"); + } +} + + +void framework::step_timeout() +{ + rpc::cluster::step_timeout(); + scope_proto_replace().delayed_replace_election_step(); +} + + +void framework::new_node(address addr, role_type id, 
shared_node n) +{ + LOG_WARN("new node ",id," ",addr); + if(id == ROLE_MANAGER) { + if(addr != share->partner()) { + LOGPACK("eP",2, + "addr",addr); + LOG_ERROR("unknown partner node"); + // FIXME + return; + } + LOG_INFO("partner connected ",addr); + { + pthread_scoped_lock hslk(share->hs_mutex()); + scope_proto_network().sync_hash_space_partner(hslk); + } + return; + + } else if(id == ROLE_SERVER) { + // FIXME delayed change + scope_proto_replace().add_server(addr, n); + return; + + } else { + LOG_ERROR("unkown node id ",(uint16_t)id); + } +} + +void framework::lost_node(address addr, role_type id) +{ + LOG_WARN("lost node ",id," ",addr); + if(id == ROLE_MANAGER) { + return; + + } else if(id == ROLE_SERVER) { + // FIXME delayed change + scope_proto_replace().remove_server(addr); + return; + + } +} + + +} // namespace manager +} // namespace kumo + diff --git a/src/logic/manager/framework.h b/src/logic/manager/framework.h new file mode 100644 index 0000000..c9f3e49 --- /dev/null +++ b/src/logic/manager/framework.h @@ -0,0 +1,128 @@ +#ifndef MANAGER_FRAMEWORK_H__ +#define MANAGER_FRAMEWORK_H__ + +#include "logic/cluster_logic.h" +#include "manager/proto_network.h" +#include "manager/proto_replace.h" +#include "manager/control_framework.h" + + +#define EACH_ACTIVE_SERVERS_BEGIN(NODE) \ + for(servers_t::iterator _it_(share->servers().begin()), it_end(share->servers().end()); \ + _it_ != it_end; ++_it_) { \ + shared_node NODE(_it_->second.lock()); \ + if(SESSION_IS_ACTIVE(NODE)) { + // FIXME share->servers().erase(it) ? 
+ +#define EACH_ACTIVE_SERVERS_END \ + } \ + } + + +namespace kumo { +namespace manager { + + +class framework : public cluster_logic { +public: + template + framework(const Config& cfg); + + void cluster_dispatch( + shared_node from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z); + + void subsystem_dispatch( + shared_peer from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z); + + void new_node(address addr, role_type id, shared_node n); + void lost_node(address addr, role_type id); + + void step_timeout(); + + // cluster_logic + void keep_alive() + { + scope_proto_network().keep_alive(); + } + +private: + proto_network m_proto_network; + proto_replace m_proto_replace; + +public: + proto_network& scope_proto_network() { return m_proto_network; } + proto_replace& scope_proto_replace() { return m_proto_replace; } + +private: + control_framework m_control_framework; + +private: + framework(); + framework(const framework&); +}; + + +typedef std::vector new_servers_t; +typedef std::map servers_t; + + +class resource { +public: + template + resource(const Config& cfg); + +private: + Clock m_clock; + + mp::pthread_mutex m_hs_mutex; + HashSpace m_rhs; + HashSpace m_whs; + + // 'joined servers' including both active and fault servers + mp::pthread_mutex m_servers_mutex; + servers_t m_servers; + + // added but 'not joined servers' + mp::pthread_mutex m_new_servers_mutex; + new_servers_t m_new_servers; + + const address m_partner; + + bool m_cfg_auto_replace; + const short m_cfg_replace_delay_clocks; + +public: + RESOURCE_ACCESSOR(Clock, clock); + + RESOURCE_ACCESSOR(mp::pthread_mutex, hs_mutex); + RESOURCE_ACCESSOR(HashSpace, rhs); + RESOURCE_ACCESSOR(HashSpace, whs); + + RESOURCE_ACCESSOR(mp::pthread_mutex, servers_mutex); + RESOURCE_ACCESSOR(servers_t, servers); + + RESOURCE_ACCESSOR(mp::pthread_mutex, new_servers_mutex); + RESOURCE_ACCESSOR(new_servers_t, new_servers); + + 
RESOURCE_CONST_ACCESSOR(address, partner); + + RESOURCE_ACCESSOR(bool, cfg_auto_replace); + RESOURCE_CONST_ACCESSOR(short, cfg_replace_delay_clocks); + +private: + resource(); + resource(const resource&); +}; + + +extern std::auto_ptr net; +extern std::auto_ptr share; + + +} // namespace manager +} // namespace kumo + +#endif /* manager/framework.h */ + diff --git a/src/logic/manager/init.h b/src/logic/manager/init.h new file mode 100644 index 0000000..0196c88 --- /dev/null +++ b/src/logic/manager/init.h @@ -0,0 +1,49 @@ +#ifndef MANAGER_INIT_H__ +#define MANAGER_INIT_H__ + +#include "manager/framework.h" + +namespace kumo { +namespace manager { + + +template +framework::framework(const Config& cfg) : + cluster_logic( + cfg.rthreads, cfg.wthreads, + ROLE_SERVER, + cfg.cluster_addr, + cfg.connect_timeout_msec, + cfg.connect_retry_limit) +{ + LOG_INFO("start manager ",addr()); + LOGPACK("SM",2, + "time", time(NULL), + "addr", cfg.cluster_addr, + "Padd", share->partner()); + listen_cluster(cfg.cluster_lsock); // cluster_logic + m_control_framework.listen_control(cfg.ctlsock_lsock); + start_timeout_step(cfg.clock_interval_usec); // rpc_server + start_keepalive(cfg.keepalive_interval_usec); // cluster_logic +} + +template +resource::resource(const Config& cfg) : + m_partner(cfg.partner), + m_cfg_auto_replace(cfg.auto_replace), + m_cfg_replace_delay_clocks(cfg.replace_delay_clocks) +{ } + +template +static void init(const Config& cfg) +{ + share.reset(new resource(cfg)); + net.reset(new framework(cfg)); +} + + +} // namespace manager +} // namespace kumo + +#endif /* manager/init.h */ + diff --git a/src/logic/manager/main.cc b/src/logic/manager/main.cc new file mode 100644 index 0000000..46c68a5 --- /dev/null +++ b/src/logic/manager/main.cc @@ -0,0 +1,85 @@ +#include "logic/boot.h" +#include "manager/framework.h" +#include "manager/init.h" + +using namespace kumo; + +struct arg_t : rpc_cluster_args { + unsigned short replace_delay_clocks; + + bool auto_replace; + + bool 
partner_set; + struct sockaddr_in partner_in; + rpc::address partner; // convert + + sockaddr_in ctlsock_addr_in; + int ctlsock_lsock; + + virtual void convert() + { + cluster_addr = rpc::address(cluster_addr_in); + cluster_lsock = scoped_listen_tcp::listen(cluster_addr); + partner = rpc::address(partner_in); + ctlsock_lsock = scoped_listen_tcp::listen(ctlsock_addr_in); + rpc_cluster_args::convert(); + } + + arg_t(int argc, char** argv) : + replace_delay_clocks(4) + { + using namespace kazuhiki; + set_basic_args(); + on("-l", "--listen", + type::connectable(&cluster_addr_in, MANAGER_DEFAULT_PORT)); + on("-c", "--control", + type::listenable(&ctlsock_addr_in, CONTROL_DEFAULT_PORT)); + on("-p", "--partner", &partner_set, + type::connectable(&partner_in, MANAGER_DEFAULT_PORT)); + on("-a", "--auto-replace", + type::boolean(&auto_replace)); + on("-Rs", "--replace-delay", + type::numeric(&replace_delay_clocks, replace_delay_clocks)); + parse(argc, argv); + } + + void show_usage() + { +std::cout << +"usage: "< -p [-c port="< " "--partner master-slave replication partner\n" +" -c <[addr:]port="< " "--control dynamic control socket\n" +" -a " "--auto-replace enable auto replacing\n" +" -Rs " "--replace-delay delay steps of auto replacing\n" +; +rpc_cluster_args::show_usage(); + } +}; + +int main(int argc, char* argv[]) +{ + arg_t arg(argc, argv); + + // initialize logger first + mlogger::level loglevel = (arg.verbose ? 
mlogger::TRACE : mlogger::WARN); + init_mlogger(arg.logfile, arg.pidfile.empty(), loglevel); + + // daemonize + if(!arg.pidfile.empty()) { + do_daemonize(!arg.logfile.empty(), arg.pidfile.c_str()); + } + + // initialize binary logger + if(arg.logpack_path_set) { + logpacker::initialize(arg.logpack_path.c_str()); + } + + // run server + manager::init(arg); + manager::net->run(); + manager::net->join(); + + return 0; +} + diff --git a/src/logic/manager/proto_control.cc b/src/logic/manager/proto_control.cc new file mode 100644 index 0000000..c4a0929 --- /dev/null +++ b/src/logic/manager/proto_control.cc @@ -0,0 +1,116 @@ +#include "manager/framework.h" +#include "manager/control_framework.h" +#include "server/proto_control.h" + +namespace kumo { +namespace manager { + + +proto_control::proto_control() { } +proto_control::~proto_control() { } + + +RPC_IMPL(proto_control, GetStatus_1, req, z, response) +try { + Status res; + + { + pthread_scoped_lock hslk(share->hs_mutex()); + res.hsseed() = HashSpace::Seed(share->whs()); + } + + pthread_scoped_lock nslk(share->new_servers_mutex()); + for(new_servers_t::iterator it(share->new_servers().begin()), it_end(share->new_servers().end()); + it != it_end; ++it) { + shared_node n(it->lock()); + if(n) { + res.newcomers().push_back(n->addr()); + } + } + nslk.unlock(); + + response.result(res); +} +RPC_CATCH(GetStatus_1, response) + +RPC_IMPL(proto_control, AttachNewServers_1, req, z, response) +try { + { + pthread_scoped_lock hslk(share->hs_mutex()); + net->scope_proto_replace().attach_new_servers(hslk); + net->scope_proto_replace().start_replace(hslk); + } + response.null(); +} +RPC_CATCH(AttachNewServers, response); + +RPC_IMPL(proto_control, DetachFaultServers_1, req, z, response) +try { + { + pthread_scoped_lock hslk(share->hs_mutex()); + net->scope_proto_replace().detach_fault_servers(hslk); + net->scope_proto_replace().start_replace(hslk); + } + response.null(); +} +RPC_CATCH(DetachFaultServers, response); + 
+RPC_IMPL(proto_control, CreateBackup_1, req, z, response) +try { + if(req.param().suffix.empty()) { + std::string msg("empty suffix"); + response.error(msg); + return; + } + server::proto_control::CreateBackup_1 param(req.param().suffix); + rpc::callback_t callback( BIND_RESPONSE(proto_control, CreateBackup_1) ); + shared_zone nullz; + + pthread_scoped_lock sslk(share->servers_mutex()); + EACH_ACTIVE_SERVERS_BEGIN(node) + node->call(param, nullz, callback, 10); + EACH_ACTIVE_SERVERS_END + sslk.unlock(); + + response.null(); +} +RPC_CATCH(CreateBackup, response); + +RPC_REPLY_IMPL(proto_control, CreateBackup_1, from, res, err, life) +{ } + +RPC_IMPL(proto_control, SetAutoReplace_1, req, z, response) +try { + if(share->cfg_auto_replace() && !req.param().enable) { + share->cfg_auto_replace() = false; + response.result(false); + } else if(!share->cfg_auto_replace() && req.param().enable) { + share->cfg_auto_replace() = true; + + { + pthread_scoped_lock hslk(share->hs_mutex()); + net->scope_proto_replace().attach_new_servers(hslk); + net->scope_proto_replace().detach_fault_servers(hslk); + } + + response.result(true); + } + response.null(); +} +RPC_CATCH(SetAutoReplace, response); + +RPC_IMPL(proto_control, StartReplace_1, req, z, response) +try { + { + pthread_scoped_lock hslk(share->hs_mutex()); + net->scope_proto_replace().start_replace(hslk); + } + + response.null(); +} +RPC_CATCH(StartReplace, response); + + +} // namespace manager +} // namespace kumo + diff --git a/src/logic/manager/proto_network.cc b/src/logic/manager/proto_network.cc new file mode 100644 index 0000000..007bccb --- /dev/null +++ b/src/logic/manager/proto_network.cc @@ -0,0 +1,238 @@ +#include "manager/framework.h" +#include "manager/proto_network.h" +#include "server/proto_network.h" +#include "gateway/proto_network.h" + + +#define EACH_ACTIVE_NEW_COMERS_BEGIN(NODE) \ + for(new_servers_t::iterator _it_(share->new_servers().begin()), \ + it_end(share->new_servers().end()); _it_ != it_end; 
++_it_) { \ + shared_node NODE(_it_->lock()); \ + if(SESSION_IS_ACTIVE(NODE)) { + // FIXME share->new_servers().erase(it) ? + +#define EACH_ACTIVE_NEW_COMERS_END \ + } \ + } + +namespace kumo { +namespace manager { + + +RPC_IMPL(proto_network, KeepAlive_1, req, z, response) +try { + share->clock().update(req.param().clock); + response.null(); +} +RPC_CATCH(KeepAlive_1, response) + + +RPC_IMPL(proto_network, HashSpaceRequest_1, req, z, response) +try { + HashSpace::Seed* wseed; + HashSpace::Seed* rseed; + { + pthread_scoped_lock hslk(share->hs_mutex()); + wseed = z->allocate(share->whs()); + rseed = z->allocate(share->rhs()); + } + + gateway::proto_network::HashSpacePush_1 arg(*wseed, *rseed); + response.result(arg, z); +} +RPC_CATCH(HashSpaceRequest_1, response) + + +RPC_IMPL(proto_network, WHashSpaceRequest_1, req, z, response) +try { + HashSpace::Seed* seed; + { + pthread_scoped_lock hslk(share->hs_mutex()); + seed = z->allocate(share->whs()); + } + response.result(*seed, z); +} +RPC_CATCH(WHashSpaceRequest_1, response) + + +RPC_IMPL(proto_network, RHashSpaceRequest_1, req, z, response) +try { + HashSpace::Seed* seed; + { + pthread_scoped_lock hslk(share->hs_mutex()); + seed = z->allocate(share->rhs()); + } + response.result(*seed, z); +} +RPC_CATCH(RHashSpaceRequest_1, response) + + + +void proto_network::sync_hash_space_servers(REQUIRE_HSLK, REQUIRE_SSLK) +{ + shared_zone life(new msgpack::zone()); + HashSpace::Seed* wseed = life->allocate(share->whs()); + HashSpace::Seed* rseed = life->allocate(share->rhs()); + + server::proto_network::HashSpaceSync_1 arg(*wseed, *rseed, share->clock().get_incr()); + + rpc::callback_t callback( BIND_RESPONSE(proto_network, HashSpaceSync_1) ); + + EACH_ACTIVE_SERVERS_BEGIN(node) + node->call(arg, life, callback, 10); + EACH_ACTIVE_SERVERS_END +} + + +void proto_network::sync_hash_space_partner(REQUIRE_HSLK) +{ + if(!share->partner().connectable()) { return; } + + shared_zone life(new msgpack::zone()); + HashSpace::Seed* wseed = 
life->allocate(share->whs()); + HashSpace::Seed* rseed = life->allocate(share->rhs()); + + manager::proto_network::HashSpaceSync_1 arg(*wseed, *rseed, share->clock().get_incr()); + net->get_node(share->partner())->call( + arg, life, + BIND_RESPONSE(proto_network, HashSpaceSync_1), 10); +} + +RPC_REPLY_IMPL(proto_network, HashSpaceSync_1, from, res, err, life) +{ + // FIXME retry +} + + +namespace { + struct each_client_push { + each_client_push(HashSpace::Seed* whs, HashSpace::Seed* rhs, + rpc::callback_t cb, shared_zone& l) : + life(l), + param(*whs, *rhs), + callback(cb) { } + + void operator() (rpc::shared_peer p) + { + LOG_WARN("push hash space to ",(void*)p.get()); + p->call(param, life, callback, 10); + } + + private: + rpc::shared_zone& life; + gateway::proto_network::HashSpacePush_1 param; + rpc::callback_t callback; + }; +} // noname namespace + +void proto_network::push_hash_space_clients(REQUIRE_HSLK) +{ + LOG_WARN("push hash space ..."); + + shared_zone life(new msgpack::zone()); + HashSpace::Seed* wseed = life->allocate(share->whs()); + HashSpace::Seed* rseed = life->allocate(share->rhs()); + + rpc::callback_t callback( BIND_RESPONSE(proto_network, HashSpacePush_1) ); + net->subsystem().for_each_peer( each_client_push(wseed, rseed, callback, life) ); +} + +RPC_REPLY_IMPL(proto_network, HashSpacePush_1, from, res, err, life) +{ } + + + +RPC_IMPL(proto_network, HashSpaceSync_1, req, z, response) +try { + if(req.node()->addr() != share->partner()) { + throw std::runtime_error("unknown partner node"); + } + + share->clock().update(req.param().clock); + + bool ret = false; + + pthread_scoped_lock hslk(share->hs_mutex()); + pthread_scoped_lock nslk(share->new_servers_mutex()); + + if(!req.param().wseed.empty() && (share->whs().empty() || + share->whs().clocktime() <= ClockTime(req.param().wseed.clocktime()))) { + share->whs() = HashSpace(req.param().wseed); + ret = true; + } + + if(!req.param().rseed.empty() && (share->rhs().empty() || + 
share->rhs().clocktime() <= ClockTime(req.param().rseed.clocktime()))) { + share->rhs() = HashSpace(req.param().rseed); + ret = true; + } + + for(new_servers_t::iterator it(share->new_servers().begin()); + it != share->new_servers().end(); ) { + shared_node srv(it->lock()); + if(!srv || share->whs().server_is_active(srv->addr())) { + it = share->new_servers().erase(it); + } else { + ++it; + } + } + + nslk.unlock(); + hslk.unlock(); + + if(ret) { + response.result(true); + } else { + response.null(); + } +} +RPC_CATCH(HashSpaceSync_1, response) + + +void proto_network::keep_alive() +{ + LOG_TRACE("keep alive ..."); + shared_zone nullz; + server::proto_network::KeepAlive_1 param(share->clock().get_incr()); + + rpc::callback_t callback( BIND_RESPONSE(proto_network, KeepAlive_1) ); + + pthread_scoped_lock sslk(share->servers_mutex()); + EACH_ACTIVE_SERVERS_BEGIN(node) + // FIXME exception + node->call(param, nullz, callback, 10); + EACH_ACTIVE_SERVERS_END + sslk.unlock(); + + pthread_scoped_lock nslk(share->new_servers_mutex()); + EACH_ACTIVE_NEW_COMERS_BEGIN(node) + // FIXME exception + node->call(param, nullz, callback, 10); + EACH_ACTIVE_NEW_COMERS_END + nslk.unlock(); + + if(share->partner().connectable()) { + // FIXME cache result of net->get_node(share->partner())? 
+ net->get_node(share->partner())->call( + param, nullz, callback, 10); + } +} + +RPC_REPLY_IMPL(proto_network, KeepAlive_1, from, res, err, life) +{ + if(err.is_nil()) { + LOG_TRACE("KeepAlive succeeded"); + } else { + LOG_WARN("KeepAlive failed: ",err); + if(from && !from->is_lost()) { + if(from->increment_connect_retried_count() > 5) { // FIXME + from->shutdown(); + } + } + } +} + + +} // namespace manager +} // namespace kumo + diff --git a/src/logic/manager/proto_replace.cc b/src/logic/manager/proto_replace.cc new file mode 100644 index 0000000..d8d0acf --- /dev/null +++ b/src/logic/manager/proto_replace.cc @@ -0,0 +1,385 @@ +#include "manager/framework.h" +#include "manager/proto_replace.h" +#include "server/proto_replace.h" + +namespace kumo { +namespace manager { + + +proto_replace::proto_replace() : + m_delayed_replace_clock(0) +{ } + +proto_replace::~proto_replace() { } + + +void proto_replace::add_server(const address& addr, shared_node& s) +{ + LOG_INFO("server connected ",s->addr()); + LOGPACK("nS",2, + "addr", addr); + + //if(!share->whs().server_is_fault(addr)) { + pthread_scoped_lock nslk(share->new_servers_mutex()); + share->new_servers().push_back( weak_node(s) ); + nslk.unlock(); + + if(share->cfg_auto_replace()) { + // delayed replace + delayed_replace_election(); + } +} + +void proto_replace::remove_server(const address& addr) +{ + LOG_INFO("server lost ",addr); + LOGPACK("lS",2, + "addr", addr); + + ClockTime ct = share->clock().now_incr(); + + pthread_scoped_lock hslk(share->hs_mutex()); + pthread_scoped_lock sslk(share->servers_mutex()); + pthread_scoped_lock nslk(share->new_servers_mutex()); + + bool wfault = share->whs().fault_server(ct, addr); + bool rfault = share->rhs().fault_server(ct, addr); + + if((wfault || rfault) && !share->cfg_auto_replace()) { + net->scope_proto_network().sync_hash_space_partner(hslk); + net->scope_proto_network().sync_hash_space_servers(hslk, sslk); + net->scope_proto_network().push_hash_space_clients(hslk); + 
} + hslk.unlock(); + + share->servers().erase(addr); + sslk.unlock(); + + for(new_servers_t::iterator it(share->new_servers().begin()); + it != share->new_servers().end(); ) { + shared_node n(it->lock()); + if(!n || n->addr() == addr) { + it = share->new_servers().erase(it); + } else { + ++it; + } + } + nslk.unlock(); + + if(share->cfg_auto_replace()) { + // delayed replace + delayed_replace_election(); + } else { + pthread_scoped_lock relk(m_replace_mutex); + m_copying.invalidate(); // prevent replace delete + } +} + + +void proto_replace::delayed_replace_election() +{ + m_delayed_replace_clock = share->cfg_replace_delay_clocks(); + LOG_INFO("set delayed replace clock(",m_delayed_replace_clock,")"); + if(m_delayed_replace_clock == 0) { + m_delayed_replace_clock = 1; + } +} + +void proto_replace::delayed_replace_election_step() +{ + if(m_delayed_replace_clock > 0) { + --m_delayed_replace_clock; + if(m_delayed_replace_clock == 0) { + replace_election(); + } + } +} + + +void proto_replace::replace_election() +{ + // XXX + // election: smaller address has priority + pthread_scoped_lock hslk(share->hs_mutex()); + attach_new_servers(hslk); + detach_fault_servers(hslk); + + if(share->partner().connectable() && share->partner() < net->addr()) { + LOG_INFO("replace delegate to ",share->partner()); + + // delegate replace + shared_zone life(new msgpack::zone()); + + HashSpace::Seed* seed = life->allocate(share->whs()); + hslk.unlock(); + + manager::proto_replace::ReplaceElection_1 arg(*seed, share->clock().get_incr()); + net->get_node(share->partner())->call( // FIXME exception + arg, life, + BIND_RESPONSE(proto_replace, ReplaceElection_1), 10); + } else { + LOG_INFO("replace self elected"); + start_replace(hslk); + } +} + +RPC_REPLY_IMPL(proto_replace, ReplaceElection_1, from, res, err, life) +{ + if(!err.is_nil() || res.is_nil()) { + LOG_INFO("replace delegate failed, elected"); + pthread_scoped_lock hslk(share->hs_mutex()); + start_replace(hslk); + } else { + // do 
nothing + } +} + + + +void proto_replace::attach_new_servers(REQUIRE_HSLK) +{ + // update hash space + ClockTime ct = share->clock().now_incr(); + LOG_INFO("update hash space at time(",ct.get(),")"); + + pthread_scoped_lock nslk(share->new_servers_mutex()); + pthread_scoped_lock sslk(share->servers_mutex()); + + for(new_servers_t::iterator it(share->new_servers().begin()), + it_end(share->new_servers().end()); it != it_end; ++it) { + shared_node srv(it->lock()); + if(srv) { + if(share->whs().server_is_include(srv->addr())) { + LOG_INFO("recover server: ",srv->addr()); + share->whs().recover_server(ct, srv->addr()); + } else { + LOG_INFO("new server: ",srv->addr()); + share->whs().add_server(ct, srv->addr()); + } + share->servers()[srv->addr()] = *it; + } + } + share->new_servers().clear(); + + sslk.unlock(); + nslk.unlock(); + + net->scope_proto_network().sync_hash_space_partner(hslk); + //net->scope_proto_network().sync_hash_space_servers(); + //push_hash_space_clients(); +} + +void proto_replace::detach_fault_servers(REQUIRE_HSLK) +{ + ClockTime ct = share->clock().now_incr(); + + share->whs().remove_fault_servers(ct); + + net->scope_proto_network().sync_hash_space_partner(hslk); + //net->scope_proto_network().sync_hash_space_servers(); + //net->scope_proto_network().push_hash_space_clients(); +} + + +proto_replace::ReplaceContext::ReplaceContext() : + m_num(0), m_clocktime(0) {} + +proto_replace::ReplaceContext::~ReplaceContext() {} + +inline ClockTime proto_replace::ReplaceContext::clocktime() const { return m_clocktime; } + +inline void proto_replace::ReplaceContext::reset(ClockTime ct, unsigned int num) +{ + m_num = num; + m_clocktime = ct; +} + +bool proto_replace::ReplaceContext::pop(ClockTime ct) +{ + if(m_clocktime != ct) { return false; } + if(m_num == 1) { + m_num = 0; + return true; + } + --m_num; + return false; +} + +void proto_replace::ReplaceContext::invalidate() +{ + m_clocktime = ClockTime(0); + m_num = 0; +} + + +void 
proto_replace::start_replace(REQUIRE_HSLK) +{ + LOG_INFO("start replace copy"); + pthread_scoped_lock relk(m_replace_mutex); + + shared_zone life(new msgpack::zone()); + + HashSpace::Seed* seed = life->allocate(share->whs()); + ClockTime ct(share->whs().clocktime()); + + server::proto_replace::ReplaceCopyStart_1 arg(*seed, share->clock().get_incr()); + + using namespace mp::placeholders; + rpc::callback_t callback( BIND_RESPONSE(proto_replace, ReplaceCopyStart_1) ); + + unsigned int num_active = 0; + + pthread_scoped_lock sslk(share->servers_mutex()); + EACH_ACTIVE_SERVERS_BEGIN(n) + n->call(arg, life, callback, 10); + ++num_active; + EACH_ACTIVE_SERVERS_END + sslk.unlock(); + + LOG_INFO("active node: ",num_active); + m_copying.reset(ct, num_active); + m_deleting.reset(0, 0); + relk.unlock(); + + // push hashspace to the clients + try { + net->scope_proto_network().push_hash_space_clients(hslk); + } catch (std::runtime_error& e) { + LOG_ERROR("HashSpacePush failed: ",e.what()); + } catch (...) 
{ + LOG_ERROR("HashSpacePush failed: unknown error"); + } +} + +RPC_REPLY_IMPL(proto_replace, ReplaceCopyStart_1, from, res, err, life) +{ + // FIXME +} + + +RPC_IMPL(proto_replace, ReplaceElection_1, req, z, response) +try { + LOG_DEBUG("ReplaceElection"); + + if(req.node()->addr() != share->partner()) { + throw std::runtime_error("unknown partner node"); + } + + share->clock().update(req.param().clock); + + pthread_scoped_lock hslk(share->hs_mutex()); + ClockTime ct(share->whs().clocktime()); + + if(req.param().hsseed.empty() || + ClockTime(req.param().hsseed.clocktime()) < share->whs().clocktime()) { + LOG_DEBUG("obsolete hashspace"); + response.result(true); + return; + } + + if(share->whs().clocktime() < req.param().hsseed.clocktime()) { + LOG_INFO("double replace guard ",share->partner()); + + } else { + // election: smaller address has priority + if(share->partner() < net->addr()) { + LOG_INFO("replace re-delegate to ",share->partner()); + response.null(); + } else { + LOG_INFO("replace delegated from ",share->partner()); + attach_new_servers(hslk); + detach_fault_servers(hslk); + start_replace(hslk); + hslk.unlock(); + response.result(true); + } + } +} +RPC_CATCH(ReplaceElection_1, response) + + + +RPC_IMPL(proto_replace, ReplaceCopyEnd_1, req, z, response) +try { + pthread_scoped_lock relk(m_replace_mutex); + + share->clock().update(req.param().clock); + + ClockTime ct(req.param().clocktime); + if(m_copying.pop(ct)) { + finish_replace_copy(relk); + } + + relk.unlock(); + response.result(true); +} +RPC_CATCH(ReplaceCopyEnd_1, response) + + +RPC_IMPL(proto_replace, ReplaceDeleteEnd_1, req, z, response) +try { + pthread_scoped_lock relk(m_replace_mutex); + + share->clock().update(req.param().clock); + + ClockTime ct(req.param().clocktime); + if(m_deleting.pop(ct)) { + finish_replace(relk); + } + + relk.unlock(); + response.result(true); +} +RPC_CATCH(ReplaceDeleteEnd_1, response) + + +void proto_replace::finish_replace_copy(REQUIRE_RELK) +{ + // FIXME + 
ClockTime clocktime = m_copying.clocktime(); + LOG_INFO("start replace delete time(",clocktime.get(),")"); + m_copying.reset(0, 0); + + shared_zone life(new msgpack::zone()); + HashSpace::Seed* seed = life->allocate(share->whs()); + // FIXME server::proto_replace::ReplaceDeleteStart_1 has HashSpace::Seed: + // not so good efficiency + server::proto_replace::ReplaceDeleteStart_1 arg(*seed, share->clock().get_incr()); + + using namespace mp::placeholders; + rpc::callback_t callback( BIND_RESPONSE(proto_replace, ReplaceDeleteStart_1) ); + + unsigned int num_active = 0; + + pthread_scoped_lock sslk(share->servers_mutex()); + EACH_ACTIVE_SERVERS_BEGIN(node) + node->call(arg, life, callback, 10); + ++num_active; + EACH_ACTIVE_SERVERS_END + sslk.unlock(); + + m_deleting.reset(clocktime, num_active); + + pthread_scoped_lock hslk(share->hs_mutex()); + share->rhs() = share->whs(); + net->scope_proto_network().push_hash_space_clients(hslk); + hslk.unlock(); +} + +RPC_REPLY_IMPL(proto_replace, ReplaceDeleteStart_1, from, res, err, life) +{ + // FIXME +} + + +inline void proto_replace::finish_replace(REQUIRE_RELK) +{ + // FIXME + LOG_INFO("replace finished time(",m_deleting.clocktime().get(),")"); + m_deleting.reset(0, 0); +} + + +} // namespace manager +} // namespace kumo + diff --git a/src/logic/msgtype.h b/src/logic/msgtype.h new file mode 100644 index 0000000..54dc992 --- /dev/null +++ b/src/logic/msgtype.h @@ -0,0 +1,146 @@ +#ifndef LOGIC_MSGTYPE_H__ +#define LOGIC_MSGTYPE_H__ + +#include "server/storage.h" +#include "logic/hash.h" +#include + +namespace kumo { +namespace msgtype { + +using namespace msgpack::type; +using msgpack::type_error; +typedef HashSpace::Seed HSSeed; + + +struct DBKey { + DBKey() {} + + DBKey(const char* key, size_t keylen, uint64_t hash) : + m_keylen(keylen), m_key(key), m_hash(hash) {} + + DBKey(const char* raw_key, size_t raw_keylen) + { + msgpack_unpack(raw_ref(raw_key, raw_keylen)); + } + + const char* data() const { return m_key; } + size_t 
size() const { return m_keylen; } + uint64_t hash() const { return m_hash; } + + // these functions are available only when deserialized + const char* raw_data() const { return m_key - Storage::KEY_META_SIZE; } + size_t raw_size() const { return m_keylen + Storage::KEY_META_SIZE; } + + template + void msgpack_pack(Packer& pk) const + { + char metabuf[Storage::KEY_META_SIZE]; + Storage::hash_to(m_hash, metabuf); + pk.pack_raw(m_keylen + Storage::KEY_META_SIZE); + pk.pack_raw_body(metabuf, Storage::KEY_META_SIZE); + pk.pack_raw_body(m_key, m_keylen); + } + + void msgpack_unpack(raw_ref o) + { + if(o.size < Storage::KEY_META_SIZE) { + throw type_error(); + } + m_keylen = o.size - Storage::KEY_META_SIZE; + m_hash = Storage::hash_of(o.ptr); + m_key = o.ptr + Storage::KEY_META_SIZE; + } + +private: + size_t m_keylen; + const char* m_key; + uint64_t m_hash; + + typedef server::Storage Storage; +}; + + +struct DBValue { + DBValue() : m_clocktime(0) {} + + DBValue(const char* val, size_t vallen, uint64_t meta) : + m_vallen(vallen), m_val(val), m_clocktime(0), m_meta(meta) {} + + DBValue(const char* raw_val, size_t raw_vallen) : + m_clocktime(0) + { + msgpack_unpack(raw_ref(raw_val, raw_vallen)); + } + + const char* data() const { return m_val; } + size_t size() const { return m_vallen; } + ClockTime clocktime() const { return m_clocktime; } + uint64_t meta() const { return m_meta; } + + // these functions are available only when deserialized + const char* raw_data() const { return m_val - Storage::VALUE_META_SIZE; } + size_t raw_size() const { return m_vallen + Storage::VALUE_META_SIZE; } + void raw_set_clocktime(ClockTime clocktime) + { + m_clocktime = clocktime; + Storage::clocktime_to(clocktime, const_cast(raw_data())); + } + + template + void msgpack_pack(Packer& pk) const + { + char metabuf[Storage::VALUE_META_SIZE]; + Storage::clocktime_to(m_clocktime, metabuf); + Storage::meta_to(m_meta, metabuf); + pk.pack_raw(m_vallen + Storage::VALUE_META_SIZE); + 
pk.pack_raw_body(metabuf, Storage::VALUE_META_SIZE); + pk.pack_raw_body(m_val, m_vallen); + } + + void msgpack_unpack(raw_ref o) + { + if(o.size < Storage::VALUE_META_SIZE) { + throw type_error(); + } + m_clocktime = Storage::clocktime_of(o.ptr); + m_meta = Storage::meta_of(o.ptr); + m_vallen = o.size - Storage::VALUE_META_SIZE; + m_val = o.ptr + Storage::VALUE_META_SIZE; + } + +private: + size_t m_vallen; + const char* m_val; + ClockTime m_clocktime; + uint64_t m_meta; + + typedef server::Storage Storage; +}; + + +struct flags_base { + template + bool is_set() const { return m & T::flag; } + +protected: + uint32_t m; +public: + flags_base() : m(0) { } + template + void msgpack_pack(Packer& pk) const { pk.pack(m); } + void msgpack_unpack(uint32_t f) { m = f; } +}; + +template +struct flags : Base { + static const uint32_t flag = Flag; + flags() { this->flags_base::m = flag; } +}; + + +} // namespace msgtype +} // namespace kumo + +#endif /* logic/msgtype.h */ + diff --git a/src/logic/protogen/Makefile.am b/src/logic/protogen/Makefile.am new file mode 100644 index 0000000..28de350 --- /dev/null +++ b/src/logic/protogen/Makefile.am @@ -0,0 +1,9 @@ + +EXTRA_DIST = protogen + +.PHONY: all +all: + $(RUBY) protogen ../manager.proto.h + $(RUBY) protogen ../server.proto.h + $(RUBY) protogen ../gateway.proto.h + diff --git a/src/logic/protogen/protogen b/src/logic/protogen/protogen new file mode 100755 index 0000000..4ab976e --- /dev/null +++ b/src/logic/protogen/protogen @@ -0,0 +1,408 @@ +#!/usr/bin/env ruby + +if ARGV.length != 1 + puts "usage: #{$0} .proto.h" + exit 1 +end + +srcpath = ARGV[0] +$dirname = File.dirname(srcpath) +$genname = File.basename(srcpath, ".proto.h") + +src = File.read(srcpath) + + + +AUTOGEN_WARN = < {] + f.puts %[ #{members.xjoin(" ",";","\n\t\t")}] + f.puts %[ + void msgpack_unpack(const msgpack::type::tuple< + #{members.map {|type, var|type}.join(", ")} >& args) + { + #{i=-1; members.map {|type, var| "this->#{var} = args.get<#{i+=1}>();" 
}.join("\n\t\t\t")} + } + template + void msgpack_pack(Packer& pk) const + { + pk.pack_array(#{members.size}); + #{members.map {|type, var| "pk.pack(#{var});" }.join("\n\t\t\t")} + }] + f.puts %[ typedef rpc::retry<#{struct}> retry;] + unless members.empty? + f.puts %[ #{struct}(#{members.map {|type, var| "\n\t\t\t\tconst #{type}& #{var}_" }.join(",")}) :] + f.puts %[ #{members.map {|type, var| "#{var}(#{var}_)" }.join(", ")} { }] + end + f.puts %[ #{struct}() { }] + f.puts %[ + #{@code} + };] + f.puts %[ void rpc_#{struct}(rpc::request<#{struct}>&, rpc::auto_zone z, + rpc::weak_responder);] + f.puts %[] + end + end +end + + +class RPCHeader + def initialize(name, pre_body) + + @name = name + @trails = [] + + @messages = {} + + @pre_body = pre_body + @post_body = "" + + @code = "" + end + + def add_trails(trails) + @trails.concat(trails) + end + + def message(name, id = nil) + msg = @messages[name] + unless msg + return @messages[name] = RPCMessage.new(name, id ? id.to_i : nil) + end + if id + if msg.id + raise "duplicated declaration of message '#{@name}::#{name}'" + end + msg.id = id.to_i + end + msg + end + + def <<(body) + if @messages.find {|name, msg| !msg.versions.empty? 
} + @post_body << body + else + @pre_body << body + end + end + + def append_code(code) + @code << code + end + + def check + @messages.each_pair {|name, msg| + msg.check + } + end + + def generate(file) + File.open(file, "w") {|f| + include_guard(f, file) { + f.puts AUTOGEN_WARN + f.puts @pre_body + f.puts %[class #{@name} #{@trails.join(' ').strip} {] + f.puts %[public:] + @messages.each_pair {|name, msg| + msg.generate(f) + } + f.puts @code.strip + f.puts %[};] + f.puts @post_body + } + } + end + + def each_message(&block) + @messages.each_pair {|name, message| + block.call(message) + } + end +end + + +class ProtocolNumber + def initialize(pre_body) + @pre_body = pre_body + @post_body = "" + end + + def <<(body) + @post_body << body + end + + def check + end + + def generate(file, headers) + File.open(file, "w") {|f| + include_guard(f, file) { + f.puts @pre_body + f.puts %[namespace proto {] + headers.each_pair {|name, header| + f.puts %[ enum #{name} {] + header.each_message {|msg| + f.puts %[ #{msg.name} = #{msg.id},] + } + f.puts %[ };] + } + f.puts %[}] + f.puts @post_body + } + } + end +end + + +# FIXME +class Dispatch + def initialize(name, scopes) + @name = name + @scopes = scopes + end + + def check(headers) + @headers = @scopes.map {|scope| + unless header = headers[scope] + raise "dispatch scope '#{scope}' is not declared" + end + header + } + end + + def generate(file) + File.open(file, "w") {|f| + f.puts(AUTOGEN_WARN) + @headers.each {|header| + header.each_message {|msg| + msg.versions.each {|ver| + f.puts %[case #{msg.id << 16 | ver}:] + f.puts %[ break;] + } + } + } + } + end +end + +class OutputFiles + def initialize + @pre_body = "" + @headers = {} + @dispatches = {} + @protonum = nil + end + + def <<(body) + @pre_body << body + @headers.each_pair {|name, header| + header << body + } + if @protonum + @protonum << body + end + end + + def rpc_header(name, trails = []) + @protonum ||= ProtocolNumber.new(@pre_body.dup) + header = @headers[name] ||= 
RPCHeader.new(name, @pre_body.dup) + header.add_trails(trails) + header + end + + def rpc_dispatch(name, headers) + if @dispatches.include?(name) + raise "duplicated declaration of #{name} dispatch" + end + @dispatches[name] = Dispatch.new(name, headers) + end + + def check + @headers.each_pair {|name, header| + header.check + } + @dispatches.each_pair {|name, dispatch| + dispatch.check(@headers) + } + end + + def generate(dir) + @headers.each_pair {|name, header| + header.generate(File.join(dir, "#{name}.h")) + } + @dispatches.each_pair {|name, dispatch| + dispatch.generate(File.join(dir, "#{name}_dispatch.h")) + } + if @protonum + @protonum.generate(File.join(dir, "proto.h"), @headers) + end + end +end + + +$body = OutputFiles.new + + +s = StringScanner.new(src) + +while body_len = s.exist?(/@(rpc|c?message|dispatch|code)\b/) + $body << s.peek(body_len - s.matched_size) + s.pos += body_len + + case s[1] + when "rpc" + line = s.scan_until(/\n/) + scope, *trails = line.strip.split(/\s/) + + header = $body.rpc_header(scope, trails) + + code = s.scan_until(/@end\s\n?/) + raise "'@end' mismatch" unless code + code.slice!(-s.matched_size, s.matched_size) + + code.gsub!(/\bmessage\b\s+(\w+)\.(\d+)(\s*\+cluster\b)?(?:\s*\=\s*(\d+)\s*)?\s*\{(.*?)\}\;/m) do |match| + msgname = $~[1] + version = $~[2] + + mode_cluster = $~[3] + + msgid = $~[4] + message = header.message(msgname, msgid) + + message.mode_cluster = true if mode_cluster + + decl = $~[5] + members = [] + decl.gsub!(/\b(\S+)\s+(\w+)\s*\;/) {|match| + type = $~[1] + var = $~[2] + members.push [type, var] + "" + } + decl.strip! 
+ + message.add_version(version, members) + message.append_code(decl) + + "" + end + + code.gsub!(/\bmessage\b\s+(\w+)\s*\=\s*(\d+)\s*\;\s*/m) do |match| + msgname = $~[1] + msgid = $~[2] + + message = header.message(msgname, msgid) + + "" + end + + header.append_code(code) + + + when "message", "cmessage" + line = s.scan_until(/(:?\;|\n)\s*/) + + unless match = /\A\s*(\w+)\:\:(\w+)\s*\=\s*(\d+)\s*?(:?\;|\n)\s*\z/.match(line) + raise "invalid @message line '#{line}'" + end + + scope = match[1] + msgname = match[2] + msgid = match[3] + + header = $body.rpc_header(scope) + header.message(msgname, msgid) + + + when "dispatch" + line = s.scan_until(/;\s*/) + + unless match = /\A\s*\(\s*(\w+)\s*\)\s+(\w+)\s*(?:,\s*(\w+))*\s*\;\s*\z/.match(line) + raise "invalid @dispatch line '#{line}'" + end + captures = match.captures.compact + + name = captures.shift + scopes = captures + + $body.rpc_dispatch(name, scopes) + + + when "code" + line = s.scan_until(/\n/) + scopes = line.strip.split(/\s/) + + code = s.scan_until(/@end\s\n?/) + raise "'@end' mismatch" unless code + code.slice!(-s.matched_size, s.matched_size) + + scopes.each {|scope| + header = $body.rpc_header(scope) + header << code + } + + end +end + +$body << s.rest + +$body.check +$body.generate(File.join($dirname,$genname)) + diff --git a/src/logic/role.h b/src/logic/role.h new file mode 100644 index 0000000..42ea243 --- /dev/null +++ b/src/logic/role.h @@ -0,0 +1,14 @@ +#ifndef LOGIC_ROLE_H__ +#define LOGIC_ROLE_H__ + +#include "rpc/types.h" + +namespace kumo { + +static const rpc::role_type ROLE_MANAGER = 0; +static const rpc::role_type ROLE_SERVER = 1; + +} // namespace kumo + +#endif /* logic/role.h */ + diff --git a/src/logic/rpc_server.h b/src/logic/rpc_server.h new file mode 100644 index 0000000..4d35d7e --- /dev/null +++ b/src/logic/rpc_server.h @@ -0,0 +1,120 @@ +#ifndef LOGIC_RPC_SERVER_H__ +#define LOGIC_RPC_SERVER_H__ + +#include "logic/wavy_server.h" +#include "rpc/rpc.h" +#include +#include + 
+namespace kumo { + + +using namespace mp::placeholders; + +//using rpc::msgobj; +//using rpc::msgid_t; +//using rpc::method_id; + +using rpc::address; +using rpc::auto_zone; +using rpc::shared_zone; + +using rpc::weak_responder; +using rpc::basic_shared_session; +//using rpc::shared_peer; + +using mp::pthread_scoped_lock; +using mp::pthread_scoped_rdlock; +using mp::pthread_scoped_wrlock; + + +template +class rpc_server : public wavy_server { +public: + rpc_server(unsigned short rthreads, unsigned short wthreads) + { + init_wavy(rthreads, wthreads); + } + + ~rpc_server() { } + +protected: + void start_timeout_step(unsigned long interval) + { + struct timespec ts = {interval / 1000000, interval % 1000000 * 1000}; + wavy::timer(&ts, mp::bind(&Framework::step_timeout, + static_cast(this))); + LOG_TRACE("start timeout stepping interval = ",interval," usec"); + } +}; + + +#define RESOURCE_CONST_ACCESSOR(TYPE, NAME) \ + inline const TYPE& NAME() const { return m_##NAME; } + +#define RESOURCE_ACCESSOR(TYPE, NAME) \ + inline TYPE& NAME() { return m_##NAME; } \ + RESOURCE_CONST_ACCESSOR(TYPE, NAME) + +#define SESSION_IS_ACTIVE(SESSION) \ + (SESSION && !SESSION->is_lost()) + + +#define SHARED_ZONE(life, z) shared_zone life(z.release()) + + +#define RPC_DISPATCH(SCOPE, NAME) \ + case SCOPE::NAME::method::id: \ + { \ + rpc::request req(from, param); \ + m_##SCOPE.rpc_##NAME(req, z, response); \ + break; \ + } \ + + +#define RPC_IMPL(SCOPE, NAME, req, z, response) \ + void SCOPE::rpc_##NAME(rpc::request& req, rpc::auto_zone z, \ + rpc::weak_responder response) + + +#define RPC_REPLY_DECL(NAME, from, res, err, life, ...) \ + void res_##NAME(basic_shared_session from, rpc::msgobj res, rpc::msgobj err, \ + shared_zone life, ##__VA_ARGS__); + +#define RPC_REPLY_IMPL(SCOPE, NAME, from, res, err, life, ...) \ + void SCOPE::res_##NAME(basic_shared_session from, rpc::msgobj res, rpc::msgobj err, \ + shared_zone life, ##__VA_ARGS__) + + +#define BIND_RESPONSE(SCOPE, NAME, ...) 
\ + mp::bind(&SCOPE::res_##NAME, this, _1, _2, _3, _4, ##__VA_ARGS__) + + + +#define RPC_CATCH(NAME, response) \ +catch (msgpack::type_error& e) { \ + LOG_ERROR(#NAME " FAILED: type error"); \ + try { \ + response.error((uint8_t)rpc::protocol::SERVER_ERROR); \ + } catch (...) { } \ + throw; \ +} catch (std::exception& e) { \ + LOG_WARN(#NAME " FAILED: ",e.what()); \ + try { \ + response.error((uint8_t)rpc::protocol::SERVER_ERROR); \ + } catch (...) { } \ + throw; \ +} catch (...) { \ + LOG_ERROR(#NAME " FAILED: unknown error"); \ + try { \ + response.error((uint8_t)rpc::protocol::UNKNOWN_ERROR); \ + } catch (...) { } \ + throw; \ +} +// FIXME more specific error + + +} // namespace kumo + +#endif /* logic/rpc_server.h */ + diff --git a/src/logic/server.proto.h b/src/logic/server.proto.h new file mode 100644 index 0000000..ffda90c --- /dev/null +++ b/src/logic/server.proto.h @@ -0,0 +1,294 @@ +#include "server/proto.h" +#include "logic/msgtype.h" +#include "logic/cluster_logic.h" +#include +#include +#include + +namespace kumo { +namespace server { + + +@message proto_network::KeepAlive = 0 +@message proto_network::HashSpaceSync = 81 +@message proto_store::Get = 96 +@message proto_store::Set = 97 +@message proto_store::Delete = 98 +@message proto_store::ReplicateSet = 64 +@message proto_store::ReplicateDelete = 65 +@message proto_replace::ReplaceCopyStart = 16 +@message proto_replace::ReplaceDeleteStart = 17 +@message proto_replace_stream::ReplaceOffer = 66 +@message proto_control::CreateBackup = 18 +@message proto_control::GetStatus = 112 +@message proto_control::SetConfig = 113 + + +@rpc proto_network + message KeepAlive.1 +cluster { + uint32_t clock; + // ok: UNDEFINED + }; + + message HashSpaceSync.1 { + msgtype::HSSeed wseed; + msgtype::HSSeed rseed; + uint32_t clock; + // success: true + // obsolete: nil + }; + +public: + void keep_alive(); + void renew_w_hash_space(); + void renew_r_hash_space(); + +private: + RPC_REPLY_DECL(KeepAlive_1, from, res, err, 
life); + RPC_REPLY_DECL(WHashSpaceRequest_1, from, res, err, life); + RPC_REPLY_DECL(RHashSpaceRequest_1, from, res, err, life); +@end + + +@code proto_store +struct store_flags; +typedef msgtype::flags store_flags_none; +typedef msgtype::flags store_flags_async; +struct store_flags : public msgtype::flags_base { + bool is_async() { return is_set(); } +}; + +struct replicate_flags; +typedef msgtype::flags replicate_flags_none; +typedef msgtype::flags replicate_flags_by_rhs; +struct replicate_flags : msgtype::flags_base { + bool is_rhs() const { return is_set(); } +}; +@end + + +@rpc proto_store + message Get.1 { + msgtype::DBKey dbkey; + // success: value:DBValue + // not found: nil + }; + + message Set.1 { + store_flags flags; + msgtype::DBKey dbkey; + msgtype::DBValue dbval; + // success: tuple< clocktime:uint64 > + // failed: nil + }; + + message Delete.1 { + store_flags flags; + msgtype::DBKey dbkey; + // success: true + // not foud: false + // failed: nil + }; + + message ReplicateSet.1 { + uint32_t clock; + replicate_flags flags; + msgtype::DBKey dbkey; + msgtype::DBValue dbval; + // success: true + // ignored: false + }; + + message ReplicateDelete.1 { + uint64_t clocktime; + uint32_t clock; + replicate_flags flags; + msgtype::DBKey dbkey; + // success: true + // ignored: false + }; + +private: + void check_replicator_assign(HashSpace& hs, uint64_t h); + void check_coordinator_assign(HashSpace& hs, uint64_t h); + + bool SetByRhsWhs(weak_responder response, auto_zone& z, + msgtype::DBKey& key, msgtype::DBValue& val, + bool is_async); + void SetByWhs(weak_responder response, auto_zone& z, + msgtype::DBKey& key, msgtype::DBValue& val, + bool is_async); + + RPC_REPLY_DECL(ReplicateSet_1, from, res, err, life, + rpc::retry* retry, + volatile unsigned int* copy_required, + rpc::weak_responder response, uint64_t clocktime); + + bool DeleteByRhsWhs(weak_responder response, auto_zone& z, + msgtype::DBKey& key, + bool is_async); + void DeleteByWhs(weak_responder 
response, auto_zone& z, + msgtype::DBKey& key, + bool is_async); + + RPC_REPLY_DECL(ReplicateDelete_1, from, res, err, life, + rpc::retry* retry, + volatile unsigned int* copy_required, + rpc::weak_responder response, bool deleted); +@end + + + +@rpc proto_replace + message ReplaceCopyStart.1 +cluster { + msgtype::HSSeed hsseed; + uint32_t clock; + // accepted: true + }; + + message ReplaceDeleteStart.1 +cluster { + msgtype::HSSeed hsseed; + uint32_t clock; + // accepted: true + }; + +private: + static bool test_replicator_assign(const HashSpace& hs, uint64_t h, const address& target); + + typedef std::vector
addrvec_t; + typedef addrvec_t::iterator addrvec_iterator; + + void replace_copy(const address& manager_addr, HashSpace& hs); + struct for_each_replace_copy; + void finish_replace_copy(ClockTime clocktime, REQUIRE_STLK); + RPC_REPLY_DECL(ReplaceCopyEnd_1, from, res, err, life); + + void replace_delete(shared_node& manager, HashSpace& hs); + struct for_each_replace_delete; + RPC_REPLY_DECL(ReplaceDeleteEnd_1, from, res, err, life); + +private: + class replace_state { + public: + replace_state(); + ~replace_state(); + public: + void reset(const address& mgr, ClockTime ct); + void pushed(ClockTime ct); + void push_returned(ClockTime ct); + const address& mgr_addr() const; + bool is_finished(ClockTime ct) const; + void invalidate(); + private: + int m_push_waiting; + ClockTime m_clocktime; + address m_mgr; + }; + + mp::pthread_mutex m_state_mutex; + replace_state m_state; + +public: + void replace_offer_push(ClockTime replace_time, REQUIRE_STLK); + void replace_offer_pop(ClockTime replace_time, REQUIRE_STLK); + mp::pthread_mutex& state_mutex() { return m_state_mutex; } +@end + + +@rpc proto_replace_stream + message ReplaceOffer.1 +cluster { + uint16_t port; + // no response + }; + +public: + proto_replace_stream(address stream_addr); + ~proto_replace_stream(); + +private: + int m_stream_lsock; + address m_stream_addr; + +public: + const address& stream_addr() const + { + return m_stream_addr; + } + + void init_stream(int lsock); + void run_stream(); + void stop_stream(); + +private: + class OfferStorage; + struct SharedOfferMapComp; + typedef mp::shared_ptr SharedOfferStorage; + typedef std::vector SharedOfferMap; + +public: + class OfferStorageMap { + public: + OfferStorageMap(const std::string& basename, ClockTime replace_time); + ~OfferStorageMap(); + public: + void add(const address& addr, + const char* key, size_t keylen, + const char* val, size_t vallen); + void commit(SharedOfferMap* dst); + private: + SharedOfferMap m_map; + const std::string& m_basename; + 
ClockTime m_replace_time; + private: + OfferStorageMap(); + OfferStorageMap(const OfferStorageMap&); + }; + + void send_offer(OfferStorageMap& offer, ClockTime replace_time); + +private: + mp::pthread_mutex m_offer_map_mutex; + SharedOfferMap m_offer_map; + static SharedOfferMap::iterator find_offer_map( + SharedOfferMap& map, const address& addr); + + RPC_REPLY_DECL(ReplaceOffer_1, from, res, err, life, + ClockTime replace_time, address addr); + + void stream_accepted(int fd, int err); + void stream_connected(int fd, int err); + + std::auto_ptr m_stream_core; + class OfferStreamHandler; + friend class OfferStreamHandler; +@end + + +@code proto_control +enum status_type { + STAT_PID = 0, + STAT_UPTIME = 1, + STAT_TIME = 2, + STAT_VERSION = 3, + STAT_CMD_GET = 4, + STAT_CMD_SET = 5, + STAT_CMD_DELETE = 6, + STAT_DB_ITEMS = 7, +}; +@end + +@rpc proto_control + message CreateBackup.1 { + std::string suffix; + // success: true + }; + + message GetStatus.1 { + uint32_t command; + }; +@end + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/buffer_queue.h b/src/logic/server/buffer_queue.h new file mode 100644 index 0000000..6a1fe72 --- /dev/null +++ b/src/logic/server/buffer_queue.h @@ -0,0 +1,146 @@ +#ifndef BUFFER_QUEUE_H__ +#define BUFFER_QUEUE_H__ + +#include +#include +#include +#include + +namespace kumo { + + +class buffer_queue { +public: + buffer_queue(); + ~buffer_queue(); + +public: + void push(const void* buf, size_t buflen); + const void* front(size_t* result_buflen) const; + void pop(); + + size_t total_size() const; + +private: + size_t m_total_size; + + struct entry { + void* data; + size_t size; + }; + + typedef std::queue queue_type; + queue_type m_queue; + + mp::source<128, 2048> m_source; +}; + +inline buffer_queue::buffer_queue() : + m_total_size(0) { } + +inline buffer_queue::~buffer_queue() +{ + // source::~source frees all memory + //for(queue_type::iterator it(m_queue.begin()), + // it_end(m_queue.end()); it != it_end; 
++it) { + // m_source.free(*it); + //} +} + +inline void buffer_queue::push(const void* buf, size_t buflen) +{ + void* data = m_source.malloc(buflen); + ::memcpy(data, buf, buflen); + + entry e = {data, buflen}; + try { + m_queue.push(e); + } catch (...) { + m_source.free(data); + throw; + } + + m_total_size += buflen; +} + +inline const void* buffer_queue::front(size_t* result_buflen) const +{ + if(m_queue.empty()) { + return NULL; + } + + const entry& e = m_queue.front(); + + *result_buflen = e.size; + return e.data; +} + +inline void buffer_queue::pop() +{ + entry& e = m_queue.front(); + + m_total_size -= e.size; + m_source.free( e.data ); + + m_queue.pop(); +} + +inline size_t buffer_queue::total_size() const +{ + return m_total_size; +} + + +} // namespace kumo + + +#if 0 +using kumo::buffer_queue; + +struct kumo_buffer_queue; + +kumo_buffer_queue* kumo_buffer_queue_new(void) +try { + buffer_queue* impl = new buffer_queue(); + return reinterpret_cast(impl); +} catch (...) { + return NULL; +} + +void kumo_buffer_queue_free(kumo_buffer_queue* bq) +try { + buffer_queue* impl = reinterpret_cast(bq); + delete impl; +} catch (...) { } + +bool kumo_buffer_queue_push(kumo_buffer_queue* bq, const void* buf, size_t buflen) +try { + buffer_queue* impl = reinterpret_cast(bq); + impl->push(buf, buflen); + return true; +} catch (...) { + return false; +} + +const void* kumo_buffer_queue_front(kumo_buffer_queue* bq, size_t* result_buflen) +{ + buffer_queue* impl = reinterpret_cast(bq); + return impl->front(result_buflen); +} + +void kumo_buffer_queue_pop(kumo_buffer_queue* bq) +try { + buffer_queue* impl = reinterpret_cast(bq); + impl->pop(); +} catch (...) 
{ } + +size_t kumo_buffer_queue_total_size(kumo_buffer_queue* bq) +{ + buffer_queue* impl = reinterpret_cast(bq); + return impl->total_size(); +} +#endif + + +#endif /* storage/buffer_queue.h */ + diff --git a/src/logic/server/framework.cc b/src/logic/server/framework.cc new file mode 100644 index 0000000..f5ded11 --- /dev/null +++ b/src/logic/server/framework.cc @@ -0,0 +1,89 @@ +#include "server/framework.h" + +namespace kumo { +namespace server { + + +std::auto_ptr net; +std::auto_ptr share; + + +void framework::cluster_dispatch( + shared_node from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z) +{ + // FIXME try & catch + switch(method.get()) { + RPC_DISPATCH(proto_network, KeepAlive_1); + RPC_DISPATCH(proto_network, HashSpaceSync_1); + RPC_DISPATCH(proto_store, ReplicateSet_1); + RPC_DISPATCH(proto_store, ReplicateDelete_1); + RPC_DISPATCH(proto_replace, ReplaceCopyStart_1); + RPC_DISPATCH(proto_replace, ReplaceDeleteStart_1); + RPC_DISPATCH(proto_replace_stream, ReplaceOffer_1); + RPC_DISPATCH(proto_control, CreateBackup_1); + default: + // FIXME exception class + throw std::runtime_error("unknown method"); + } +} + +void framework::subsystem_dispatch( + shared_peer from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z) +{ + // FIXME try & catch + switch(method.get()) { + RPC_DISPATCH(proto_store, Get_1); + RPC_DISPATCH(proto_store, Set_1); + RPC_DISPATCH(proto_store, Delete_1); + RPC_DISPATCH(proto_control, GetStatus_1); + default: + // FIXME exception class + throw std::runtime_error("unknown method"); + } +} + + +void framework::run() +{ + wavy_server::run(); + scope_proto_replace_stream().run_stream(); + // FIXME end +} + +void framework::end_preprocess() +{ + scope_proto_replace_stream().stop_stream(); +} + + +void framework::step_timeout() +{ + rpc::cluster::step_timeout(); +} + + +void framework::new_node(address addr, role_type id, shared_node n) +{ + // XXX + LOG_WARN("new node 
",(uint16_t)id," ",addr); + if(addr == share->manager1()) { + scope_proto_network().renew_r_hash_space(); + scope_proto_network().renew_w_hash_space(); + } else if(share->manager2().connectable() && addr == share->manager2()) { + scope_proto_network().renew_r_hash_space(); + scope_proto_network().renew_w_hash_space(); + } +} + +void framework::lost_node(address addr, role_type id) +{ + // XXX + LOG_WARN("lost node ",(uint16_t)id," ",addr); +} + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/framework.h b/src/logic/server/framework.h new file mode 100644 index 0000000..288464b --- /dev/null +++ b/src/logic/server/framework.h @@ -0,0 +1,162 @@ +#ifndef SERVER_FRAMEWORK_H__ +#define SERVER_FRAMEWORK_H__ + +#include "logic/cluster_logic.h" +#include "server/proto_control.h" +#include "server/proto_network.h" +#include "server/proto_replace.h" +#include "server/proto_replace_stream.h" +#include "server/proto_store.h" + +#define EACH_ASSIGN(HS, HASH, REAL, CODE) \ +{ \ + HashSpace::iterator _it_(HS.find(HASH)); \ + HashSpace::iterator _origin_(_it_); \ + HashSpace::node REAL; \ + REAL = *_it_; \ + CODE; \ + ++_it_; \ + for(; _it_ != _origin_; ++_it_) { \ + if(*_it_ == *_origin_) { continue; } \ + HashSpace::node _rep1_ = *_it_; \ + REAL = _rep1_; \ + CODE; \ + ++_it_; \ + for(; _it_ != _origin_; ++_it_) { \ + if(*_it_ == *_origin_ || *_it_ == _rep1_) { continue; } \ + HashSpace::node _rep2_ = *_it_; \ + REAL = _rep2_; \ + CODE; \ + break; \ + } \ + break; \ + } \ +} + +namespace kumo { +namespace server { + + + +class framework : public cluster_logic { +public: + template + framework(const Config& cfg); + + void cluster_dispatch( + shared_node from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z); + + void subsystem_dispatch( + shared_peer from, weak_responder response, + rpc::method_id method, rpc::msgobj param, auto_zone z); + + void new_node(address addr, role_type id, shared_node n); + void 
lost_node(address addr, role_type id); + + void step_timeout(); + + // override wavy_server::run + virtual void run(); + virtual void end_preprocess(); + + // cluster_logic + void keep_alive() + { + scope_proto_network().keep_alive(); + } + +private: + proto_network m_proto_network; + proto_control m_proto_control; + proto_replace m_proto_replace; + proto_store m_proto_store; + proto_replace_stream m_proto_replace_stream; + +public: + proto_network& scope_proto_network() { return m_proto_network; } + proto_control& scope_proto_control() { return m_proto_control; } + proto_replace& scope_proto_replace() { return m_proto_replace; } + proto_store& scope_proto_store() { return m_proto_store; } + proto_replace_stream& scope_proto_replace_stream() { return m_proto_replace_stream; } + + // FIXME proto_replace_stream::rpc_ReplaceOffer_1 + unsigned int connect_timeout_msec() const { + return m_connect_timeout_msec; // rpc::client<> + } + +private: + framework(); + framework(const framework&); +}; + + +class resource { +public: + template + resource(const Config& cfg); + +private: + Clock m_clock; + + mp::pthread_rwlock m_rhs_mutex; + HashSpace m_rhs; + + mp::pthread_rwlock m_whs_mutex; + HashSpace m_whs; + + Storage& m_db; + + const address m_manager1; + const address m_manager2; + +private: + std::string m_cfg_offer_tmpdir; + std::string m_cfg_db_backup_basename; + + const unsigned short m_cfg_replicate_set_retry_num; + const unsigned short m_cfg_replicate_delete_retry_num; + + const time_t m_stat_start_time; // FIXME m_start_time -> m_stat_start_time + volatile uint64_t m_stat_num_get; + volatile uint64_t m_stat_num_set; + volatile uint64_t m_stat_num_delete; + +public: + RESOURCE_ACCESSOR(Clock, clock); + + RESOURCE_ACCESSOR(mp::pthread_rwlock, rhs_mutex); + RESOURCE_ACCESSOR(mp::pthread_rwlock, whs_mutex); + RESOURCE_ACCESSOR(HashSpace, rhs); + RESOURCE_ACCESSOR(HashSpace, whs); + + RESOURCE_ACCESSOR(Storage, db); + + RESOURCE_CONST_ACCESSOR(address, manager1); + 
RESOURCE_CONST_ACCESSOR(address, manager2); + + RESOURCE_CONST_ACCESSOR(std::string, cfg_offer_tmpdir); + RESOURCE_CONST_ACCESSOR(std::string, cfg_db_backup_basename); + RESOURCE_CONST_ACCESSOR(unsigned short, cfg_replicate_set_retry_num); + RESOURCE_CONST_ACCESSOR(unsigned short, cfg_replicate_delete_retry_num); + + RESOURCE_CONST_ACCESSOR(time_t, stat_start_time); + RESOURCE_ACCESSOR(volatile uint64_t, stat_num_get); + RESOURCE_ACCESSOR(volatile uint64_t, stat_num_set); + RESOURCE_ACCESSOR(volatile uint64_t, stat_num_delete); + +private: + resource(); + resource(const resource&); +}; + + +extern std::auto_ptr net; +extern std::auto_ptr share; + + +} // namespace server +} // namespace kumo + +#endif /* server/framework.h */ + diff --git a/src/logic/server/init.h b/src/logic/server/init.h new file mode 100644 index 0000000..97d3dfd --- /dev/null +++ b/src/logic/server/init.h @@ -0,0 +1,66 @@ +#ifndef SERVER_INIT_H__ +#define SERVER_INIT_H__ + +#include "server/framework.h" + +namespace kumo { +namespace server { + + +template +framework::framework(const Config& cfg) : + cluster_logic( + cfg.rthreads, cfg.wthreads, + ROLE_SERVER, + cfg.cluster_addr, + cfg.connect_timeout_msec, + cfg.connect_retry_limit), + m_proto_replace_stream(cfg.stream_addr) +{ + LOG_INFO("start server ",addr()); + LOGPACK("SS",2, + "time", time(NULL), + "addr", cfg.cluster_addr, + "db", cfg.dbpath, + "mgr1", share->manager1(), + "mgr2", share->manager2(), + "sadd", cfg.stream_addr, + "tmpd", share->cfg_offer_tmpdir(), + "bkup", share->cfg_db_backup_basename()); + listen_cluster(cfg.cluster_lsock); // cluster_logic + scope_proto_replace_stream().init_stream(cfg.stream_lsock); + start_timeout_step(cfg.clock_interval_usec); // rpc_server + start_keepalive(cfg.keepalive_interval_usec); // cluster_logic +} + +template +resource::resource(const Config& cfg) : + m_db(*cfg.db), + m_manager1(cfg.manager1), + m_manager2(cfg.manager2), + + m_cfg_offer_tmpdir(cfg.offer_tmpdir), + 
m_cfg_db_backup_basename(cfg.db_backup_basename), + m_cfg_replicate_set_retry_num(cfg.replicate_set_retry_num), + m_cfg_replicate_delete_retry_num(cfg.replicate_delete_retry_num), + + m_stat_start_time(time(NULL)), + m_stat_num_get(0), + m_stat_num_set(0), + m_stat_num_delete(0) +{ } + +template +static void init(const Config& cfg) +{ + share.reset(new resource(cfg)); + net.reset(new framework(cfg)); +} + + + +} // namespace server +} // namespace kumo + +#endif /* server/init.h */ + diff --git a/src/logic/server/main.cc b/src/logic/server/main.cc new file mode 100644 index 0000000..dc425ee --- /dev/null +++ b/src/logic/server/main.cc @@ -0,0 +1,142 @@ +#include "logic/boot.h" +#include "server/framework.h" +#include "server/init.h" + +using namespace kumo; + +struct arg_t : rpc_cluster_args { + + std::string dbpath; + + sockaddr_in manager1_in; + sockaddr_in manager2_in; + bool manager2_set; + rpc::address manager1; // convert + rpc::address manager2; // convert + + uint16_t stream_port; + rpc::address stream_addr; // convert + int stream_lsock; + + std::string offer_tmpdir; + + server::Storage* db; + std::string db_backup_basename; // convert? 
+ + unsigned short replicate_set_retry_num; + unsigned short replicate_delete_retry_num; + + unsigned int garbage_min_time_sec; + unsigned int garbage_max_time_sec; + size_t garbage_mem_limit_kb; + + virtual void convert() + { + cluster_addr = rpc::address(cluster_addr_in); + cluster_lsock = scoped_listen_tcp::listen(cluster_addr); + stream_addr = cluster_addr; + stream_addr.set_port(stream_port); + stream_lsock = scoped_listen_tcp::listen(stream_addr); + manager1 = rpc::address(manager1_in); + if(manager2_set) { + manager2 = rpc::address(manager2_in); + if(manager2 == manager1) { + throw std::runtime_error("-m and -p must be different"); + } + } + db_backup_basename = dbpath + "-"; + rpc_cluster_args::convert(); + } + + arg_t(int argc, char** argv) : + stream_port(SERVER_STREAM_DEFAULT_PORT), + replicate_set_retry_num(20), + replicate_delete_retry_num(20), + garbage_min_time_sec(60), + garbage_max_time_sec(60*60), + garbage_mem_limit_kb(2*1024) + { + clock_interval = 8.0; + + using namespace kazuhiki; + set_basic_args(); + on("-l", "--listen", + type::connectable(&cluster_addr_in, SERVER_DEFAULT_PORT)); + on("-L", "--stream-listen", + type::numeric(&stream_port, stream_port)); + on("-f", "--offer-tmp", + type::string(&offer_tmpdir, "/tmp")); + on("-s", "--store", + type::string(&dbpath)); + on("-m", "--manager1", + type::connectable(&manager1_in, MANAGER_DEFAULT_PORT)); + on("-p", "--manager2", &manager2_set, + type::connectable(&manager2_in, MANAGER_DEFAULT_PORT)); + on("-S", "--replicate-set-retry", + type::numeric(&replicate_set_retry_num, replicate_set_retry_num)); + on("-D", "--replicate-delete-retry", + type::numeric(&replicate_delete_retry_num, replicate_delete_retry_num)); + on("-gN", "--garbage-min-time", + type::numeric(&garbage_min_time_sec, garbage_min_time_sec)); + on("-gX", "--garbage-max-time", + type::numeric(&garbage_max_time_sec, garbage_max_time_sec)); + on("-gS", "--garbage-mem-limit", + type::numeric(&garbage_mem_limit_kb, 
garbage_mem_limit_kb)); + parse(argc, argv); + } + + void show_usage() + { +std::cout << +"usage: "< -p -l -s \n" +"\n" +" -l " "--listen listen address\n" +" -L " "--stream-listen listen port for replacing stream\n" +" -f " "--offer-tmp path to temporary directory for replacing\n" +" -s " "--store path to database\n" +" -m " "--manager1 address of manager 1\n" +" -p " "--manager2 address of manager 2\n" +" -S " "--replicate-set-retry replicate set retry limit\n" +" -D " "--replicate-delete-retry replicate delete retry limit\n" +" -gN " "--garbage-min-time minimum time to maintenance deleted key\n" +" -gX " "--garbage-max-time maximum time to maintenance deleted key\n" +" -gS " "--garbage-mem-limit maximum memory usage to memory deleted key\n" +; +rpc_cluster_args::show_usage(); + } +}; + +int main(int argc, char* argv[]) +{ + arg_t arg(argc, argv); + + // initialize logger first + mlogger::level loglevel = (arg.verbose ? mlogger::TRACE : mlogger::WARN); + init_mlogger(arg.logfile, arg.pidfile.empty(), loglevel); + + // daemonize + if(!arg.pidfile.empty()) { + do_daemonize(!arg.logfile.empty(), arg.pidfile.c_str()); + } + + // initialize binary logger + if(arg.logpack_path_set) { + logpacker::initialize(arg.logpack_path.c_str()); + } + + // open database + std::auto_ptr db( + new server::Storage(arg.dbpath.c_str(), + arg.garbage_min_time_sec, + arg.garbage_max_time_sec, + arg.garbage_mem_limit_kb*1024)); + arg.db = db.get(); + + // run server + server::init(arg); + server::net->run(); + server::net->join(); + + return 0; +} + diff --git a/src/logic/server/proto_control.cc b/src/logic/server/proto_control.cc new file mode 100644 index 0000000..ed6a1dc --- /dev/null +++ b/src/logic/server/proto_control.cc @@ -0,0 +1,68 @@ +#include "server/framework.h" +#include "server/proto_control.h" + +namespace kumo { +namespace server { + + +RPC_IMPL(proto_control, CreateBackup_1, req, z, response) +try { + std::string dst = share->cfg_db_backup_basename() + req.param().suffix; 
+ LOG_INFO("create backup: ",dst); + + share->db().backup(dst.c_str()); + + response.result(true); +} +RPC_CATCH(CreateBackup, response) + + +RPC_IMPL(proto_control, GetStatus_1, req, z, response) +try { + LOG_DEBUG("GetStatus_1"); + + switch((status_type)req.param().command) { + case STAT_PID: + response.result((uint32_t)getpid()); + break; + + case STAT_UPTIME: + response.result(time(NULL) - share->stat_start_time()); + break; + + case STAT_TIME: + response.result((uint64_t)time(NULL)); + break; + + case STAT_VERSION: + response.result(std::string(VERSION)); + break; + + case STAT_CMD_GET: + response.result(share->stat_num_get()); + break; + + case STAT_CMD_SET: + response.result(share->stat_num_set()); + break; + + case STAT_CMD_DELETE: + response.result(share->stat_num_delete()); + break; + + case STAT_DB_ITEMS: + response.result( share->db().rnum() ); + break; + + default: + response.result(msgpack::type::nil()); + break; + } +} +RPC_CATCH(GetStatus_1, response) + + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/proto_network.cc b/src/logic/server/proto_network.cc new file mode 100644 index 0000000..2ba0aae --- /dev/null +++ b/src/logic/server/proto_network.cc @@ -0,0 +1,186 @@ +#include "server/framework.h" +#include "server/proto_network.h" +#include "manager/proto_network.h" + +namespace kumo { +namespace server { + + +RPC_IMPL(proto_network, KeepAlive_1, req, z, response) +try { + share->clock().update(req.param().clock); + response.null(); +} +RPC_CATCH(KeepAlive_1, response) + + +void proto_network::keep_alive() +{ + LOG_TRACE("keep alive ..."); + shared_zone nullz; + manager::proto_network::KeepAlive_1 param(share->clock().get_incr()); + + using namespace mp::placeholders; + rpc::callback_t callback( BIND_RESPONSE(proto_network, KeepAlive_1) ); + + net->get_node(share->manager1())->call( + param, nullz, callback, 10); + + if(share->manager2().connectable()) { + net->get_node(share->manager2())->call( + param, nullz, 
callback, 10); + } +} + +RPC_REPLY_IMPL(proto_network, KeepAlive_1, from, res, err, life) +{ + if(err.is_nil()) { + LOG_TRACE("KeepAlive succeeded"); + } else { + LOG_DEBUG("KeepAlive failed: ",err); + } +} + + + +RPC_IMPL(proto_network, HashSpaceSync_1, req, z, response) +try { + LOG_DEBUG("HashSpaceSync_1"); + + share->clock().update(req.param().clock); + + bool ret = false; + + pthread_scoped_wrlock whlk(share->whs_mutex()); + + if(share->whs().clocktime() <= ClockTime(req.param().wseed.clocktime())) { + share->whs() = HashSpace(req.param().wseed); + ret = true; + } + + pthread_scoped_wrlock rhlk(share->rhs_mutex()); + + if(share->rhs().clocktime() <= ClockTime(req.param().rseed.clocktime())) { + share->rhs() = HashSpace(req.param().rseed); + ret = true; + } + + rhlk.unlock(); + whlk.unlock(); + + if(ret) { + response.result(true); + } else { + response.null(); + } +} +RPC_CATCH(HashSpaceSync_1, response) + + + +// FIXME needed?: renew_w_hash_space, renew_r_hash_space +// COPY A-1 +void proto_network::renew_w_hash_space() +{ + shared_zone nullz; + manager::proto_network::WHashSpaceRequest_1 param; + // ^ + + rpc::callback_t callback( BIND_RESPONSE(proto_network, WHashSpaceRequest_1) ); + // ^ + + net->get_node(share->manager1())->call( + param, nullz, callback, 10); + // ^ + + if(share->manager2().connectable()) { + net->get_node(share->manager2())->call( + param, nullz, callback, 10); + // ^ + } +} + +// COPY A-2 +void proto_network::renew_r_hash_space() +{ + shared_zone nullz; + manager::proto_network::RHashSpaceRequest_1 param; + // ^ + + rpc::callback_t callback( BIND_RESPONSE(proto_network, RHashSpaceRequest_1) ); + // ^ + + net->get_node(share->manager1())->call( + param, nullz, callback, 10); + // ^ + + if(share->manager2().connectable()) { + net->get_node(share->manager2())->call( + param, nullz, callback, 10); + // ^ + } +} + + +// COPY B-1 +RPC_REPLY_IMPL(proto_network, WHashSpaceRequest_1, from, res, err, life) +{ + // FIXME is this function needed? 
+ if(!err.is_nil()) { + LOG_DEBUG("WHashSpaceRequest failed ",err); + if(SESSION_IS_ACTIVE(from)) { + shared_zone nullz; + manager::proto_network::WHashSpaceRequest_1 param; + // ^ + + from->call(param, nullz, + // ^ + BIND_RESPONSE(proto_network, WHashSpaceRequest_1), 10); + // ^ + } // retry on lost_node() if err.via.u64 == NODE_LOST? + } else { + LOG_DEBUG("renew hash space"); + HashSpace::Seed hsseed(res.convert()); + + pthread_scoped_wrlock whlk(share->whs_mutex()); + if(share->whs().empty() || share->whs().clocktime() < ClockTime(hsseed.clocktime())) { + // ^ ^ + share->whs() = HashSpace(hsseed); + //^ + } + } +} + +// COPY B-2 +RPC_REPLY_IMPL(proto_network, RHashSpaceRequest_1, from, res, err, life) +{ + // FIXME is this function needed? + if(!err.is_nil()) { + LOG_DEBUG("WHashSpaceRequest failed ",err); + if(SESSION_IS_ACTIVE(from)) { + shared_zone nullz; + manager::proto_network::RHashSpaceRequest_1 param; + // ^ + + from->call(param, nullz, + // ^ + BIND_RESPONSE(proto_network, RHashSpaceRequest_1), 10); + // ^ + } // retry on lost_node() if err.via.u64 == NODE_LOST? 
+ } else { + LOG_DEBUG("renew hash space"); + HashSpace::Seed hsseed(res.convert()); + + pthread_scoped_wrlock rhlk(share->rhs_mutex()); + if(share->rhs().empty() || share->rhs().clocktime() < ClockTime(hsseed.clocktime())) { + // ^ ^ + share->rhs() = HashSpace(hsseed); + //^ + } + } +} + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/proto_replace.cc b/src/logic/server/proto_replace.cc new file mode 100644 index 0000000..5952768 --- /dev/null +++ b/src/logic/server/proto_replace.cc @@ -0,0 +1,369 @@ +#include "server/framework.h" +#include "server/proto_replace.h" +#include "manager/proto_replace.h" + +namespace kumo { +namespace server { + + +bool proto_replace::test_replicator_assign(const HashSpace& hs, uint64_t h, const address& target) +{ + EACH_ASSIGN(hs, h, r, + if(r.is_active()) { // don't write to fault node + if(r.addr() == target) return true; + }) + return false; +} + + +proto_replace::replace_state::replace_state() : + m_push_waiting(0), + m_clocktime(0) {} + +proto_replace::replace_state::~replace_state() {} + +inline void proto_replace::replace_state::reset(const address& mgr, ClockTime ct) +{ + m_push_waiting = 0; + m_clocktime = ct; + m_mgr = mgr; +} + +inline void proto_replace::replace_state::pushed(ClockTime ct) +{ + if(ct == m_clocktime) { ++m_push_waiting; } +} + +inline void proto_replace::replace_state::push_returned(ClockTime ct) +{ + if(ct == m_clocktime) { --m_push_waiting; } +} + +inline bool proto_replace::replace_state::is_finished(ClockTime ct) const +{ + return m_clocktime == ct && m_push_waiting == 0; +} + +inline void proto_replace::replace_state::invalidate() +{ + m_push_waiting = -1; +} + +inline const address& proto_replace::replace_state::mgr_addr() const +{ + return m_mgr; +} + + +void proto_replace::replace_offer_push(ClockTime replace_time, REQUIRE_STLK) +{ + m_state.pushed(replace_time); +} + +void proto_replace::replace_offer_pop(ClockTime replace_time, REQUIRE_STLK) +{ + 
m_state.push_returned(replace_time); + if(m_state.is_finished(replace_time)) { + finish_replace_copy(replace_time, stlk); + } +} + + + +RPC_IMPL(proto_replace, ReplaceCopyStart_1, req, z, response) +try { + share->clock().update(req.param().clock); + + HashSpace hs(req.param().hsseed); + + response.result(true); + + try { + replace_copy(req.node()->addr(), hs); + } catch (std::runtime_error& e) { + LOG_ERROR("replace copy failed: ",e.what()); + } catch (...) { + LOG_ERROR("replace copy failed: unknown error"); + } +} +RPC_CATCH(ReplaceCopyStart, response) + + +RPC_IMPL(proto_replace, ReplaceDeleteStart_1, req, z, response) +try { + share->clock().update(req.param().clock); + + HashSpace hs(req.param().hsseed); + + response.result(true); + + try { + replace_delete(req.node(), hs); + } catch (std::runtime_error& e) { + LOG_ERROR("replace delete failed: ",e.what()); + } catch (...) { + LOG_ERROR("replace delete failed: unknown error"); + } +} +RPC_CATCH(ReplaceDeleteStart, response) + + +struct proto_replace::for_each_replace_copy { + for_each_replace_copy( + const address& addr, + const HashSpace& src, const HashSpace& dst, + proto_replace_stream::OfferStorageMap& accumulator, + const addrvec_t& faults) : + self(addr), + srchs(src), dsths(dst), + offer(accumulator), fault_nodes(faults) + { + Sa.reserve(NUM_REPLICATION+1); + Da.reserve(NUM_REPLICATION+1); + current_owners.reserve(NUM_REPLICATION+1); + newbies.reserve(NUM_REPLICATION+1); + } + + inline void operator() (Storage::iterator& kv); + +private: + addrvec_t Sa; + addrvec_t Da; + addrvec_t current_owners; + addrvec_t newbies; + + const address& self; + + const HashSpace& srchs; + const HashSpace& dsths; + + proto_replace_stream::OfferStorageMap& offer; + const addrvec_t& fault_nodes; + +private: + for_each_replace_copy(); +}; + +void proto_replace::replace_copy(const address& manager_addr, HashSpace& hs) +{ + ClockTime replace_time = hs.clocktime(); + + { + pthread_scoped_lock stlk(m_state_mutex); + 
m_state.reset(manager_addr, replace_time); + replace_offer_push(replace_time, stlk); // replace_copy; + } + + LOG_INFO("start replace copy for time(",replace_time.get(),")"); + + // take both hash-space locks: rhs is snapshotted as the copy source, whs is replaced by the new space + pthread_scoped_wrlock whlock(share->whs_mutex()); + pthread_scoped_wrlock rhlk(share->rhs_mutex()); + + HashSpace srchs(share->rhs()); + // the read hash space has been copied, so release its lock here; whlock must stay held for the assignment below + rhlk.unlock(); + + share->whs() = hs; + whlock.unlock(); + + HashSpace& dsths(hs); + + addrvec_t fault_nodes; + { + addrvec_t src_nodes; + addrvec_t dst_nodes; + + srchs.get_active_nodes(src_nodes); + dsths.get_active_nodes(dst_nodes); + + for(addrvec_iterator it(src_nodes.begin()); it != src_nodes.end(); ++it) { + LOG_INFO("src active node: ",*it); + } + for(addrvec_iterator it(dst_nodes.begin()); it != dst_nodes.end(); ++it) { + LOG_INFO("dst active node: ",*it); + } + + if(src_nodes.empty() || dst_nodes.empty()) { + LOG_INFO("empty hash space. skip replacing."); + goto skip_replace; + } + + std::sort(src_nodes.begin(), src_nodes.end()); + std::sort(dst_nodes.begin(), dst_nodes.end()); + + // fault nodes: active in the source space but absent from the destination space + for(addrvec_iterator it(src_nodes.begin()); it != src_nodes.end(); ++it) { + if(!std::binary_search(dst_nodes.begin(), dst_nodes.end(), *it)) { + fault_nodes.push_back(*it); + } + } + + for(addrvec_iterator it(fault_nodes.begin()); it != fault_nodes.end(); ++it) { + LOG_INFO("fault node: ",*it); + } + + if(std::binary_search(fault_nodes.begin(), fault_nodes.end(), net->addr())) { + LOG_WARN("I'm marked as fault.
skip replacing."); + goto skip_replace; + } + } + + { + proto_replace_stream::OfferStorageMap offer(share->cfg_offer_tmpdir(), replace_time); + + share->db().for_each( + for_each_replace_copy(net->addr(), srchs, dsths, offer, fault_nodes), + share->clock().now()); + + net->scope_proto_replace_stream().send_offer(offer, replace_time); + } + +skip_replace: + pthread_scoped_lock stlk(m_state_mutex); + replace_offer_pop(replace_time, stlk); // replace_copy +} + +void proto_replace::for_each_replace_copy::operator() (Storage::iterator& kv) +{ + const char* raw_key = kv.key(); + size_t raw_keylen = kv.keylen(); + const char* raw_val = kv.val(); + size_t raw_vallen = kv.vallen(); + + // FIXME do it in storage module. + //if(raw_vallen < Storage::VALUE_META_SIZE) { return; } + //if(raw_keylen < Storage::KEY_META_SIZE) { return; } + + uint64_t h = Storage::hash_of(kv.key()); + + Sa.clear(); + EACH_ASSIGN(srchs, h, r, { + if(r.is_active()) Sa.push_back(r.addr()); }); + + Da.clear(); + EACH_ASSIGN(dsths, h, r, { + if(r.is_active()) Da.push_back(r.addr()); }); + + current_owners.clear(); + for(addrvec_iterator it(Sa.begin()); it != Sa.end(); ++it) { + if(!std::binary_search(fault_nodes.begin(), fault_nodes.end(), *it)) { + current_owners.push_back(*it); + } + } + + // FIXME 再配置中にServerがダウンしたときコピーが正常に行われないかもしれない + //if(current_owners.empty() || current_owners.front() != self) { return; } + if(std::find(current_owners.begin(), current_owners.end(), self) + == current_owners.end()) { return; } + + newbies.clear(); + for(addrvec_iterator it(Da.begin()); it != Da.end(); ++it) { + if(std::find(Sa.begin(), Sa.end(), *it) == Sa.end()) { + newbies.push_back(*it); + } + } + + if(newbies.empty()) { return; } + + for(addrvec_iterator it(newbies.begin()); it != newbies.end(); ++it) { + offer.add(*it, + raw_key, raw_keylen, + raw_val, raw_vallen); + } +} + + + +void proto_replace::finish_replace_copy(ClockTime replace_time, REQUIRE_STLK) +{ + LOG_INFO("finish replace copy for 
time(",replace_time.get(),")"); + + shared_zone nullz; + manager::proto_replace::ReplaceCopyEnd_1 param( + replace_time.get(), share->clock().get_incr()); + + address addr; + //{ + // pthread_scoped_lock stlk(m_state_mutex); + addr = m_state.mgr_addr(); + m_state.invalidate(); + //} + + using namespace mp::placeholders; + net->get_node(addr)->call(param, nullz, + BIND_RESPONSE(proto_replace, ReplaceCopyEnd_1), 10); +} + +RPC_REPLY_IMPL(proto_replace, ReplaceCopyEnd_1, from, res, err, life) +{ + if(!err.is_nil()) { LOG_ERROR("ReplaceCopyEnd failed: ",err); } + // FIXME +} + + +struct proto_replace::for_each_replace_delete { + for_each_replace_delete(const HashSpace& hs, const address& addr) : + self(addr), m_hs(hs) { } + + inline void operator() (Storage::iterator& kv); + +private: + const address& self; + const HashSpace& m_hs; + +private: + for_each_replace_delete(); +}; + +void proto_replace::replace_delete(shared_node& manager, HashSpace& hs) +{ + pthread_scoped_rdlock whlk(share->whs_mutex()); + + pthread_scoped_wrlock rhlk(share->rhs_mutex()); + share->rhs() = share->whs(); + rhlk.unlock(); + + LOG_INFO("start replace delete for time(",share->whs().clocktime().get(),")"); + + if(!share->whs().empty()) { + share->db().for_each( + for_each_replace_delete(share->whs(), net->addr()), + share->clock().now() ); + } + + shared_zone nullz; + manager::proto_replace::ReplaceDeleteEnd_1 param( + share->whs().clocktime().get(), share->clock().get_incr()); + + using namespace mp::placeholders; + manager->call(param, nullz, + BIND_RESPONSE(proto_replace, ReplaceDeleteEnd_1), 10); + + LOG_INFO("finish replace for time(",share->whs().clocktime().get(),")"); +} + +void proto_replace::for_each_replace_delete::operator() (Storage::iterator& kv) +{ + // FIXME do it in storage module. 
+ //if(kv.keylen() < Storage::KEY_META_SIZE || + // kv.vallen() < Storage::VALUE_META_SIZE) { + // LOG_TRACE("delete invalid key: ",kv.key()); + // kv.del(); + //} + uint64_t h = Storage::hash_of(kv.key()); + if(!proto_replace::test_replicator_assign(m_hs, h, self)) { + LOG_TRACE("replace delete key: ",kv.key()); + kv.del(); + } +} + +RPC_REPLY_IMPL(proto_replace, ReplaceDeleteEnd_1, from, res, err, life) +{ + if(!err.is_nil()) { + LOG_ERROR("ReplaceDeleteEnd failed: ",err); + } + // FIXME retry? +} + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/proto_replace_stream.cc b/src/logic/server/proto_replace_stream.cc new file mode 100644 index 0000000..0d9f51c --- /dev/null +++ b/src/logic/server/proto_replace_stream.cc @@ -0,0 +1,639 @@ +#include "server/framework.h" +#include "server/proto_replace_stream.h" +#include +#include +#include +#include +#include +#include +#include + +#ifndef KUMO_OFFER_INITIAL_MAP_SIZE +#define KUMO_OFFER_INITIAL_MAP_SIZE 32768 +#endif + +namespace kumo { +namespace server { + + +proto_replace_stream::proto_replace_stream(address stream_addr) : + m_stream_addr(stream_addr) +{ } + +proto_replace_stream::~proto_replace_stream() { } + +void proto_replace_stream::init_stream(int fd) +{ + m_stream_core.reset(new mp::wavy::core()); + using namespace mp::placeholders; + m_stream_core->listen(fd, mp::bind( + &proto_replace_stream::stream_accepted, this, + _1, _2)); +} + +void proto_replace_stream::run_stream() +{ + m_stream_core->add_thread(2); // FIXME 2 +} + +void proto_replace_stream::stop_stream() +{ + m_stream_core->end(); +} + + +class proto_replace_stream::OfferStorage { +public: + OfferStorage(const std::string& basename, + const address& addr, ClockTime replace_time); + ~OfferStorage(); +public: + void add(const char* key, size_t keylen, + const char* val, size_t vallen); + void send(int sock); + + const address& addr() const { return m_addr; } + ClockTime replace_time() const { return m_replace_time; } 
+private: + address m_addr; + ClockTime m_replace_time; + + struct scoped_fd { + scoped_fd(int fd) : m(fd) { } + ~scoped_fd() { ::close(m); } + int get() { return m; } + private: + int m; + scoped_fd(); + scoped_fd(const scoped_fd&); + }; + static int openfd(const std::string& basename); + scoped_fd m_fd; + + class mmap_stream; + std::auto_ptr m_mmap; +private: + OfferStorage(); + OfferStorage(const OfferStorage&); +}; + + +// Handles ReplaceOffer_1: starts a stream connection back to the offering node on the port carried in the request; the connection result is delivered to stream_connected and no RPC result is sent from here. +RPC_IMPL(proto_replace_stream, ReplaceOffer_1, req, z, response) +try { + address stream_addr = req.node()->addr(); + stream_addr.set_port(req.param().port); + char addrbuf[stream_addr.addrlen()]; + stream_addr.getaddr((sockaddr*)addrbuf); + + using namespace mp::placeholders; + m_stream_core->connect( + PF_INET, SOCK_STREAM, 0, + (sockaddr*)addrbuf, sizeof(addrbuf), + net->connect_timeout_msec(), + mp::bind(&proto_replace_stream::stream_connected, this, _1, _2)); + + // Note: don't return any result + LOG_TRACE("connect replace offer to ",req.node()->addr()," with stream port ",req.param().port); +} +// NOTE(review): label was ReplaceDeleteStart, copied from another handler; RPC_CATCH is assumed to use it only as a name -- confirm against the macro definition +RPC_CATCH(ReplaceOffer_1, response) + + +void proto_replace_stream::send_offer(proto_replace_stream::OfferStorageMap& offer, ClockTime replace_time) +{ + pthread_scoped_lock oflk(m_offer_map_mutex); + offer.commit(&m_offer_map); + + pthread_scoped_lock relk(net->scope_proto_replace().state_mutex()); + + for(SharedOfferMap::iterator it(m_offer_map.begin()), + it_end(m_offer_map.end()); it != it_end; ++it) { + const address& addr( (*it)->addr() ); + + LOG_DEBUG("send offer to ",(*it)->addr()); + shared_zone nullz; + proto_replace_stream::ReplaceOffer_1 param(m_stream_addr.port()); + + using namespace mp::placeholders; + net->get_node(addr)->call(param, nullz, + BIND_RESPONSE(proto_replace_stream, ReplaceOffer_1, replace_time, addr), 160); // FIXME 160 + + net->scope_proto_replace().replace_offer_push(replace_time, relk); + } +} + + +RPC_REPLY_IMPL(proto_replace_stream, ReplaceOffer_1, from, res, err, life, + ClockTime replace_time, address addr)
+{ + LOG_TRACE("ResReplaceOffer from ",addr," res:",res," err:",err); + // Note: this request always timed out + + pthread_scoped_lock oflk(m_offer_map_mutex); + + SharedOfferMap::iterator it = find_offer_map(m_offer_map, addr); + if(it == m_offer_map.end()) { + return; + } + + m_offer_map.erase(it); +} + + + +class proto_replace_stream::OfferStorage::mmap_stream { +public: + mmap_stream(int fd); + ~mmap_stream(); + size_t size() const; + + void write(const void* buf, size_t len); + void flush(); + +private: + z_stream m_z; + + char* m_map; + int m_fd; + void expand_map(size_t req); + +private: + msgpack::packer m_mpk; + +public: + msgpack::packer& get() { return m_mpk; } + +private: + mmap_stream(); + mmap_stream(const mmap_stream&); +}; + +proto_replace_stream::OfferStorage::mmap_stream::mmap_stream(int fd) : + m_fd(fd), + m_mpk(*this) +{ + m_z.zalloc = Z_NULL; + m_z.zfree = Z_NULL; + m_z.opaque = Z_NULL; + if(deflateInit(&m_z, Z_DEFAULT_COMPRESSION) != Z_OK) { + throw std::runtime_error(m_z.msg); + } + + if(::ftruncate(m_fd, KUMO_OFFER_INITIAL_MAP_SIZE) < 0) { + deflateEnd(&m_z); + throw mp::system_error(errno, "failed to truncate offer storage"); + } + + m_map = (char*)::mmap(NULL, KUMO_OFFER_INITIAL_MAP_SIZE, + PROT_WRITE, MAP_SHARED, m_fd, 0); + if(m_map == MAP_FAILED) { + deflateEnd(&m_z); + throw mp::system_error(errno, "failed to mmap offer storage"); + } + + m_z.avail_out = KUMO_OFFER_INITIAL_MAP_SIZE; + m_z.next_out = (Bytef*)m_map; +} + +proto_replace_stream::OfferStorage::mmap_stream::~mmap_stream() +{ + size_t used = (char*)m_z.next_out - m_map; + size_t csize = used + m_z.avail_out; + ::munmap(m_map, csize); + //::ftruncate(m_fd, used); + deflateEnd(&m_z); +} + +size_t proto_replace_stream::OfferStorage::mmap_stream::size() const +{ + return (char*)m_z.next_out - m_map; +} + +void proto_replace_stream::OfferStorage::mmap_stream::write(const void* buf, size_t len) +{ + m_z.next_in = (Bytef*)buf; + m_z.avail_in = len; + + while(true) { + 
if(m_z.avail_out < RPC_BUFFER_RESERVATION_SIZE) { // FIXME size + expand_map(KUMO_OFFER_INITIAL_MAP_SIZE); // FIXME size + } + + if(deflate(&m_z, Z_NO_FLUSH) != Z_OK) { + throw std::runtime_error("deflate failed"); + } + + if(m_z.avail_in == 0) { + break; + } + } +} + +void proto_replace_stream::OfferStorage::mmap_stream::flush() +{ + while(true) { + switch(deflate(&m_z, Z_FINISH)) { + + case Z_STREAM_END: + return; + + case Z_OK: + break; + + default: + throw std::runtime_error("deflate flush failed"); + } + + expand_map(m_z.avail_in); + } +} + +void proto_replace_stream::OfferStorage::mmap_stream::expand_map(size_t req) +{ + size_t used = (char*)m_z.next_out - m_map; + size_t csize = used + m_z.avail_out; + size_t nsize = csize * 2; + while(nsize < req) { nsize *= 2; } + + if(::ftruncate(m_fd, nsize) < 0 ) { + throw mp::system_error(errno, "failed to resize offer storage"); + } + +#ifdef __linux__ + void* tmp = ::mremap(m_map, csize, nsize, MREMAP_MAYMOVE); + if(tmp == MAP_FAILED) { + throw mp::system_error(errno, "failed to mremap offer storage"); + } + m_map = (char*)tmp; + +#else + if(::munmap(m_map, csize) < 0) { + throw mp::system_error(errno, "failed to munmap offer storage"); + } + m_map = NULL; + m_z.next_out = NULL; + m_z.avail_out = 0; + + m_map = (char*)::mmap(NULL, nsize, + PROT_WRITE, MAP_SHARED, m_fd, 0); + if(m_map == MAP_FAILED) { + throw mp::system_error(errno, "failed to mmap"); + } + +#endif + m_z.next_out = (Bytef*)(m_map + used); + m_z.avail_out = nsize - used; +} + + +struct proto_replace_stream::SharedOfferMapComp { + bool operator() (const SharedOfferStorage& x, const address& y) const + { return x->addr() < y; } + bool operator() (const address& x, const SharedOfferStorage& y) const + { return x < y->addr(); } + bool operator() (const SharedOfferStorage& x, const SharedOfferStorage& y) const + { return x->addr() < y->addr(); } +}; + + +proto_replace_stream::OfferStorageMap::OfferStorageMap( + const std::string& basename, ClockTime 
replace_time) : + m_basename(basename), + m_replace_time(replace_time) { } + +proto_replace_stream::OfferStorageMap::~OfferStorageMap() { } + +void proto_replace_stream::OfferStorageMap::add( + const address& addr, + const char* key, size_t keylen, + const char* val, size_t vallen) +{ + SharedOfferMap::iterator it = find_offer_map(m_map, addr); + if(it != m_map.end()) { + (*it)->add(key, keylen, val, vallen); + } else { + SharedOfferStorage of(new OfferStorage(m_basename, addr, m_replace_time)); + //m_map.insert(it, of); // FIXME + m_map.push_back(of); + std::sort(m_map.begin(), m_map.end(), SharedOfferMapComp()); + of->add(key, keylen, val, vallen); + } +} + +void proto_replace_stream::OfferStorageMap::commit(SharedOfferMap* dst) +{ + *dst = m_map; +} + + +proto_replace_stream::SharedOfferMap::iterator proto_replace_stream::find_offer_map( + SharedOfferMap& map, const address& addr) +{ + SharedOfferMap::iterator it = + std::lower_bound(map.begin(), map.end(), + addr, SharedOfferMapComp()); + if(it != map.end() && (*it)->addr() == addr) { + return it; + } else { + return map.end(); + } +} + + +// Creates a temporary file named "<basename>/XXXXXX" with mkstemp, unlinks it immediately and returns the open descriptor; throws std::bad_alloc if the path buffer cannot be allocated and mp::system_error if mkstemp fails. +int proto_replace_stream::OfferStorage::openfd(const std::string& basename) +{ + char* path = (char*)::malloc(basename.size()+8); + if(!path) { throw std::bad_alloc(); } + memcpy(path, basename.data(), basename.size()); + memcpy(path+basename.size(), "/XXXXXX", 8); // '/XXXXXX' + 1(='\0') + + int fd = ::mkstemp(path); + if(fd < 0) { + // capture errno before free(), which is allowed to overwrite it on some platforms + int saved_errno = errno; + ::free(path); + throw mp::system_error(saved_errno, "failed to mktemp"); + } + + ::unlink(path); + ::free(path); + + return fd; +} + +proto_replace_stream::OfferStorage::OfferStorage(const std::string& basename, + const address& addr, ClockTime replace_time): + m_addr(addr), + m_replace_time(replace_time), + m_fd(openfd(basename)), + m_mmap(new mmap_stream(m_fd.get())) +{ + LOG_TRACE("create OfferStorage for ",addr); +} + +proto_replace_stream::OfferStorage::~OfferStorage() { } + + +void proto_replace_stream::OfferStorage::add( + const char* key,
size_t keylen, + const char* val, size_t vallen) +{ + msgpack::packer& pk(m_mmap->get()); + pk.pack_array(2); + pk.pack_raw(keylen); + pk.pack_raw_body(key, keylen); + pk.pack_raw(vallen); + pk.pack_raw_body(val, vallen); +} + +void proto_replace_stream::OfferStorage::send(int sock) +{ + m_mmap->flush(); + size_t size = m_mmap->size(); + //m_mmap.reset(NULL); // FIXME needed? + while(size > 0) { + // FIXME linux + ssize_t rl = ::sendfile(sock, m_fd.get(), NULL, size); + if(rl <= 0) { throw mp::system_error(errno, "offer send error"); } + size -= rl; + } +} + + +struct scopeout_close { + scopeout_close(int fd) : m(fd) {} + ~scopeout_close() { if(m >= 0) { ::close(m); } } + void release() { m = -1; } +private: + int m; + scopeout_close(); + scopeout_close(const scopeout_close&); +}; + + +void proto_replace_stream::stream_accepted(int fd, int err) +try { + LOG_TRACE("stream accepted fd(",fd,") err:",err); + + if(fd < 0) { + LOG_FATAL("accept failed: ",strerror(err)); + net->signal_end(); + return; + } + + scopeout_close fdscope(fd); + if(::fcntl(fd, F_SETFL, 0) < 0) { // set blocking mode + LOG_ERROR("stream connect: fcntl failed", strerror(err)); + return; + } + + // recv init address + address iaddr; + { + size_t sz = address::MAX_DUMP_SIZE+1; + char addrbuf[sz]; + char* p = addrbuf; + while(true) { + ssize_t rl = ::read(fd, p, sz); + if(rl <= 0) { + LOG_ERROR("failed to recv init address", strerror(err)); + return; + } + if((size_t)rl >= sz) { break; } + sz -= rl; + p += rl; + } + iaddr = address(addrbuf+1, (uint8_t)addrbuf[0]); + } + + // take out OfferStorage from m_offer_map + SharedOfferStorage st; + { + pthread_scoped_lock oflk(m_offer_map_mutex); + SharedOfferMap::iterator it = find_offer_map(m_offer_map, iaddr); + if(it == m_offer_map.end()) { + LOG_DEBUG("storage offer to ",iaddr," is already timed out"); + return; + } + st = *it; + m_offer_map.erase(it); + } + + LOG_DEBUG("send offer storage to ",iaddr); + st->send(fd); + LOG_DEBUG("finish to send offer 
storage to ",iaddr); + + pthread_scoped_lock relk(net->scope_proto_replace().state_mutex()); + net->scope_proto_replace().replace_offer_pop(st->replace_time(), relk); + +} catch (std::exception& e) { + LOG_WARN("failed to send offer storage: ",e.what()); + throw; +} catch (...) { + LOG_WARN("failed to send offer storage: unknown error"); + throw; +} + + +void proto_replace_stream::stream_connected(int fd, int err) +try { + LOG_TRACE("stream connected fd(",fd,") err:",err); + if(fd < 0) { + LOG_ERROR("stream connect failed", strerror(err)); + return; + } + + scopeout_close fdscope(fd); + + if(::fcntl(fd, F_SETFL, 0) < 0) { // set blocking mode + LOG_ERROR("stream connect: fcntl failed", strerror(err)); + return; + } + + // send init address + { + size_t sz = address::MAX_DUMP_SIZE+1; + char addrbuf[sz]; + ::memset(addrbuf, 0, sz); + addrbuf[0] = (uint8_t)net->addr().dump_size(); + ::memcpy(addrbuf+1, net->addr().dump(), net->addr().dump_size()); + const char* p = addrbuf; + while(true) { + ssize_t rl = ::write(fd, p, sz); + if(rl <= 0) { + LOG_ERROR("failed to send init address", strerror(err)); + return; + } + if((size_t)rl >= sz) { break; } + sz -= rl; + p += rl; + } + } + + mp::set_nonblock(fd); + + m_stream_core->add(fd); + fdscope.release(); + +} catch (std::exception& e) { + LOG_WARN("failed to receve offer storage: ",e.what()); + throw; +} catch (...) 
{ + LOG_WARN("failed to receve offer storage: unknown error"); + throw; +} + + +class proto_replace_stream::OfferStreamHandler : public mp::wavy::handler { +public: + OfferStreamHandler(int fd); + ~OfferStreamHandler(); + + void read_event(); + void submit_message(rpc::msgobj msg, rpc::auto_zone& z); + +private: + msgpack::unpacker m_pac; + z_stream m_z; + char* m_buffer; + +private: + OfferStreamHandler(); + OfferStreamHandler(const OfferStreamHandler&); +}; + +proto_replace_stream::OfferStreamHandler::OfferStreamHandler(int fd) : + mp::wavy::handler(fd), + m_pac(RPC_INITIAL_BUFFER_SIZE) +{ + m_buffer = (char*)::malloc(RPC_INITIAL_BUFFER_SIZE); + if(!m_buffer) { + throw std::bad_alloc(); + } + + m_z.zalloc = Z_NULL; + m_z.zfree = Z_NULL; + m_z.opaque = Z_NULL; + + if(inflateInit(&m_z) != Z_OK) { + ::free(m_buffer); + throw std::runtime_error(m_z.msg); + } +} + +proto_replace_stream::OfferStreamHandler::~OfferStreamHandler() +{ + inflateEnd(&m_z); + ::free(m_buffer); +} + +void proto_replace_stream::OfferStreamHandler::read_event() +try { + ssize_t rl = ::read(fd(), m_buffer, RPC_INITIAL_BUFFER_SIZE); + if(rl < 0) { + if(errno == EAGAIN || errno == EINTR) { + return; + } else { + throw std::runtime_error("read error"); + } + } else if(rl == 0) { + throw std::runtime_error("connection closed"); + } + + m_z.next_in = (Bytef*)m_buffer; + m_z.avail_in = rl; + + while(true) { + if(m_pac.buffer_capacity() < RPC_BUFFER_RESERVATION_SIZE) { // FIXME size + m_pac.reserve_buffer(KUMO_OFFER_INITIAL_MAP_SIZE); // FIXME size + } + + m_z.next_out = (Bytef*)m_pac.buffer(); + m_z.avail_out = m_pac.buffer_capacity(); + + int ret = inflate(&m_z, Z_SYNC_FLUSH); + if(ret != Z_OK && ret != Z_STREAM_END) { + throw std::runtime_error("inflate failed"); + } + + m_pac.buffer_consumed( m_pac.buffer_capacity() - m_z.avail_out ); + + if(m_z.avail_in == 0) { + break; + } + } + + while(m_pac.execute()) { + rpc::msgobj msg = m_pac.data(); + std::auto_ptr z( m_pac.release_zone() ); + 
m_pac.reset(); + submit_message(msg, z); + } + +} catch(msgpack::type_error& e) { + LOG_ERROR("rpc packet: type error"); + throw; +} catch(std::exception& e) { + LOG_WARN("rpc packet: ", e.what()); + throw; +} catch(...) { + LOG_ERROR("rpc packet: unknown error"); + throw; +} + + +void proto_replace_stream::OfferStreamHandler::submit_message(rpc::msgobj msg, rpc::auto_zone& z) +{ + msgpack::type::tuple kv(msg); + msgtype::DBKey key = kv.get<0>(); + msgtype::DBValue val = kv.get<1>(); + + // FIXME updatev + share->db().update( + key.raw_data(), key.raw_size(), + val.raw_data(), val.raw_size()); + + // update() returns false means that key is overwritten while replicating. +} + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/proto_store.cc b/src/logic/server/proto_store.cc new file mode 100644 index 0000000..50d5134 --- /dev/null +++ b/src/logic/server/proto_store.cc @@ -0,0 +1,569 @@ +#include "server/framework.h" +#include "server/proto_control.h" + +#define EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_ONE(EXCLUDE, HS, HASH, NODE, CODE) \ + EACH_ASSIGN(HS, HASH, _real_, \ + if(_real_.addr() != EXCLUDE && _real_.is_active()) { \ + shared_node NODE(net->get_node(_real_.addr())); \ + CODE; \ + }) + +#define EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_N(EXCLUDES, EXCLUDES_NUM, HS, HASH, NODE, CODE) \ + EACH_ASSIGN(HS, HASH, _real_, \ + if(_real_.is_active()) { \ + bool exclude = false; \ + for(unsigned int i=0; i < (EXCLUDES_NUM); ++i) { \ + if(_real_.addr() == EXCLUDES[i]) { \ + exclude = true; \ + break; \ + } \ + } \ + if(!exclude) { \ + shared_node NODE(net->get_node(_real_.addr())); \ + CODE; \ + } \ + }) + +namespace kumo { +namespace server { + + +void proto_store::check_replicator_assign(HashSpace& hs, uint64_t h) +{ + if(hs.empty()) { + throw std::runtime_error("server not ready"); + } + EACH_ASSIGN(hs, h, r, + if(r.is_active()) { // don't write to fault node + if(r.addr() == net->addr()) return; + }) + throw std::runtime_error("obsolete hash space"); 
+} + +void proto_store::check_coordinator_assign(HashSpace& hs, uint64_t h) +{ + if(hs.empty()) { + throw std::runtime_error("server not ready"); + } + EACH_ASSIGN(hs, h, r, + if(r.is_active()) { // don't write to fault node + if(r.addr() != net->addr()) + throw std::runtime_error("obsolete hash space"); + else + return; + }) +} + + +RPC_IMPL(proto_store, Get_1, req, z, response) +try { + msgtype::DBKey key(req.param().dbkey); + LOG_DEBUG("Get '", + /*std::string(key.data(),key.size()),*/"' with hash ", + key.hash()); + + { + pthread_scoped_rdlock rhlk(share->rhs_mutex()); + check_replicator_assign(share->rhs(), key.hash()); + } + + uint32_t raw_vallen; + const char* raw_val = share->db().get( + key.raw_data(), key.raw_size(), + &raw_vallen, z.get()); + + if(raw_val) { + LOG_DEBUG("key found"); + msgtype::raw_ref res(raw_val, raw_vallen); + response.result(res, z); + + } else { + LOG_DEBUG("key not found"); + response.null(); + } + + ++share->stat_num_get(); +} +RPC_CATCH(Get_1, response) + + +bool proto_store::SetByRhsWhs(weak_responder response, auto_zone& z, + msgtype::DBKey& key, msgtype::DBValue& val, + bool is_async) +{ + unsigned int rrep_num = 0; + unsigned int wrep_num = 0; + shared_node rrepto[NUM_REPLICATION]; + shared_node wrepto[NUM_REPLICATION]; + + { + pthread_scoped_rdlock whlk(share->whs_mutex()); + check_coordinator_assign(share->whs(), key.hash()); + + pthread_scoped_rdlock rhlk(share->rhs_mutex()); + + if(share->whs().clocktime() == share->rhs().clocktime()) { + return false; + } + + address wrep_addrs[NUM_REPLICATION+1]; + + EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_ONE(net->addr(), + share->whs(), key.hash(), n, { + wrepto[wrep_num] = n; + wrep_addrs[wrep_num] = n->addr(); + ++wrep_num; + }) + + whlk.unlock(); + + wrep_addrs[wrep_num] = net->addr(); // exclude self + + EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_N(wrep_addrs, wrep_num+1, + share->rhs(), key.hash(), n, { + rrepto[rrep_num++] = n; + }) + } + + ClockTime ct(share->clock().now_incr()); + 
val.raw_set_clocktime(ct.get()); + + volatile unsigned int* pcr = + (volatile unsigned int*)z->malloc(sizeof(volatile unsigned int)); + if(is_async) { *pcr = 0; } + else { *pcr = wrep_num + rrep_num; } + + using namespace mp::placeholders; + + // rhs Replication + rpc::retry* rretry = + z->allocate< rpc::retry >( + ReplicateSet_1( + ct.clock().get(), replicate_flags_by_rhs(), // flags = by rhs + msgtype::DBKey(key.raw_data(), key.raw_size()), + msgtype::DBValue(val.raw_data(), val.raw_size())) + ); + rretry->set_callback( BIND_RESPONSE(proto_store, ReplicateSet_1, + rretry, + pcr, + response, ct.get()) ); + + // whs Replication + rpc::retry* wretry = + z->allocate< rpc::retry >( + ReplicateSet_1( + ct.clock().get(), replicate_flags_none(), // flags = none + msgtype::DBKey(key.raw_data(), key.raw_size()), + msgtype::DBValue(val.raw_data(), val.raw_size())) + ); + wretry->set_callback( BIND_RESPONSE(proto_store, ReplicateSet_1, + wretry, + pcr, + response, ct.get()) ); + + SHARED_ZONE(life, z); + + for(unsigned int i=0; i < rrep_num; ++i) { + rretry->call(rrepto[i], life, 10); + } + + for(unsigned int i=0; i < wrep_num; ++i) { + wretry->call(wrepto[i], life, 10); + } + + share->db().set( + key.raw_data(), key.raw_size(), + val.raw_data(), val.raw_size()); + + LOG_DEBUG("set copy required: ", wrep_num+rrep_num); + if((wrep_num == 0 && rrep_num == 0) || is_async) { + response.result( msgtype::tuple(ct.get()) ); + } + + return true; +} + +void proto_store::SetByWhs(weak_responder response, auto_zone& z, + msgtype::DBKey& key, msgtype::DBValue& val, + bool is_async) +{ + unsigned int wrep_num = 0; + shared_node wrepto[NUM_REPLICATION]; + + { + pthread_scoped_rdlock whlk(share->whs_mutex()); + check_coordinator_assign(share->whs(), key.hash()); + + EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_ONE(net->addr(), + share->whs(), key.hash(), n, { + wrepto[wrep_num++] = n; + }) + } + + ClockTime ct(share->clock().now_incr()); + val.raw_set_clocktime(ct.get()); + + volatile unsigned int* 
pcr = + (volatile unsigned int*)z->malloc(sizeof(volatile unsigned int)); + if(is_async) { *pcr = 0; } + else { *pcr = wrep_num; } + + using namespace mp::placeholders; + + // whs Replication + rpc::retry* retry = + z->allocate< rpc::retry >( + ReplicateSet_1( + ct.clock().get(), replicate_flags_none(), // flags = none + msgtype::DBKey(key.raw_data(), key.raw_size()), + msgtype::DBValue(val.raw_data(), val.raw_size())) + ); + retry->set_callback( BIND_RESPONSE(proto_store, ReplicateSet_1, + retry, + pcr, + response, ct.get()) ); + + SHARED_ZONE(life, z); + for(unsigned int i=0; i < wrep_num; ++i) { + retry->call(wrepto[i], life, 10); + } + + share->db().set( + key.raw_data(), key.raw_size(), + val.raw_data(), val.raw_size()); + + LOG_DEBUG("set copy required: ", wrep_num); + if(wrep_num == 0 || is_async) { + response.result( msgtype::tuple(ct.get()) ); + } +} + +RPC_IMPL(proto_store, Set_1, req, z, response) +try { + msgtype::DBKey key(req.param().dbkey); + msgtype::DBValue val(req.param().dbval); + LOG_DEBUG("Set '", + /*std::string(key.data(),key.size()),*/"' => '", + /*std::string(val.data(),val.size()),*/"' with hash ", + key.hash(),", with meta ",val.meta()); + + if(share->whs().clocktime() != share->rhs().clocktime()) { + if( !SetByRhsWhs(response, z, key, val, req.param().flags.is_async()) ) { + SetByWhs(response, z, key, val, req.param().flags.is_async()); + } + } else { + SetByWhs(response, z, key, val, req.param().flags.is_async()); + } + + ++share->stat_num_set(); +} +RPC_CATCH(Set_1, response) + + + +bool proto_store::DeleteByRhsWhs(weak_responder response, auto_zone& z, + msgtype::DBKey& key, + bool is_async) +{ + unsigned int rrep_num = 0; + unsigned int wrep_num = 0; + shared_node rrepto[NUM_REPLICATION]; + shared_node wrepto[NUM_REPLICATION]; + + { + pthread_scoped_rdlock whlk(share->whs_mutex()); + check_coordinator_assign(share->whs(), key.hash()); + + pthread_scoped_rdlock rhlk(share->rhs_mutex()); + + if(share->whs().clocktime() == 
share->rhs().clocktime()) { + return false; + } + + address wrep_addrs[NUM_REPLICATION+1]; + + EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_ONE(net->addr(), + share->whs(), key.hash(), n, { + wrepto[wrep_num] = n; + wrep_addrs[wrep_num] = n->addr(); + ++wrep_num; + }) + + whlk.unlock(); + + wrep_addrs[wrep_num] = net->addr(); // exclude self + + EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_N(wrep_addrs, wrep_num+1, + share->rhs(), key.hash(), n, { + rrepto[rrep_num++] = n; + }) + } + + ClockTime ct(share->clock().now_incr()); + + bool deleted = share->db().remove(key.raw_data(), key.raw_size(), ct); + if(!deleted) { + //response.result(false); + // the key is not stored + //return true; + wrep_num = 0; + } + + LOG_DEBUG("delete copy required: ", wrep_num+rrep_num); + if((wrep_num == 0 && rrep_num == 0) || is_async) { + response.result(true); + } + + volatile unsigned int* pcr = + (volatile unsigned int*)z->malloc(sizeof(volatile unsigned int)); + if(is_async) { *pcr = 0; } + else { *pcr = wrep_num + rrep_num; } + + using namespace mp::placeholders; + + // rhs Replication + rpc::retry* rretry = + z->allocate< rpc::retry >( + ReplicateDelete_1( + ct.get(), + ct.clock().get(), + replicate_flags_by_rhs(), // flag = by rhs + msgtype::DBKey(key.raw_data(), key.raw_size())) + ); + rretry->set_callback( BIND_RESPONSE(proto_store, ReplicateDelete_1, + rretry, + pcr, + response, deleted) ); + + // whs Replication + rpc::retry* wretry = + z->allocate< rpc::retry >( + ReplicateDelete_1( + ct.get(), + ct.clock().get(), + replicate_flags_none(), // flag = none + msgtype::DBKey(key.raw_data(), key.raw_size())) + ); + wretry->set_callback( BIND_RESPONSE(proto_store, ReplicateDelete_1, + wretry, + pcr, + response, deleted) ); + + SHARED_ZONE(life, z); + + for(unsigned int i=0; i < rrep_num; ++i) { + rretry->call(rrepto[i], life, 10); + } + + for(unsigned int i=0; i < wrep_num; ++i) { + wretry->call(wrepto[i], life, 10); + } + + return true; +} + +void proto_store::DeleteByWhs(weak_responder response, 
auto_zone& z, + msgtype::DBKey& key, + bool is_async) +{ + unsigned int wrep_num = 0; + shared_node wrepto[NUM_REPLICATION]; + + { + pthread_scoped_rdlock whlk(share->whs_mutex()); + check_coordinator_assign(share->whs(), key.hash()); + + EACH_ASSIGNED_ACTIVE_NODE_EXCLUDE_ONE(net->addr(), + share->whs(), key.hash(), n, { + wrepto[wrep_num++] = n; + }) + } + + ClockTime ct(share->clock().now_incr()); + + bool deleted = share->db().remove(key.raw_data(), key.raw_size(), ct); + if(!deleted) { + response.result(false); + // the key is not stored + return; + } + + LOG_DEBUG("delete copy required: ", wrep_num); + if(wrep_num == 0 || is_async) { + response.result(true); + } + + volatile unsigned int* pcr = + (volatile unsigned int*)z->malloc(sizeof(volatile unsigned int)); + if(is_async) { *pcr = 0; } + else { *pcr = wrep_num; } + + using namespace mp::placeholders; + + // whs Replication + rpc::retry* retry = + z->allocate< rpc::retry >( + ReplicateDelete_1( + ct.get(), ct.clock().get(), + replicate_flags_none(), + msgtype::DBKey(key.raw_data(), key.raw_size())) + ); + retry->set_callback( BIND_RESPONSE(proto_store, ReplicateDelete_1, + retry, + pcr, + response, deleted) ); + + SHARED_ZONE(life, z); + for(unsigned int i=0; i < wrep_num; ++i) { + retry->call(wrepto[i], life, 10); + } +} + +RPC_IMPL(proto_store, Delete_1, req, z, response) +try { + msgtype::DBKey key(req.param().dbkey); + LOG_DEBUG("Delete '", + std::string(key.data(),key.size()),"' with hash", + key.hash()); + + if(share->whs().clocktime() != share->rhs().clocktime()) { + if( !DeleteByRhsWhs(response, z, key, req.param().flags.is_async()) ) { + DeleteByWhs(response, z, key, req.param().flags.is_async()); + } + } else { + DeleteByWhs(response, z, key, req.param().flags.is_async()); + } + + ++share->stat_num_delete(); +} +RPC_CATCH(Delete_1, response) + + + +RPC_REPLY_IMPL(proto_store, ReplicateSet_1, from, res, err, life, + rpc::retry* retry, + volatile unsigned int* copy_required, + rpc::weak_responder 
response, uint64_t clocktime) +{ + LOG_DEBUG("ResReplicateSet ",res,",",err," remain:",*copy_required); + // retry if failed + if(!err.is_nil()) { + if(SESSION_IS_ACTIVE(from)) { + // FIXME delayed retry? + if(retry->retry_incr(share->cfg_replicate_set_retry_num())) { + retry->call(from, life); + LOG_WARN("ReplicateSet error: ",err,", retry ",retry->num_retried()); + return; + } + } + if(!retry->param().flags.is_rhs()) { // FIXME ? + response.null(); + LOGPACK("ers",2, + "key",msgtype::raw_ref( + retry->param().dbkey.data(), + retry->param().dbkey.size()), + "val",msgtype::raw_ref( + retry->param().dbval.data(), + retry->param().dbval.size())); + LOG_ERROR("ReplicateSet failed: ",err); + return; + } + } + + LOG_DEBUG("ReplicateSet succeeded"); + + if(__sync_sub_and_fetch(copy_required, 1) == 0) { + response.result( msgtype::tuple(clocktime) ); + } +} + +RPC_REPLY_IMPL(proto_store, ReplicateDelete_1, from, res, err, life, + rpc::retry* retry, + volatile unsigned int* copy_required, + rpc::weak_responder response, bool deleted) +{ + // retry if failed + if(!err.is_nil()) { + if(SESSION_IS_ACTIVE(from)) { + // FIXME delayed retry? + if(retry->retry_incr(share->cfg_replicate_delete_retry_num())) { + retry->call(from, life); + LOG_WARN("ReplicateDelete error: ",err,", retry ",retry->num_retried()); + return; + } + } + if(!retry->param().flags.is_rhs()) { // FIXME ? 
+ response.null(); + LOGPACK("erd",2, + "key",msgtype::raw_ref( + retry->param().dbkey.data(), + retry->param().dbkey.size())); + LOG_ERROR("ReplicateDelete failed: ",err); + return; + } + } + + LOG_DEBUG("ReplicateDelete succeeded"); + + if(__sync_sub_and_fetch(copy_required, 1) == 0) { + if(!deleted && retry->param().flags.is_rhs() && + res.type == msgtype::BOOLEAN && res.via.boolean == true) { + deleted = true; + } + response.result(deleted); + } +} + + +RPC_IMPL(proto_store, ReplicateSet_1, req, z, response) +try { + msgtype::DBKey key = req.param().dbkey; + msgtype::DBValue val = req.param().dbval; + LOG_TRACE("ReplicateSet"); + + if(req.param().flags.is_rhs()) { + pthread_scoped_rdlock rhlk(share->rhs_mutex()); + check_replicator_assign(share->rhs(), key.hash()); + } else { + pthread_scoped_rdlock whlk(share->whs_mutex()); + check_replicator_assign(share->whs(), key.hash()); + } + + share->clock().update(req.param().clock); + + bool updated = share->db().update( + key.raw_data(), key.raw_size(), + val.raw_data(), val.raw_size()); + + response.result(updated); +} +RPC_CATCH(ReplicateSet_1, response) + + +RPC_IMPL(proto_store, ReplicateDelete_1, req, z, response) +try { + msgtype::DBKey key = req.param().dbkey; + LOG_TRACE("ReplicateDelete"); + + if(req.param().flags.is_rhs()) { + pthread_scoped_rdlock rhlk(share->rhs_mutex()); + check_replicator_assign(share->rhs(), key.hash()); + } else { + pthread_scoped_rdlock whlk(share->whs_mutex()); + check_replicator_assign(share->whs(), key.hash()); + } + + share->clock().update(req.param().clock); + + bool deleted = share->db().remove(key.raw_data(), key.raw_size(), + req.param().clocktime); + + response.result(deleted); +} +RPC_CATCH(ReplicateDelete_1, response) + + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/storage.cc b/src/logic/server/storage.cc new file mode 100644 index 0000000..329ea39 --- /dev/null +++ b/src/logic/server/storage.cc @@ -0,0 +1,294 @@ +#include 
"server/storage.h" +#include "log/mlogger.h" + +namespace kumo { +namespace server { + + +Storage::Storage(const char* path, + uint32_t garbage_min_time, + uint32_t garbage_max_time, + size_t garbage_mem_limit) : + m_garbage_min_time(garbage_min_time), + m_garbage_max_time(garbage_max_time), + m_garbage_mem_limit(garbage_mem_limit) +{ + m_op = kumo_storage_init(); + + m_data = m_op.create(); + if(!m_data) { + throw std::runtime_error("failed to initialize storage module"); + } + + if(!m_op.open(m_data, path)) { + std::string msg = error(); + m_op.free(m_data); + throw std::runtime_error(msg); + } +} + +Storage::~Storage() +{ + m_op.close(m_data); + m_op.free(m_data); +} + + +void Storage::set( + const char* raw_key, uint32_t raw_keylen, + const char* raw_val, uint32_t raw_vallen) +{ + if(!m_op.set(m_data, + raw_key, raw_keylen, + raw_val, raw_vallen)) { + throw std::runtime_error("set failed"); + } +} + + +static bool storage_casproc(void* casdata, + const char* oldval, size_t oldvallen) +{ + if(oldvallen < Storage::VALUE_CLOCKTIME_SIZE) { + return true; + } + + ClockTime update_clocktime = + ClockTime( *reinterpret_cast(casdata) ); + + ClockTime old_clocktime = ClockTime( Storage::clocktime_of(oldval) ); + + return old_clocktime < update_clocktime; +} + + +bool Storage::update( + const char* raw_key, uint32_t raw_keylen, + const char* raw_val, uint32_t raw_vallen) +{ + ClockTime update_clocktime = clocktime_of(raw_val); + + return m_op.update(m_data, + raw_key, raw_keylen, + raw_val, raw_vallen, + &storage_casproc, + reinterpret_cast(&update_clocktime)); +} + + +namespace { +struct scoped_clock_key { + scoped_clock_key(const char* key, uint32_t keylen, ClockTime clocktime) + { + m_data = (char*)::malloc(keylen+8); + if(!m_data) { + throw std::bad_alloc(); + } + + *(uint64_t*)m_data = clocktime.get(); + memcpy(m_data+8, key, keylen); + } + + ~scoped_clock_key() + { + ::free(m_data); + } + + void* data() + { + return m_data; + } + + size_t size(size_t keylen) + { + 
return keylen + 8; + } + + struct wrap { + wrap(const char* buf, size_t buflen) : + m_buf(buf), + m_buflen(buflen) + { } + + const char* key() const + { + return m_buf + 8; + } + + size_t keylen() const { + return m_buflen - 8; + } + + ClockTime clocktime() const { + return ClockTime( *(uint64_t*)m_buf ); + } + + private: + const char* m_buf; + size_t m_buflen; + wrap(); + }; + +private: + char* m_data; + scoped_clock_key(); + scoped_clock_key(const scoped_clock_key&); +}; +} // noname namespace + + +bool Storage::remove( + const char* raw_key, uint32_t raw_keylen, + ClockTime update_clocktime) +{ + char clockbuf[VALUE_CLOCKTIME_SIZE]; + clocktime_to(update_clocktime, clockbuf); + + bool removed = update(raw_key, raw_keylen, clockbuf, sizeof(clockbuf)); + if(!removed) { + return false; + } + + // push garbage + + mp::pthread_scoped_lock gclk; + + { + scoped_clock_key clock_key(raw_key, raw_keylen, update_clocktime); + + gclk.relock(m_garbage_mutex); + + m_garbage.push(clock_key.data(), clock_key.size(raw_keylen)); +//LOG_ERROR("push garbage '",std::string(raw_key, raw_keylen),"' ",update_clocktime.get()," ",m_garbage.total_size()); + } + + while(true) { + size_t size; + const char* data = (const char*)m_garbage.front(&size); + if(!data) { + break; + } + + scoped_clock_key::wrap garbage_key(data, size); + + if(m_garbage.total_size() > m_garbage_mem_limit) { +//LOG_ERROR("over memory '",std::string(garbage_key.key(), garbage_key.keylen()),"' ",update_clocktime.get()," ",m_garbage.total_size()); + // over usage over, pop garbage + if(garbage_key.clocktime() < + update_clocktime.before_sec(m_garbage_min_time)) { // min check + m_op.del(m_data, + garbage_key.key(), garbage_key.keylen(), + &storage_casproc, + reinterpret_cast(&update_clocktime)); + } + m_garbage.pop(); + + } else if(garbage_key.clocktime() < + update_clocktime.before_sec(m_garbage_max_time)) { // max check +//LOG_ERROR("garbage collect '",std::string(garbage_key.key(), garbage_key.keylen()),"' 
",update_clocktime.get()," ",m_garbage.total_size()); + m_op.del(m_data, + garbage_key.key(), garbage_key.keylen(), + &storage_casproc, + reinterpret_cast(&update_clocktime)); + m_garbage.pop(); + + } else { + break; + } + } + + return true; + + // unlock gclk +} + + +namespace { +struct for_each_data { + kumo_storage_op* op; + void (*callback)(void* obj, Storage::iterator& it); + void* obj; + ClockTime clocktime_limit; +}; + +static int for_each_collect(void* user, void* iterator_data) +try { + for_each_data* data = reinterpret_cast(user); + + const char* val = data->op->iterator_val(iterator_data); + size_t vallen = data->op->iterator_vallen(iterator_data); + + if(vallen < Storage::VALUE_META_SIZE) { + if(data->clocktime_limit != 0) { // for mergedb + + if(vallen < Storage::VALUE_CLOCKTIME_SIZE) { + // invalid value + data->op->iterator_del(iterator_data, + &storage_casproc, + reinterpret_cast(&data->clocktime_limit)); + + } else { + // garbage + ClockTime garbage_clocktime = Storage::clocktime_of(val); + if(garbage_clocktime < data->clocktime_limit) { +//LOG_ERROR("iterator garbage collect '",std::string(val, vallen),"' ",data->clocktime_limit.get()); + data->op->iterator_del(iterator_data, + &storage_casproc, + reinterpret_cast(&data->clocktime_limit)); + } + } + + } + return 0; + } + + Storage::iterator it(data->op, iterator_data); + (*data->callback)(data->obj, it); + + return 0; + +} catch (...) 
{ + return -1; +} +} // noname namespace + +void Storage::for_each_impl(void* obj, void (*callback)(void* obj, iterator& it), + ClockTime clocktime) +{ + for_each_data data = { + &m_op, + callback, + obj, + clocktime.before_sec(m_garbage_max_time), + }; + + int ret = m_op.for_each(m_data, + reinterpret_cast(&data), for_each_collect); + + if(ret < 0) { + throw std::runtime_error("error while iterating database"); + } +} + + +uint64_t Storage::rnum() +{ + return m_op.rnum(m_data); +} + +void Storage::backup(const char* dstpath) +{ + if(!m_op.backup(m_data, dstpath)) { + throw std::runtime_error("backup failed"); + } +} + +std::string Storage::error() +{ + return std::string( m_op.error(m_data) ); +} + + +} // namespace server +} // namespace kumo + diff --git a/src/logic/server/storage.h b/src/logic/server/storage.h new file mode 100644 index 0000000..18a4b4b --- /dev/null +++ b/src/logic/server/storage.h @@ -0,0 +1,264 @@ +#ifndef SERVER_STORAGE_H__ +#define SERVER_STORAGE_H__ + +#include "server/storage/interface.h" +#include "server/buffer_queue.h" +#include "logic/clock.h" +#include +#include +#include +#include +#include + +#ifdef __LITTLE_ENDIAN__ +#if defined(__bswap_64) +# define kumo_be64(x) __bswap_64(x) +#elif defined(__DARWIN_OSSwapInt64) +# define kumo_be64(x) __DARWIN_OSSwapInt64(x) +#else +static inline uint64_t kumo_be64(uint64_t x) { + return ((x << 56) & 0xff00000000000000ULL ) | + ((x << 40) & 0x00ff000000000000ULL ) | + ((x << 24) & 0x0000ff0000000000ULL ) | + ((x << 8) & 0x000000ff00000000ULL ) | + ((x >> 8) & 0x00000000ff000000ULL ) | + ((x >> 24) & 0x0000000000ff0000ULL ) | + ((x >> 40) & 0x000000000000ff00ULL ) | + ((x >> 56) & 0x00000000000000ffULL ) ; +} +#endif +#else +#define kumo_be64(x) (x) +#endif + +/* Big endian + * + * key: + * +--------+-----------------+ + * | 64 | ... | + * +--------+-----------------+ + * hash + * key + * + * value: + * +--------+--------+-----------------+ + * | 64 | 64 | ... 
| + * +--------+--------+-----------------+ + * clocktime + * meta + * data + * + * value (garbage): + * +--------+ + * | 64 | + * +--------+ + * clocktime + */ + +namespace kumo { +namespace server { + + +class Storage { +public: + Storage(const char* path, + uint32_t garbage_min_time, + uint32_t garbage_max_time, + size_t garbage_mem_limit); + + ~Storage(); + + static const size_t KEY_META_SIZE = 8; + static const size_t VALUE_CLOCKTIME_SIZE = 8; + static const size_t VALUE_META_SIZE = VALUE_CLOCKTIME_SIZE + 8; + + + static ClockTime clocktime_of(const char* raw_val); + static void clocktime_to(ClockTime clocktime, char* raw_val); + + static uint64_t meta_of(const char* raw_val); + static void meta_to(uint64_t meta, char* raw_val); + + static uint64_t hash_of(const char* raw_key); + static void hash_to(uint64_t hash, char* raw_key); + +public: + const char* get( + const char* raw_key, uint32_t raw_keylen, + uint32_t* result_raw_vallen, msgpack::zone* z); + + void set( + const char* raw_key, uint32_t raw_keylen, + const char* raw_val, uint32_t raw_vallen); + + bool update( + const char* raw_key, uint32_t raw_keylen, + const char* raw_val, uint32_t raw_vallen); + + bool remove( + const char* raw_key, uint32_t raw_keylen, + ClockTime update_clocktime); + + // FIXME + //void updatev() + + uint64_t rnum(); + + void backup(const char* dstpath); + + std::string error(); + + template + void for_each(F f, ClockTime clocktime); + + struct iterator { + public: + iterator(kumo_storage_op* op, void* data); + ~iterator(); + + public: + const char* key(); + const char* val(); + size_t keylen(); + size_t vallen(); + void release_key(msgpack::zone* z); + void release_val(msgpack::zone* z); + void del(); + + private: + void* m_data; + kumo_storage_op* m_op; + }; + +private: + void* m_data; + kumo_storage_op m_op; + + mp::pthread_mutex m_garbage_mutex; + buffer_queue m_garbage; + + uint32_t m_garbage_min_time; + uint32_t m_garbage_max_time; + size_t m_garbage_mem_limit; + +private: 
+ template + static void for_each_callback(void* obj, iterator& it); + + void for_each_impl(void* obj, void (*callback)(void* obj, iterator& it), + ClockTime clocktime); +}; + + +inline ClockTime Storage::clocktime_of(const char* raw_val) +{ + return ClockTime( kumo_be64(*(uint64_t*)raw_val) ); +} + +inline void Storage::clocktime_to(ClockTime clocktime, char* raw_val) +{ + *(uint64_t*)raw_val = kumo_be64(clocktime.get()); +} + +inline uint64_t Storage::meta_of(const char* raw_val) +{ + return kumo_be64(*(uint64_t*)(raw_val+8)); +} + +inline void Storage::meta_to(uint64_t meta, char* raw_val) +{ + *((uint64_t*)(raw_val+8)) = kumo_be64(meta); +} + +inline uint64_t Storage::hash_of(const char* raw_key) +{ + return kumo_be64(*(uint64_t*)raw_key); +} + +inline void Storage::hash_to(uint64_t hash, char* raw_key) +{ + *(uint64_t*)raw_key = kumo_be64(hash); +} + + +inline const char* Storage::get( + const char* raw_key, uint32_t raw_keylen, + uint32_t* result_raw_vallen, msgpack::zone* z) +{ + const char* raw_val = m_op.get(m_data, + raw_key, raw_keylen, + result_raw_vallen, + z); + if(raw_val && *result_raw_vallen < VALUE_META_SIZE) { + return NULL; + } + return raw_val; +} + + +template +inline void Storage::for_each(F f, ClockTime clocktime) +{ + for_each_impl( + reinterpret_cast(&f), + &Storage::for_each_callback, + clocktime); +} + +template +void Storage::for_each_callback(void* obj, iterator& it) +{ + (*reinterpret_cast(obj))(it); +} + + +inline Storage::iterator::iterator(kumo_storage_op* op, void* data) : + m_data(data), m_op(op) { } + +inline Storage::iterator::~iterator() { } + +inline const char* Storage::iterator::key() +{ + return m_op->iterator_key(m_data); +} + +inline const char* Storage::iterator::val() +{ + return m_op->iterator_val(m_data); +} + +inline size_t Storage::iterator::keylen() +{ + return m_op->iterator_keylen(m_data); +} + +inline size_t Storage::iterator::vallen() +{ + return m_op->iterator_vallen(m_data); +} + +inline void 
Storage::iterator::release_key(msgpack::zone* z) +{ + if(!m_op->iterator_release_key(m_data, z)) { + throw std::bad_alloc(); + } +} + +inline void Storage::iterator::release_val(msgpack::zone* z) +{ + if(!m_op->iterator_release_val(m_data, z)) { + throw std::bad_alloc(); + } +} + +inline void Storage::iterator::del() +{ + m_op->iterator_del_force(m_data); +} + + +} // namespace server +} // namespace kumo + +#endif /* server/storage.h */ + diff --git a/src/logic/server/storage/interface.h b/src/logic/server/storage/interface.h new file mode 100644 index 0000000..0bc2e2c --- /dev/null +++ b/src/logic/server/storage/interface.h @@ -0,0 +1,99 @@ +#ifndef KUMO_STORAGE_H__ +#define KUMO_STORAGE_H__ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef bool (*kumo_storage_casproc)(void* casdata, const char* oldval, size_t oldvallen); + +typedef struct { + + // failed: NULL + void* (*create)(void); + + void (*free)(void* data); + + // success: true; faied: false + //bool (*open)(void* data, int* argc, char** argv); + bool (*open)(void* data, const char* path); + + void (*close)(void* data); + + // found: value; not-found: NULL + const char* (*get)(void* data, + const char* key, uint32_t keylen, + uint32_t* result_vallen, + msgpack_zone* zone); + + // success: true; failed: false + bool (*set)(void* data, + const char* key, uint32_t keylen, + const char* val, uint32_t vallen); + + // deleted: true; not-deleted: false + bool (*del)(void* data, + const char* key, uint32_t keylen, + kumo_storage_casproc proc, void* casdata); + + // updated: true; not-updated: false + bool (*update)(void* data, + const char* key, uint32_t keylen, + const char* val, uint32_t vallen, + kumo_storage_casproc proc, void* casdata); + + // number of processed keys + int (*updatev)(void* data, + const char** keys, const size_t* keylens, + const char** vals, const size_t* vallens, + uint16_t num); + + // number of stored keys + uint64_t 
(*rnum)(void* data); + + // success: true; not-success: false + bool (*backup)(void* data, const char* dstpath); + + const char* (*error)(void* data); + + // success >= 0; failed < 0 + int (*for_each)(void* data, + void* user, + int (*func)(void* user, void* iterator_data)); + + const char* (*iterator_key)(void* iterator_data); + const char* (*iterator_val)(void* iterator_data); + + size_t (*iterator_keylen)(void* iterator_data); + size_t (*iterator_vallen)(void* iterator_data); + + // success: true; failed: false + bool (*iterator_release_key)(void* iterator_data, msgpack_zone* zone); + bool (*iterator_release_val)(void* iterator_data, msgpack_zone* zone); + + // deleted: true; not-deleted: false + bool (*iterator_del)(void* iterator_data, + kumo_storage_casproc proc, void* casdata); + + // deleted: true; not-deleted: false + bool (*iterator_del_force)(void* iterator_data); + +} kumo_storage_op; + + +kumo_storage_op kumo_storage_init(void); + + +#ifdef __cplusplus +} +#endif + +#endif /* kumo/storage.h */ + diff --git a/src/logic/server/storage/luxio.cc b/src/logic/server/storage/luxio.cc new file mode 100644 index 0000000..e384436 --- /dev/null +++ b/src/logic/server/storage/luxio.cc @@ -0,0 +1,393 @@ +#include "interface.h" // FIXME +#include +#include +#include + +static __thread std::string *s_error = NULL; + +static void set_error(const std::string& msg) +try { + if(!s_error) { + s_error = new std::string; + } + *s_error = msg; + +} catch (...) 
{ } + +static void* kumo_luxio_create(void) +try { + Lux::IO::Btree* db = new Lux::IO::Btree(Lux::IO::CLUSTER); + + // FIXME LOCK_NONE + rwlock + db->set_lock_type(Lux::IO::LOCK_THREAD); + return reinterpret_cast(db); + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + +static void kumo_luxio_free(void* data) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + delete db; + +} catch (std::exception& e) { + set_error(e.what()); +} + + +static bool kumo_luxio_open(void* data, const char* path) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + return db->open(path, Lux::DB_CREAT); + +} catch (std::exception& e) { + set_error(e.what()); + return false; +} + + +static void kumo_luxio_close(void* data) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + db->close(); + +} catch (std::exception& e) { + set_error(e.what()); +} + + + +static void kumo_luxio_clean_data(void* val) +{ + // FIXME + reinterpret_cast(NULL)->clean_data( + reinterpret_cast(val)); +} + +static const char* kumo_luxio_get(void* data, + const char* key, uint32_t keylen, + uint32_t* result_vallen, + msgpack_zone* zone) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + Lux::IO::data_t k = {key, keylen}; + Lux::IO::data_t* v = NULL; + if(!db->get(&k, &v, Lux::IO::SYSTEM) || v == NULL) { + return NULL; + } + + if(!msgpack_zone_push_finalizer( + zone, kumo_luxio_clean_data, v)) { + db->clean_data(v); + return NULL; + } + + *result_vallen = v->size; + return (const char*)v->data; + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + + +static bool kumo_luxio_set(void* data, + const char* key, uint32_t keylen, + const char* val, uint32_t vallen) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + return db->put(key, keylen, val, vallen); + +} catch (std::exception& e) { + set_error(e.what()); + return false; +} + + +static bool kumo_luxio_update(void* data, + const char* key, uint32_t keylen, + const char* val, uint32_t vallen) +try 
{ + // FIXME + return kumo_luxio_set(data, key, keylen, val, vallen); + +} catch (std::exception& e) { + set_error(e.what()); + return false; +} + +static bool kumo_luxio_del(void* data, + const char* key, uint32_t keylen, + uint64_t clocktime) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + Lux::IO::data_t k = {key, keylen}; + // FIXME + return db->del(&k); + +} catch (std::exception& e) { + set_error(e.what()); + return false; +} + + +static uint64_t kumo_luxio_rnum(void* data) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + // FIXME + return 0; + +} catch (std::exception& e) { + set_error(e.what()); + return 0; +} + + +static bool kumo_luxio_backup(void* data, const char* dstpath) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + // FIXME + return false; + +} catch (std::exception& e) { + set_error(e.what()); + return false; +} + + +static const char* kumo_luxio_error(void* data) +{ + if(s_error) { + return s_error->c_str(); + } else { + return ""; + } +} + + +struct kumo_luxio_iterator { + kumo_luxio_iterator(Lux::IO::Btree* pdb) : + key(NULL), val(NULL), db(pdb) { } + + ~kumo_luxio_iterator() + { + if(key) { db->clean_data(key); } + if(val) { db->clean_data(val); } + } + + Lux::IO::data_t* key; + Lux::IO::data_t* val; + Lux::IO::Btree* db; + +private: + kumo_luxio_iterator(); + kumo_luxio_iterator(const kumo_luxio_iterator&); +}; + +static int kumo_luxio_for_each(void* data, + void* user, int (*func)(void* user, void* iterator_data)) +try { + Lux::IO::Btree* db = reinterpret_cast(data); + + kumo_luxio_iterator it(db); + + std::auto_ptr cur( db->cursor_init() ); + + if(!db->first(cur.get())) { + return 0; + } + + do { + if(!db->cursor_get(cur.get(), &it.key, &it.val, Lux::IO::SYSTEM)) { + continue; + } + + if(it.val->size < 16 || it.key->size < 8) { + // FIXME delete it? 
+ continue; + } + + int ret = (*func)(user, reinterpret_cast(&it)); + if(ret < 0) { + return ret; + } + + } while(db->next(cur.get())); + + return 0; + +} catch (std::exception& e) { + set_error(e.what()); + return -1; +} + + +static const char* kumo_luxio_iterator_key(void* iterator_data) +try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + return (const char*)it->key->data; + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + + +static const char* kumo_luxio_iterator_val(void* iterator_data) +try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + return (const char*)it->val->data; + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + + +static size_t kumo_luxio_iterator_keylen(void* iterator_data) +try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + return it->key->size; + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + + +static size_t kumo_luxio_iterator_vallen(void* iterator_data) +try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + return it->val->size; + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + + +static bool kumo_luxio_iterator_release_key(void* iterator_data, msgpack_zone* zone) +try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + if(!msgpack_zone_push_finalizer( + zone, kumo_luxio_clean_data, it->key)) { + return false; + } + + it->key = NULL; + return true; + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + + +static bool kumo_luxio_iterator_release_val(void* iterator_data, msgpack_zone* zone) +try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + if(!msgpack_zone_push_finalizer( + zone, kumo_luxio_clean_data, it->val)) { + return false; + } + + it->val = NULL; + return true; + +} catch (std::exception& e) { + set_error(e.what()); + return NULL; +} + + +static bool kumo_luxio_iterator_del(void* iterator_data) 
+try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + Lux::IO::data_t k = {it->key->data, it->key->size}; + return it->db->del(&k); + +} catch (std::exception& e) { + set_error(e.what()); + return false; +} + +#if 0 +static bool kumo_luxio_iterator_del_if_older(void* iterator_data, uint64_t if_older) +try { + kumo_luxio_iterator* it = + reinterpret_cast(iterator_data); + + const char* val = (const char*)it->val->data; + size_t vallen = it->val->size; + + if(vallen < 8 || kumo_clocktime_less( + kumo_storage_clocktime_of(val), + if_older)) { + + Lux::IO::data_t k = {it->key->data, it->key->size}; + return it->db->del(&k); + } + + return false; + +} catch (std::exception& e) { + set_error(e.what()); + return false; +} +#endif + + +static kumo_storage_op kumo_luxio_op = +{ + kumo_luxio_create, + kumo_luxio_free, + kumo_luxio_open, + kumo_luxio_close, + kumo_luxio_get, + kumo_luxio_set, + kumo_luxio_update, + NULL, + kumo_luxio_del, + kumo_luxio_rnum, + kumo_luxio_backup, + kumo_luxio_error, + kumo_luxio_for_each, + kumo_luxio_iterator_key, + kumo_luxio_iterator_val, + kumo_luxio_iterator_keylen, + kumo_luxio_iterator_vallen, + kumo_luxio_iterator_release_key, + kumo_luxio_iterator_release_val, + kumo_luxio_iterator_del, +}; + +extern "C" +kumo_storage_op kumo_storage_init(void) +{ + return kumo_luxio_op; +} + diff --git a/src/logic/server/storage/tchdb.cc b/src/logic/server/storage/tchdb.cc new file mode 100644 index 0000000..1597703 --- /dev/null +++ b/src/logic/server/storage/tchdb.cc @@ -0,0 +1,349 @@ +#include "interface.h" // FIXME +#include +#include + + +struct kumo_tchdb { + kumo_tchdb() + { + db = tchdbnew(); + if(!db) { + throw std::bad_alloc(); + } + //tchdbsetcache(db, 32000); FIXME + //tchdbsetxmsiz(db, 1024*1024); FIXME + } + + ~kumo_tchdb() + { + tchdbdel(db); + } + + TCHDB* db; + mp::pthread_mutex iterator_mutex; + +private: + kumo_tchdb(const kumo_tchdb&); +}; + + +static void* kumo_tchdb_create(void) +try { + kumo_tchdb* ctx = new 
kumo_tchdb(); + return reinterpret_cast(ctx); + +} catch (...) { + return NULL; +} + +static void kumo_tchdb_free(void* data) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + delete ctx; +} + +static bool kumo_tchdb_open(void* data, const char* path) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + + if(!tchdbsetmutex(ctx->db)) { + return false; + } + + if(!tchdbopen(ctx->db, path, HDBOWRITER|HDBOCREAT)) { + return false; + } + + return true; +} + +static void kumo_tchdb_close(void* data) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + tchdbclose(ctx->db); +} + + +static const char* kumo_tchdb_get(void* data, + const char* key, uint32_t keylen, + uint32_t* result_vallen, + msgpack_zone* zone) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + + int len; + char* val = (char*)tchdbget(ctx->db, key, keylen, &len); + if(!val) { + return NULL; + } + *result_vallen = len; + + if(!msgpack_zone_push_finalizer(zone, free, val)) { + free(val); + return NULL; + } + + return val; +} + +static bool kumo_tchdb_set(void* data, + const char* key, uint32_t keylen, + const char* val, uint32_t vallen) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + return tchdbput(ctx->db, key, keylen, val, vallen); +} + + +static bool kumo_tchdb_del(void* data, + const char* key, uint32_t keylen, + kumo_storage_casproc proc, void* casdata) +{ + // FIXME tchdboutproc + kumo_tchdb* ctx = reinterpret_cast(data); + return tchdbout(ctx->db, key, keylen); +} + + +typedef struct { + const char* val; + uint32_t vallen; + kumo_storage_casproc proc; + void* casdata; +} kumo_tchdb_update_ctx; + +static void* kumo_tchdb_update_proc(const void* vbuf, int vsiz, int *sp, void* op) +{ + kumo_tchdb_update_ctx* upctx = (kumo_tchdb_update_ctx*)op; + + if( upctx->proc(upctx->casdata, (const char*)vbuf, vsiz) ) { + // update + + void* mem = ::malloc(upctx->vallen); + if(!mem) { + return NULL; // FIXME + } + + *sp = upctx->vallen; + memcpy(mem, upctx->val, upctx->vallen); + return mem; + + } else { + // don't update + 
return NULL; + } +} + +static bool kumo_tchdb_update(void* data, + const char* key, uint32_t keylen, + const char* val, uint32_t vallen, + kumo_storage_casproc proc, void* casdata) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + + kumo_tchdb_update_ctx upctx = { val, vallen, proc, casdata }; + + return tchdbputproc(ctx->db, + key, keylen, + val, vallen, + kumo_tchdb_update_proc, &upctx); +} + + +static uint64_t kumo_tchdb_rnum(void* data) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + return tchdbrnum(ctx->db); +} + +static bool kumo_tchdb_backup(void* data, const char* dstpath) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + return tchdbcopy(ctx->db, dstpath); +} + +static const char* kumo_tchdb_error(void* data) +{ + kumo_tchdb* ctx = reinterpret_cast(data); + return tchdberrmsg(tchdbecode(ctx->db)); +} + + +struct kumo_tchdb_iterator { + kumo_tchdb_iterator(kumo_tchdb* pctx) : + ctx(pctx) + { + key = tcxstrnew(); + if(!key) { + throw std::bad_alloc(); + } + val = tcxstrnew(); + if(!val) { + tcxstrdel(key); + throw std::bad_alloc(); + } + } + + ~kumo_tchdb_iterator() + { + if(key != NULL) { tcxstrdel(key); } + if(val != NULL) { tcxstrdel(val); } + } + + void reset() + { + if(!key) { + key = tcxstrnew(); + if(!key) { + throw std::bad_alloc(); + } + } + + if(!val) { + val = tcxstrnew(); + if(!val) { + throw std::bad_alloc(); + } + } + } + + TCXSTR* key; + TCXSTR* val; + kumo_tchdb* ctx; + +private: + kumo_tchdb_iterator(); + kumo_tchdb_iterator(const kumo_tchdb_iterator&); +}; + +static int kumo_tchdb_for_each(void* data, + void* user, int (*func)(void* user, void* iterator_data)) +try { + kumo_tchdb* ctx = reinterpret_cast(data); + + // only one thread can use iterator + mp::pthread_scoped_lock itlk(ctx->iterator_mutex); + + if(!tchdbiterinit(ctx->db)) { + return -1; + } + + kumo_tchdb_iterator it(ctx); + + while( tchdbiternext3(ctx->db, it.key, it.val) ) { + int ret = (*func)(user, (void*)&it); + if(ret < 0) { + return ret; + } + + it.reset(); + } + + return 
0; + +} catch (...) { + return -1; +} + +static const char* kumo_tchdb_iterator_key(void* iterator_data) +{ + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + return TCXSTRPTR(it->key); +} + +static const char* kumo_tchdb_iterator_val(void* iterator_data) +{ + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + return TCXSTRPTR(it->val); +} + +static size_t kumo_tchdb_iterator_keylen(void* iterator_data) +{ + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + return TCXSTRSIZE(it->key); +} + +static size_t kumo_tchdb_iterator_vallen(void* iterator_data) +{ + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + return TCXSTRSIZE(it->val); +} + + +static bool kumo_tchdb_iterator_release_key(void* iterator_data, msgpack_zone* zone) +{ + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + + if(!msgpack_zone_push_finalizer(zone, (void (*)(void*))tcxstrdel, it->key)) { + return false; + } + + it->key = NULL; + return true; +} + +static bool kumo_tchdb_iterator_release_val(void* iterator_data, msgpack_zone* zone) +{ + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + + if(!msgpack_zone_push_finalizer(zone, (void (*)(void*))tcxstrdel, it->val)) { + return false; + } + + it->val = NULL; + return true; +} + +static bool kumo_tchdb_iterator_del(void* iterator_data, + kumo_storage_casproc proc, void* casdata) +{ + // FIXME tchdboutproc + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + + const char* key = TCXSTRPTR(it->key); + size_t keylen = TCXSTRSIZE(it->key); + + return tchdbout(it->ctx->db, key, keylen); +} + +static bool kumo_tchdb_iterator_del_force(void* iterator_data) +{ + kumo_tchdb_iterator* it = reinterpret_cast(iterator_data); + + const char* key = TCXSTRPTR(it->key); + size_t keylen = TCXSTRSIZE(it->key); + + return tchdbout(it->ctx->db, key, keylen); +} + + +static kumo_storage_op kumo_tchdb_op = +{ + kumo_tchdb_create, + kumo_tchdb_free, + kumo_tchdb_open, + kumo_tchdb_close, + 
kumo_tchdb_get, + kumo_tchdb_set, + kumo_tchdb_del, + kumo_tchdb_update, + NULL, + kumo_tchdb_rnum, + kumo_tchdb_backup, + kumo_tchdb_error, + kumo_tchdb_for_each, + kumo_tchdb_iterator_key, + kumo_tchdb_iterator_val, + kumo_tchdb_iterator_keylen, + kumo_tchdb_iterator_vallen, + kumo_tchdb_iterator_release_key, + kumo_tchdb_iterator_release_val, + kumo_tchdb_iterator_del, + kumo_tchdb_iterator_del_force, +}; + +kumo_storage_op kumo_storage_init(void) +{ + return kumo_tchdb_op; +} + diff --git a/src/logic/wavy_server.cc b/src/logic/wavy_server.cc new file mode 100644 index 0000000..51ec453 --- /dev/null +++ b/src/logic/wavy_server.cc @@ -0,0 +1,94 @@ +#include "logic/wavy_server.h" +#include + +namespace kumo { + + +wavy_server::wavy_server() : + m_core_threads(0), m_output_threads(0) +{ } + +wavy_server::~wavy_server() { } + + +namespace { + // avoid compile error + typedef void (*sigend_callback)(void*, int); + static sigend_callback get_signal_handler() + { + sigend_callback f = &mp::object_callback:: + mem_fun; + return f; + } +} // noname namespace + +void wavy_server::init_wavy(unsigned short rthreads, unsigned short wthreads) +{ + // ignore SIGPIPE + if( signal(SIGPIPE, SIG_IGN) == SIG_ERR ) { + perror("signal"); + throw mp::system_error(errno, "signal"); + } + + // initialize signal handler before starting threads + sigset_t ss; + sigemptyset(&ss); + sigaddset(&ss, SIGHUP); + sigaddset(&ss, SIGINT); + sigaddset(&ss, SIGTERM); + + s_pth.reset( new mp::pthread_signal(ss, + get_signal_handler(), + reinterpret_cast(this)) ); + + // initialize wavy + m_core_threads = rthreads; + m_output_threads = wthreads; + wavy::initialize(0,0); +} + + +void wavy_server::run() +{ + wavy::add_output_thread(m_output_threads); + wavy::add_core_thread(m_core_threads); +} + +void wavy_server::join() +{ + wavy::join(); +} + + +void wavy_server::signal_handler(int signo) +{ + if(signo == SIGINT || signo == SIGTERM) { + signal_end(); + } else { + signal_hup(); + } +} + + +// dummy 
function +static void finished() { } + +void wavy_server::signal_end() +{ + wavy::end(); + wavy::submit(finished); // submit dummy function + end_preprocess(); + LOG_INFO("end"); +} + +void wavy_server::signal_hup() +{ + LOG_INFO("SIGHUP"); + if(logpacker::is_active()) { + logpacker::reopen(); + } +} + + +} // namespace kumo + diff --git a/src/logic/wavy_server.h b/src/logic/wavy_server.h new file mode 100644 index 0000000..9aee33f --- /dev/null +++ b/src/logic/wavy_server.h @@ -0,0 +1,43 @@ +#ifndef LOGIC_WAVY_SERVER_H__ +#define LOGIC_WAVY_SERVER_H__ + +#include "rpc/wavy.h" +#include "log/mlogger.h" +#include "log/logpacker.h" +#include + +namespace kumo { + + +using rpc::wavy; + + +class wavy_server { +public: + wavy_server(); + ~wavy_server(); + +protected: + void init_wavy(unsigned short rthreads, unsigned short wthreads); + + virtual void end_preprocess() { } + +public: + virtual void run(); + virtual void join(); + + void signal_handler(int signo); + void signal_end(); + void signal_hup(); + +private: + unsigned short m_core_threads; + unsigned short m_output_threads; + std::auto_ptr s_pth; +}; + + +} // namespace kumo + +#endif /* logic/wavy_server.h */ + diff --git a/src/mp/Makefile.am b/src/mp/Makefile.am new file mode 100644 index 0000000..951d1a2 --- /dev/null +++ b/src/mp/Makefile.am @@ -0,0 +1,82 @@ + +NEED_PREPROCESS = \ + object_callback.h \ + wavy/core.h \ + wavy/singleton.h + +MOSTLYCLEANFILES = $(NEED_PREPROCESS) + +EXTRA_DIST = $(NEED_PREPROCESS) + +noinst_HEADERS = \ + exception.h \ + functional.h \ + memory.h \ + object_callback.h \ + pp.h \ + pthread.h \ + pthread_impl.h \ + shared_buffer.h \ + shared_buffer_impl.h \ + source.h \ + source_impl.h \ + stream_buffer.h \ + stream_buffer_impl.h \ + utility.h \ + wavy/core.h \ + wavy/output.h \ + wavy/singleton.h \ + wavy.h + +# FIXME GNU make extension +%.h: %.pre.h + $(RUBY) -e '\ + def args(n, &block) ;\ + Array.new(n) {|i| yield i+1} .join(", ") ;\ + end ;\ + src = ARGF.read ;\ + 
src.gsub!(/^MP_ARGS_BEGIN$$(.*?)^MP_ARGS_END$$/m) {|code| ;\ + result = [] ;\ + 1.upto(15) {|n| ;\ + line = code.split("\n")[1..-2].join("\n") ;\ + line.gsub!(/MP_ARGS_TEMPLATE/, args(n) {|i| "typename A#{i}" }) ;\ + line.gsub!(/MP_ARGS_PARAMS_PTR/, args(n) {|i| "A#{i}* a#{i}" }) ;\ + line.gsub!(/MP_ARGS_PARAMS_REF/, args(n) {|i| "A#{i}& a#{i}" }) ;\ + line.gsub!(/MP_ARGS_PARAMS/, args(n) {|i| "A#{i} a#{i}" }) ;\ + line.gsub!(/MP_ARGS_FUNC/, args(n) {|i| "a#{i}" }) ;\ + line.gsub!(/MP_ARGS_TYPES_PTR/, args(n) {|i| "A#{i}*" }) ;\ + line.gsub!(/MP_ARGS_TYPES_REF/, args(n) {|i| "A#{i}&" }) ;\ + line.gsub!(/MP_ARGS_TYPES/, args(n) {|i| "A#{i}" }) ;\ + line.gsub!(/MP_ARGS_ITERATOR_BEGIN$$(.*?)^MP_ARGS_ITERATOR_END$$/m) {|sub| ;\ + subresult = [] ;\ + subline = sub.split("\n")[1..-2].join("\n") ;\ + 1.upto(n) {|it| ;\ + sublineit = subline.dup ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_PARAM/, "a#{it}") ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_TYPE_PTR/, "A#{it}*") ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_TYPE_REF/, "A#{it}&") ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_TYPE/, "A#{it}") ;\ + sublineit.gsub!(/\[MP_ARGS_ITERATOR\]/, "#{it}" ) ;\ + if it == 1 ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_FIRST_COLON/, " " ) ;\ + else ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_FIRST_COLON/, "," ) ;\ + end ;\ + if it == n ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_LAST_COLON/, " " ) ;\ + else ;\ + sublineit.gsub!(/MP_ARGS_ITERATOR_LAST_COLON/, "," ) ;\ + end ;\ + subresult << sublineit ;\ + } ;\ + subresult.join("\n") ;\ + } ;\ + result << line ;\ + } ;\ + result.join("\n") ;\ + } ;\ + puts src' \ + $< > $@.tmp + mv $@.tmp $@ + + diff --git a/src/mp/exception.h b/src/mp/exception.h new file mode 100644 index 0000000..b027f63 --- /dev/null +++ b/src/mp/exception.h @@ -0,0 +1,45 @@ +// +// mpio exception +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_EXCEPTION_H__ +#define MP_EXCEPTION_H__ + +#include +#include +#include + +namespace mp { + + +struct system_error : std::runtime_error { // what() reads "msg: strerror(errno_)" + system_error(int errno_, const std::string& msg) : + std::runtime_error(msg + ": " + strerror(errno_)) {} +}; + + +struct event_error : system_error { // same message format as system_error + event_error(int errno_, const std::string& msg) : + system_error(errno_, msg) {} // forward the argument, not the global errno, which may have changed since the failure +}; + + +} // namespace mp + + +#endif /* mp/exception.h */ + diff --git a/src/mp/functional.h b/src/mp/functional.h new file mode 100644 index 0000000..7148657 --- /dev/null +++ b/src/mp/functional.h @@ -0,0 +1,64 @@ +// +// mp::functional +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// + +#ifndef MP_FUNCTIONAL_H__ +#define MP_FUNCTIONAL_H__ + +#ifdef MP_FUNCTIONAL_BOOST +#include +namespace mp { + using std::tr1::function; + using std::tr1::bind; + namespace placeholders { + using namespace std::tr1::placeholders; + } +} +#else +#ifdef MP_FUNCTIONAL_BOOST_ORG +#include +#include +namespace mp { + using boost::function; + using boost::bind; + namespace placeholders { } +} +#else +#ifndef MP_FUNCTIONAL_STANDARD +#include +namespace mp { + using std::tr1::function; + using std::tr1::bind; + namespace placeholders { + using namespace std::tr1::placeholders; + } +} +#else +#include +namespace mp { + using std::function; + using std::bind; + namespace placeholders { + using namespace std::placeholders; + } +} +#endif +#endif +#endif + +#endif /* mp/functional.h */ + diff --git a/src/mp/memory.h b/src/mp/memory.h new file mode 100644 index 0000000..a2a2929 --- /dev/null +++ b/src/mp/memory.h @@ -0,0 +1,67 @@ +// +// mp::memory +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef MP_MEMORY_H__ +#define MP_MEMORY_H__ + +#ifdef MP_MEMORY_BOOST // std::tr1 smart pointers +#include +namespace mp { + using std::tr1::shared_ptr; + using std::tr1::weak_ptr; + //using std::tr2::scoped_ptr; + using std::tr1::static_pointer_cast; + using std::tr1::dynamic_pointer_cast; +} +#else +#ifdef MP_MEMORY_BOOST_ORG // boost:: smart pointers +#include +#include +//#include +namespace mp { + using boost::shared_ptr; + using boost::weak_ptr; + //using boost::scoped_ptr; + using boost::static_pointer_cast; + using boost::dynamic_pointer_cast; +} +#else +#ifndef MP_MEMORY_STANDARD // default: std::tr1 smart pointers +#include +namespace mp { + using std::tr1::shared_ptr; + using std::tr1::weak_ptr; + //using std::tr2::scoped_ptr; + using std::tr1::static_pointer_cast; + using std::tr1::dynamic_pointer_cast; +} +#else // MP_MEMORY_STANDARD: std:: smart pointers +#include +namespace mp { + using std::shared_ptr; + using std::weak_ptr; + //using std::scoped_ptr; + using std::static_pointer_cast; + using std::dynamic_pointer_cast; +} +#endif +#endif +#endif + +#endif /* mp/memory.h */ + diff --git a/src/mp/object_callback.pre.h b/src/mp/object_callback.pre.h new file mode 100644 index 0000000..a3f1260 --- /dev/null +++ b/src/mp/object_callback.pre.h @@ -0,0 +1,94 @@ +// +// mp::object_callback +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// + +#ifndef MP_OBJECT_CALLBACK_H__ +#define MP_OBJECT_CALLBACK_H__ + +#include "mp/memory.h" + +namespace mp { + + +template +static void object_destructor(void* obj) +{ + reinterpret_cast(obj)->~T(); +} + + +template +static void object_delete(void* obj) +{ + delete reinterpret_cast(obj); +} + + +template +struct object_callback; + +template +struct object_callback +{ + template + static R mem_fun(void* obj) + { + return (reinterpret_cast(obj)->*MemFun)(); + } + + template + static R const_mem_fun(const void* obj) + { + return (reinterpret_cast(obj)->*MemFun)(); + } + + template + static R shared_fun(shared_ptr obj) + { + return (obj.get()->*MemFun)(); + } +}; + +MP_ARGS_BEGIN +template +struct object_callback +{ + template + static R mem_fun(void* obj, MP_ARGS_PARAMS) + { + return (reinterpret_cast(obj)->*MemFun)(MP_ARGS_FUNC); + } + + template + static R const_mem_fun(const void* obj, MP_ARGS_PARAMS) + { + return (reinterpret_cast(obj)->*MemFun)(MP_ARGS_FUNC); + } + + template + static R shared_fun(shared_ptr obj, MP_ARGS_PARAMS) + { + return (obj.get()->*MemFun)(MP_ARGS_FUNC); + } +}; + +MP_ARGS_END + +} // namespace mp + +#endif /* mp/object_callback.h */ + diff --git a/src/mp/pp.h b/src/mp/pp.h new file mode 100644 index 0000000..6396fac --- /dev/null +++ b/src/mp/pp.h @@ -0,0 +1,14 @@ +#ifndef MP_PP_H__ +#define MP_PP_H__ + +#define MP_PP_STR(s) #s +#define MP_PP_XSTR(s) MP_PP_STR(s) +#define MP_PP_CONCAT(a,b) a##b +#define MP_PP_XCONCAT(a,b) MP_PP_CONCAT(a,b) +#define MP_PP_HEADER(dir, prefix, file, suffix) \ + MP_PP_XSTR( \ + MP_PP_XCONCAT(dir/prefix ## file, suffix).h \ + ) + +#endif /* mp/pp.h */ + diff --git a/src/mp/pthread.h b/src/mp/pthread.h new file mode 100644 index 0000000..1f68788 --- /dev/null +++ b/src/mp/pthread.h @@ -0,0 +1,196 @@ +// +// mpio pthread +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_PTHREAD_H__ +#define MP_PTHREAD_H__ + +#include "mp/exception.h" +#include +#include + +namespace mp { + + +struct pthread_error : system_error { + pthread_error(int errno_, const std::string& msg) : + system_error(errno_, msg) {} +}; + + +struct pthread_thread { + template + pthread_thread(IMPL* pimpl); + + virtual ~pthread_thread(); + + void run(); + void detach(); + void* join(); + void cancel(); + + bool operator== (const pthread_thread& other) const; + bool operator!= (const pthread_thread& other) const; + + static void exit(void* retval = NULL); + +private: + pthread_t m_thread; + void* (*m_thread_func)(void*); + void* m_pimpl; + + template + static void* trampoline(void* obj); +}; + + +template +struct pthread_thread_impl : public pthread_thread { + pthread_thread_impl(); + virtual ~pthread_thread_impl(); +}; + + +class pthread_mutex { +public: + pthread_mutex(const pthread_mutexattr_t *attr = NULL); + pthread_mutex(int kind); + ~pthread_mutex(); +public: + void lock(); + bool trylock(); + void unlock(); +public: + pthread_mutex_t* get() { return &m_mutex; } +private: + pthread_mutex_t m_mutex; +private: + pthread_mutex(const pthread_mutex&); +}; + + +class pthread_rwlock { +public: + pthread_rwlock(const pthread_rwlockattr_t *attr = NULL); + //pthread_rwlock(int kind); + ~pthread_rwlock(); +public: + void rdlock(); + bool tryrdlock(); + void wrlock(); + bool trywrlock(); + void unlock(); +public: + pthread_rwlock_t* get() { return &m_mutex; } +private: + pthread_rwlock_t m_mutex; +private: + pthread_rwlock(const 
pthread_rwlock&); +}; + + +class pthread_scoped_lock { +public: + pthread_scoped_lock(); + pthread_scoped_lock(pthread_mutex& mutex); + ~pthread_scoped_lock(); +public: + void unlock(); + void relock(pthread_mutex& mutex); +private: + pthread_mutex* m_mutex; +private: + pthread_scoped_lock(const pthread_scoped_lock&); +}; + + +class pthread_scoped_rdlock { +public: + pthread_scoped_rdlock(); + pthread_scoped_rdlock(pthread_rwlock& mutex); + ~pthread_scoped_rdlock(); +public: + void unlock(); + void relock(pthread_rwlock& mutex); +private: + pthread_rwlock* m_mutex; +private: + pthread_scoped_rdlock(const pthread_scoped_rdlock&); +}; + +class pthread_scoped_wrlock { +public: + pthread_scoped_wrlock(); + pthread_scoped_wrlock(pthread_rwlock& mutex); + ~pthread_scoped_wrlock(); +public: + void unlock(); + void relock(pthread_rwlock& mutex); +private: + pthread_rwlock* m_mutex; +private: + pthread_scoped_wrlock(const pthread_scoped_wrlock&); +}; + + +class pthread_cond { +public: + pthread_cond(const pthread_condattr_t *attr = NULL); + ~pthread_cond(); +public: + void signal(); + void broadcast(); + void wait(pthread_mutex& mutex); + bool timedwait(pthread_mutex& mutex, const struct timespec *abstime); +public: + pthread_cond_t* get() { return &m_cond; } +private: + pthread_cond_t m_cond; +private: + pthread_cond(const pthread_cond&); +}; + + +class pthread_signal { +public: + pthread_signal(const sigset_t& ss, void (*handler)(void*, int), void* data); + ~pthread_signal(); +public: + void operator() (); +private: + struct scoped_sigprocmask { + scoped_sigprocmask(const sigset_t& ss); + ~scoped_sigprocmask(); + const sigset_t* get() const { return &m_ss; } + private: + sigset_t m_ss; + scoped_sigprocmask(); + scoped_sigprocmask(const scoped_sigprocmask&); + }; + scoped_sigprocmask m_sigmask; + void (*m_handler)(void*, int); + void* m_data; + mp::pthread_thread m_thread; +}; + + +} // namespace mp + +#include "mp/pthread_impl.h" + +#endif /* mp/pthread.h */ + diff --git 
a/src/mp/pthread_impl.h b/src/mp/pthread_impl.h new file mode 100644 index 0000000..05027ba --- /dev/null +++ b/src/mp/pthread_impl.h @@ -0,0 +1,395 @@ +// +// mpio pthread +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_PTHREAD_IMPL_H__ +#define MP_PTHREAD_IMPL_H__ + +#include +#include +#ifndef MP_NO_CXX_ABI_H +#include +#endif + +namespace mp { + + +template +pthread_thread::pthread_thread(IMPL* pimpl) : + m_thread_func(&pthread_thread::trampoline), + m_pimpl(reinterpret_cast(pimpl)) +{ } + +inline pthread_thread::~pthread_thread() +{ } + +inline void pthread_thread::run() +{ + int err = pthread_create(&m_thread, NULL, + m_thread_func, m_pimpl); + if(err) { throw pthread_error(err, "failed to create thread"); } +} + +inline void pthread_thread::detach() +{ + int err = pthread_detach(m_thread); + if(err) { throw pthread_error(err, "failed to detach thread"); } +} + +inline void* pthread_thread::join() +{ + void* ret; + int err = pthread_join(m_thread, &ret); + if(err) { throw pthread_error(err, "failed to join thread"); } + return ret; +} + +inline void pthread_thread::cancel() +{ + pthread_cancel(m_thread); +} + +inline bool pthread_thread::operator== (const pthread_thread& other) const +{ + return pthread_equal(m_thread, other.m_thread); +} + +inline bool pthread_thread::operator!= (const pthread_thread& other) const +{ + return !(*this == other); +} + +template +void*
pthread_thread::trampoline(void* obj) +try { + reinterpret_cast(obj)->operator()(); + return NULL; // FIXME + +} catch (std::exception& e) { + try { +#ifndef MP_NO_CXX_ABI_H + int status; + std::cerr + << "thread terminated with throwing an instance of '" + << abi::__cxa_demangle(typeid(e).name(), 0, 0, &status) + << "'\n" + << " what(): " << e.what() << std::endl; +#else + std::cerr + << "thread terminated with throwing an instance of '" + << typeid(e).name() + << "'\n" + << " what(): " << e.what() << std::endl; +#endif + } catch (...) {} + throw; + +} catch (...) { + try { + std::cerr << "thread terminated with throwing an unknown object" << std::endl; + } catch (...) {} + throw; +} + + +inline void pthread_thread::exit(void* retval) +{ + pthread_exit(retval); +} + + +template +pthread_thread_impl::pthread_thread_impl() : + pthread_thread(this) { } + +template +pthread_thread_impl::~pthread_thread_impl() { } + + +inline pthread_mutex::pthread_mutex(const pthread_mutexattr_t *attr) +{ + pthread_mutex_init(&m_mutex, attr); +} + +inline pthread_mutex::pthread_mutex(int kind) +{ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, kind); + pthread_mutex_init(&m_mutex, &attr); + pthread_mutexattr_destroy(&attr); // the attribute object is only needed during init +} + +inline pthread_mutex::~pthread_mutex() +{ + pthread_mutex_destroy(&m_mutex); +} + +// pthread_* calls return a positive error number on failure; hand it to pthread_error unchanged so strerror() can describe it, as run(), detach() and join() do. +inline void pthread_mutex::lock() +{ + int err = pthread_mutex_lock(&m_mutex); + if(err != 0) { throw pthread_error(err, "failed to lock pthread mutex"); } +} + +inline void pthread_mutex::unlock() +{ + int err = pthread_mutex_unlock(&m_mutex); + if(err != 0) { throw pthread_error(err, "failed to unlock pthread mutex"); } +} + +inline bool pthread_mutex::trylock() +{ + int err = pthread_mutex_trylock(&m_mutex); + if(err != 0) { + if(err == EBUSY) { return false; } + throw pthread_error(err, "failed to trylock pthread mutex"); + } + return true; +} + + +inline pthread_rwlock::pthread_rwlock(const pthread_rwlockattr_t *attr) +{ +
pthread_rwlock_init(&m_mutex, attr); +} + +//inline pthread_rwlock::pthread_rwlock(int kind) +//{ +// pthread_rwlockattr_t attr; +// pthread_rwlockattr_init(&attr); +// pthread_rwlockattr_settype(&attr, kind); +// pthread_rwlock_init(&m_mutex, &attr); +//} + +inline pthread_rwlock::~pthread_rwlock() +{ + pthread_rwlock_destroy(&m_mutex); +} + +inline void pthread_rwlock::rdlock() +{ + int err = pthread_rwlock_rdlock(&m_mutex); + if(err != 0) { throw pthread_error(err, "failed to read lock pthread rwlock"); } +} + +inline bool pthread_rwlock::tryrdlock() +{ + int err = pthread_rwlock_tryrdlock(&m_mutex); + if(err != 0) { + if(err == EBUSY) { return false; } + throw pthread_error(err, "failed to read trylock pthread rwlock"); + } + return true; +} + +inline void pthread_rwlock::wrlock() +{ + int err = pthread_rwlock_wrlock(&m_mutex); + if(err != 0) { throw pthread_error(err, "failed to write lock pthread rwlock"); } +} + +inline bool pthread_rwlock::trywrlock() +{ + int err = pthread_rwlock_trywrlock(&m_mutex); + if(err != 0) { + if(err == EBUSY) { return false; } + throw pthread_error(err, "failed to write trylock pthread rwlock"); + } + return true; +} + +inline void pthread_rwlock::unlock() +{ + int err = pthread_rwlock_unlock(&m_mutex); + if(err != 0) { throw pthread_error(err, "failed to unlock pthread rwlock"); } +} + + +inline pthread_scoped_lock::pthread_scoped_lock() : + m_mutex(NULL) { } + +inline pthread_scoped_lock::pthread_scoped_lock(pthread_mutex& mutex) : + m_mutex(NULL) +{ + mutex.lock(); + m_mutex = &mutex; +} + +inline pthread_scoped_lock::~pthread_scoped_lock() +{ + if(m_mutex) { + m_mutex->unlock(); + } +} + +inline void pthread_scoped_lock::unlock() +{ + if(m_mutex) { + m_mutex->unlock(); + m_mutex = NULL; + } +} + +inline void pthread_scoped_lock::relock(pthread_mutex& mutex) +{ + unlock(); + mutex.lock(); + m_mutex = &mutex; +} + + +inline pthread_scoped_rdlock::pthread_scoped_rdlock() : +
m_mutex(NULL) { } + +inline pthread_scoped_rdlock::pthread_scoped_rdlock(pthread_rwlock& mutex) : + m_mutex(NULL) +{ + mutex.rdlock(); + m_mutex = &mutex; +} + +inline pthread_scoped_rdlock::~pthread_scoped_rdlock() +{ + if(m_mutex) { + m_mutex->unlock(); + } +} + +inline void pthread_scoped_rdlock::unlock() +{ + if(m_mutex) { + m_mutex->unlock(); + m_mutex = NULL; + } +} + +inline void pthread_scoped_rdlock::relock(pthread_rwlock& mutex) +{ + unlock(); + mutex.rdlock(); + m_mutex = &mutex; +} + + +inline pthread_scoped_wrlock::pthread_scoped_wrlock() : + m_mutex(NULL) { } + +inline pthread_scoped_wrlock::pthread_scoped_wrlock(pthread_rwlock& mutex) : + m_mutex(NULL) +{ + mutex.wrlock(); + m_mutex = &mutex; +} + +inline pthread_scoped_wrlock::~pthread_scoped_wrlock() +{ + if(m_mutex) { + m_mutex->unlock(); + } +} + +inline void pthread_scoped_wrlock::unlock() +{ + if(m_mutex) { + m_mutex->unlock(); + m_mutex = NULL; + } +} + +inline void pthread_scoped_wrlock::relock(pthread_rwlock& mutex) +{ + unlock(); + mutex.wrlock(); + m_mutex = &mutex; +} + + +inline pthread_cond::pthread_cond(const pthread_condattr_t *attr) +{ + pthread_cond_init(&m_cond, attr); +} + +inline pthread_cond::~pthread_cond() +{ + pthread_cond_destroy(&m_cond); +} + +inline void pthread_cond::signal() +{ + int err = pthread_cond_signal(&m_cond); + if(err != 0) { throw pthread_error(err, "failed to signal pthread cond"); } +} + +inline void pthread_cond::broadcast() +{ + int err = pthread_cond_broadcast(&m_cond); + if(err != 0) { throw pthread_error(err, "failed to broadcast pthread cond"); } +} + +inline void pthread_cond::wait(pthread_mutex& mutex) +{ + int err = pthread_cond_wait(&m_cond, mutex.get()); + if(err != 0) { throw pthread_error(err, "failed to wait pthread cond"); } +} + +inline bool pthread_cond::timedwait(pthread_mutex& mutex, const struct timespec *abstime) +{ + int err = pthread_cond_timedwait(&m_cond, mutex.get(), abstime); + if(err != 0) { + if(err == ETIMEDOUT) { return
false; } + throw pthread_error(err, "failed to timedwait pthread cond"); + } + return true; +} + + +inline pthread_signal::scoped_sigprocmask::scoped_sigprocmask(const sigset_t& ss) : + m_ss(ss) +{ + if( sigprocmask(SIG_BLOCK, &m_ss, NULL) < 0 ) { + throw pthread_error(errno, "failed to set sigprocmask"); + } +} + +inline pthread_signal::scoped_sigprocmask::~scoped_sigprocmask() +{ + sigprocmask(SIG_UNBLOCK, &m_ss, NULL); +} + +inline pthread_signal::pthread_signal(const sigset_t& ss, void (*handler)(void*, int), void* data) : + m_sigmask(ss), m_handler(handler), m_data(data), m_thread(this) +{ + m_thread.run(); +} + +inline pthread_signal::~pthread_signal() {} + +inline void pthread_signal::operator() () +{ + int signo; + while(true) { + if(sigwait(m_sigmask.get(), &signo) != 0) { return; } + (*m_handler)(m_data, signo); + } +} + + +} // namespace mp + +#endif /* mp/pthread_impl.h */ + diff --git a/src/mp/shared_buffer.h b/src/mp/shared_buffer.h new file mode 100644 index 0000000..a510eb3 --- /dev/null +++ b/src/mp/shared_buffer.h @@ -0,0 +1,72 @@ +// +// mp::shared_buffer +// +// Copyright (C) 2009 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// + +#ifndef MP_SHARED_BUFFER_H__ +#define MP_SHARED_BUFFER_H__ + +#include +#include + +#ifndef MP_SHARED_BUFFER_INITIAL_BUFFER_SIZE +#define MP_SHARED_BUFFER_INITIAL_BUFFER_SIZE (8*1024) +#endif + +namespace mp { + + +class shared_buffer { +public: + shared_buffer(size_t init_size = MP_SHARED_BUFFER_INITIAL_BUFFER_SIZE); + ~shared_buffer(); + +public: + void reserve(size_t len, size_t init_size = MP_SHARED_BUFFER_INITIAL_BUFFER_SIZE); + + void* buffer(); + size_t buffer_capacity() const; + + struct reference; + + void* allocate(size_t size, reference* result_ref = NULL, + size_t init_size = MP_SHARED_BUFFER_INITIAL_BUFFER_SIZE); + +private: + char* m_buffer; + size_t m_used; + size_t m_free; + +private: + void expand_buffer(size_t len, size_t init_size); + + typedef volatile unsigned int count_t; + static void init_count(void* d); + static void decr_count(void* d); + static void incr_count(void* d); + static count_t get_count(void* d); + +private: + shared_buffer(const shared_buffer&); +}; + + +} // namespace mp + +#include "mp/shared_buffer_impl.h" + +#endif /* mp/shared_buffer.h */ + diff --git a/src/mp/shared_buffer_impl.h b/src/mp/shared_buffer_impl.h new file mode 100644 index 0000000..1fb49f4 --- /dev/null +++ b/src/mp/shared_buffer_impl.h @@ -0,0 +1,176 @@ +// +// mp::shared_buffer +// +// Copyright (C) 2009 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// + +#ifndef MP_SHARED_BUFFER_IMPL_H__ +#define MP_SHARED_BUFFER_IMPL_H__ + +namespace mp { + + +struct shared_buffer::reference { // counted handle; m points at a block whose first bytes are its count_t counter +public: + reference() : m(NULL) { } + + reference(void* p) : m(p) + { + if(m) { incr_count(m); } // a NULL reference holds nothing + } + + reference(const reference& o) + { + m = o.m; + if(m) { incr_count(m); } // copying an empty reference must not touch a counter + } + + // explicit assignment: the implicit one would copy m without adjusting either counter + reference& operator=(const reference& o) + { + reset(o.m); + return *this; + } + + void reset(void* p) + { + // acquire p before releasing m so that reset(m) can never free the block being kept + if(p) { incr_count(p); } + if(m) { decr_count(m); } + m = p; + } + + ~reference() + { + if(m) { decr_count(m); } + } + +private: + void* m; +}; + + +inline void shared_buffer::init_count(void* d) +{ + *(volatile count_t*)d = 1; +} + +inline void shared_buffer::decr_count(void* d) +{ + //if(--*(count_t*)d == 0) { + if(__sync_sub_and_fetch((count_t*)d, 1) == 0) { + ::free(d); + } +} + +inline void shared_buffer::incr_count(void* d) +{ + //++*(count_t*)d; + __sync_add_and_fetch((count_t*)d, 1); +} + +inline shared_buffer::count_t shared_buffer::get_count(void* d) +{ + return *(count_t*)d; +} + + +inline shared_buffer::shared_buffer(size_t init_size) +{ + const size_t initsz = std::max(init_size, sizeof(count_t)); + m_buffer = (char*)::malloc(initsz); + if(m_buffer == NULL) { throw std::bad_alloc(); } + + init_count(m_buffer); + m_used = sizeof(count_t); + m_free = initsz - m_used; +} + +inline shared_buffer::~shared_buffer() +{ + decr_count(m_buffer); +} + +inline void* shared_buffer::buffer() +{ + return m_buffer + m_used; +} + +inline size_t shared_buffer::buffer_capacity() const +{ + return m_free; +} + +inline void shared_buffer::reserve(size_t len, size_t init_size) +{ + if(get_count(m_buffer) == 1) { + // rewind buffer + m_free += m_used - sizeof(count_t); + m_used = sizeof(count_t); + } + if(m_free < len) { + expand_buffer(len, init_size); + } +} + +inline void* shared_buffer::allocate(size_t len, + reference* result_ref, size_t init_size) +{ + reserve(len, init_size); + char* tmp = m_buffer + m_used; + m_used += len; + m_free -= len; + if(result_ref) { + result_ref->reset(m_buffer); + } + return tmp; +} + +inline void
shared_buffer::expand_buffer(size_t len, size_t init_size) +{ + // grow in place only when this object is the sole holder: realloc() may move the block and free the old one under any outstanding reference + if(m_used == sizeof(count_t) && get_count(m_buffer) == 1) { + size_t next_size = (m_used + m_free) * 2; + while(next_size < len + m_used) { next_size *= 2; } + + char* tmp = (char*)::realloc(m_buffer, next_size); + if(!tmp) { throw std::bad_alloc(); } + + m_buffer = tmp; + m_free = next_size - m_used; + + } else { + const size_t initsz = std::max(init_size, sizeof(count_t)); + + size_t next_size = initsz; // include sizeof(count_t) + while(next_size < len + sizeof(count_t)) { next_size *= 2; } + + char* tmp = (char*)::malloc(next_size); + if(!tmp) { throw std::bad_alloc(); } + init_count(tmp); + + decr_count(m_buffer); + + m_buffer = tmp; + m_used = sizeof(count_t); + m_free = next_size - m_used; + } +} + + +} // namespace mp + +#endif /* mp/shared_buffer_impl.h */ + diff --git a/src/mp/source.h b/src/mp/source.h new file mode 100644 index 0000000..7f3ec95 --- /dev/null +++ b/src/mp/source.h @@ -0,0 +1,83 @@ +// +// mp::source +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// + +#ifndef MP_SOURCE_H__ +#define MP_SOURCE_H__ + +#include + +#ifndef MP_SOURCE_DEFAULT_ALLOCATION_SIZE +#define MP_SOURCE_DEFAULT_ALLOCATION_SIZE (32*1024) +#endif + +#ifndef MP_SOURCE_DEFAULT_LOTS_IN_CHUNK +#define MP_SOURCE_DEFAULT_LOTS_IN_CHUNK 4 +#endif + +namespace mp { + +static const size_t SOURCE_DEFAULT_ALLOCATION_SIZE = MP_SOURCE_DEFAULT_ALLOCATION_SIZE; +static const size_t SOURCE_DEFAULT_LOTS_IN_CHUNK = MP_SOURCE_DEFAULT_LOTS_IN_CHUNK; + +template < size_t EstimatedAllocationSize = SOURCE_DEFAULT_ALLOCATION_SIZE, + size_t OptimalLotsInChunk = SOURCE_DEFAULT_LOTS_IN_CHUNK > +class source { +public: + source(); + ~source(); + +public: + //! Allocate memory from the pool. + /* The allocated memory has to be freed using the free() function. */ + void* malloc(size_t size); + + //! Free the allocated memory. + void free(void* x); + +private: + struct chunk_t { + chunk_t* next; + chunk_t* prev; + size_t free; + size_t lots; + size_t size; + }; + struct data_t { + chunk_t* chunk; + }; + +private: + chunk_t* m_free; + chunk_t* m_used; + +private: + void* expand_free(size_t req); + void splice_to_used(chunk_t* chunk); + void splice_to_free(chunk_t* chunk); + +private: + source(const source&); +}; + + +} // namespace mp + +#include "mp/source_impl.h" + +#endif /* mp/source.h */ + diff --git a/src/mp/source_impl.h b/src/mp/source_impl.h new file mode 100644 index 0000000..cbdcd83 --- /dev/null +++ b/src/mp/source_impl.h @@ -0,0 +1,143 @@ +// +// mp::source +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_SOURCE_IMPL_H__ +#define MP_SOURCE_IMPL_H__ + +#include +#include + +namespace mp { + + +template +source::source() +{ + m_free = (chunk_t*)std::malloc(sizeof(chunk_t)); + if( m_free == NULL ) { throw std::bad_alloc(); } + m_used = (chunk_t*)std::malloc(sizeof(chunk_t)); + if( m_used == NULL ) { std::free(m_free); throw std::bad_alloc(); } + m_free->next = m_free; + m_free->prev = m_free; + m_used->next = m_used; + m_used->prev = m_used; +} + +template +source::~source() +{ + chunk_t* f = m_free->next; + while(f != m_free) { + f = f->next; + std::free(f->prev); + } + std::free(f); + + f = m_used; + while(f->next != m_used) { + f = f->next; + std::free(f->prev); + } + std::free(f); +} + +template +void* source::malloc(size_t size) +{ + size_t req = size + sizeof(data_t); + for( chunk_t* f = m_free->next; f != m_free; f = f->next ) { + if( f->free < req ) { continue; } + data_t* data = reinterpret_cast( + ((char*)f) + sizeof(chunk_t) + f->size - f->free + ); + f->lots++; + f->free -= req; + data->chunk = f; + if( f->free < EstimatedAllocationSize + sizeof(chunk_t) ) { + splice_to_used(f); + } + return ((char*)data) + sizeof(data_t); + } + return expand_free(req); +} + +template +void source::free(void* x) +{ + data_t* data = reinterpret_cast( ((char*)x) - sizeof(data_t) ); + chunk_t* chunk = data->chunk; + chunk->lots--; + if( chunk->lots == 0 ) { + splice_to_free(chunk); + } +} + +template +void* source::expand_free(size_t req) +{ + const size_t default_chunk_size = (EstimatedAllocationSize + sizeof(chunk_t)) * OptimalLotsInChunk; + size_t chunk_size = req > default_chunk_size ? 
req : default_chunk_size; + chunk_t* n = (chunk_t*)std::malloc(sizeof(chunk_t) + chunk_size); + if( n == NULL ) { throw std::bad_alloc(); } + data_t* data = reinterpret_cast( ((char*)n) + sizeof(chunk_t) ); + n->lots = 1; + n->size = chunk_size; + n->free = chunk_size - req; + data->chunk = n; + if( n->free < EstimatedAllocationSize + sizeof(chunk_t) ) { + n->prev = m_used; + n->next = m_used->next; + m_used->next->prev = n; + m_used->next = n; + } else { + n->prev = m_free; + n->next = m_free->next; + m_free->next->prev = n; + m_free->next = n; + } + return ((char*)data) + sizeof(data_t); +} + +template +void source::splice_to_used(chunk_t* chunk) +{ + chunk->prev->next = chunk->next; + chunk->next->prev = chunk->prev; + chunk->prev = m_used; + chunk->next = m_used->next; + m_used->next->prev = chunk; + m_used->next = chunk; +} + +template +void source::splice_to_free(chunk_t* chunk) +{ + chunk->prev->next = chunk->next; + chunk->next->prev = chunk->prev; + chunk->next = m_free->next; + chunk->prev = m_free; + m_free->next->prev = chunk; + m_free->next = chunk; + chunk->free = chunk->size; +} + + +} // namespace mp + +#endif /* mp/source_impl.h */ + diff --git a/src/mp/stream_buffer.h b/src/mp/stream_buffer.h new file mode 100644 index 0000000..287c9c2 --- /dev/null +++ b/src/mp/stream_buffer.h @@ -0,0 +1,79 @@ +// +// mp::stream_buffer +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef MP_STREAM_BUFFER_H__ +#define MP_STREAM_BUFFER_H__ + +#include +#include +#include +#include + +#ifndef MP_STREAM_BUFFER_INITIAL_BUFFER_SIZE +#define MP_STREAM_BUFFER_INITIAL_BUFFER_SIZE 8*1024 +#endif + +namespace mp { + + +class stream_buffer { +public: + stream_buffer(size_t initial_buffer_size = MP_STREAM_BUFFER_INITIAL_BUFFER_SIZE); + ~stream_buffer(); + +public: + void reserve_buffer(size_t len, size_t initial_buffer_size = MP_STREAM_BUFFER_INITIAL_BUFFER_SIZE); + + void* buffer(); + size_t buffer_capacity() const; + void buffer_consumed(size_t len); + + void* data(); + size_t data_size() const; + void data_used(size_t len); + + struct reference; + reference* release(); + +private: + char* m_buffer; + size_t m_used; + size_t m_free; + size_t m_off; + std::auto_ptr m_ref; + +private: + void expand_buffer(size_t len, size_t initial_buffer_size); + + typedef volatile unsigned int count_t; + static void init_count(void* d); + static void decl_count(void* d); + static void incr_count(void* d); + static count_t get_count(void* d); + +private: + stream_buffer(const stream_buffer&); +}; + + +} // namespace mp + +#include "mp/stream_buffer_impl.h" + +#endif /* mp/stream_buffer.h */ + diff --git a/src/mp/stream_buffer_impl.h b/src/mp/stream_buffer_impl.h new file mode 100644 index 0000000..f2df343 --- /dev/null +++ b/src/mp/stream_buffer_impl.h @@ -0,0 +1,203 @@ +// +// mp::stream_buffer +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_STREAM_BUFFER_IMPL_H__ +#define MP_STREAM_BUFFER_IMPL_H__ + +#include + +namespace mp { + + +struct stream_buffer::reference { +public: + reference() { } + + ~reference() + { + std::for_each(m_array.begin(), m_array.end(), decl()); + } + + void push(void* d) + { + m_array.push_back(d); + } + +private: + std::vector m_array; + struct decl { + void operator() (void* d) + { + stream_buffer::decl_count(d); + } + }; + +private: + reference(const reference&); +}; + + +inline void stream_buffer::init_count(void* d) +{ + *(volatile count_t*)d = 1; +} + +inline void stream_buffer::decl_count(void* d) +{ + //if(--*(count_t*)d == 0) { + if(__sync_sub_and_fetch((count_t*)d, 1) == 0) { + free(d); + } +} + +inline void stream_buffer::incr_count(void* d) +{ + //++*(count_t*)d; + __sync_add_and_fetch((count_t*)d, 1); +} + +inline stream_buffer::count_t stream_buffer::get_count(void* d) +{ + return *(count_t*)d; +} + + +inline stream_buffer::stream_buffer(size_t initial_buffer_size) : + m_buffer(NULL), + m_used(0), + m_free(0), + m_off(0), + m_ref(new reference()) +{ + const size_t initsz = std::max(initial_buffer_size, sizeof(count_t)); + + m_buffer = (char*)::malloc(initsz); + if(!m_buffer) { throw std::bad_alloc(); } + init_count(m_buffer); + + m_used = sizeof(count_t); + m_free = initsz - m_used; + m_off = sizeof(count_t); +} + +inline stream_buffer::~stream_buffer() +{ + decl_count(m_buffer); +} + +inline void* stream_buffer::buffer() +{ + return m_buffer + m_used; +} + +inline size_t stream_buffer::buffer_capacity() const +{ + return m_free; +} + +inline void stream_buffer::buffer_consumed(size_t len) +{ + m_used += len; + m_free -= len; +} + +inline void* stream_buffer::data() +{ + return m_buffer + m_off; +} + +inline size_t stream_buffer::data_size() const +{ + return m_used - m_off; +} + +inline void stream_buffer::data_used(size_t len) +{ + m_off 
+= len; +} + + +inline stream_buffer::reference* stream_buffer::release() +{ + // FIXME + m_ref->push(m_buffer); + incr_count(m_buffer); + + //std::auto_ptr old(new reference()); + //m_ref.swap(old); + reference* n = new reference(); + std::auto_ptr old(m_ref.release()); + m_ref.reset(n); + + return old.release(); +} + +inline void stream_buffer::reserve_buffer(size_t len, size_t initial_buffer_size) +{ + if(m_used == m_off && get_count(m_buffer) == 1) { + // rewind buffer + m_free += m_used - sizeof(count_t); + m_used = sizeof(count_t); + m_off = sizeof(count_t); + } + if(m_free < len) { + expand_buffer(len, initial_buffer_size); + } +} + +inline void stream_buffer::expand_buffer(size_t len, size_t initial_buffer_size) +{ + if(m_off == sizeof(count_t)) { + size_t next_size = (m_used + m_free) * 2; + while(next_size < len + m_used) { next_size *= 2; } + + char* tmp = (char*)::realloc(m_buffer, next_size); + if(!tmp) { throw std::bad_alloc(); } + + m_buffer = tmp; + m_free = next_size - m_used; + + } else { + const size_t initsz = std::max(initial_buffer_size, sizeof(count_t)); + + size_t next_size = initsz; // include sizeof(count_t) + size_t not_used = m_used - m_off; + while(next_size < len + not_used + sizeof(count_t)) { next_size *= 2; } + + char* tmp = (char*)::malloc(next_size); + if(!tmp) { throw std::bad_alloc(); } + init_count(tmp); + + try { + m_ref->push(m_buffer); + } catch (...) 
{ free(tmp); throw; } + + memcpy(tmp+sizeof(count_t), m_buffer+m_off, not_used); + + m_buffer = tmp; + m_used = not_used + sizeof(count_t); + m_free = next_size - m_used; + m_off = sizeof(count_t); + } +} + + +} // namespace mp + +#endif /* mp/stream_buffer_impl.h */ + diff --git a/src/mp/utility.h b/src/mp/utility.h new file mode 100644 index 0000000..e64b8d6 --- /dev/null +++ b/src/mp/utility.h @@ -0,0 +1,50 @@ +// +// mpio utility +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_UTILITY_H__ +#define MP_UTILITY_H__ + +#include "mp/exception.h" +#include +#include +#include +#include + +namespace mp { + + +template +struct thread_tag { + static const unsigned int ID = ThreadID; +}; + +typedef thread_tag<0> main_thread_tag; + + +inline void set_nonblock(int fd) +{ + if( ::fcntl(fd, F_SETFL, O_NONBLOCK) < 0 ) { + throw system_error(errno, "failed to set nonblock flag"); + } +} + + +} // namespace mp + +#endif /* mp/utility.h */ + diff --git a/src/mp/wavy.h b/src/mp/wavy.h new file mode 100644 index 0000000..3bdea23 --- /dev/null +++ b/src/mp/wavy.h @@ -0,0 +1,26 @@ +// +// mp::wavy +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_WAVY_H__ +#define MP_WAVY_H__ + +#include "mp/wavy/core.h" +#include "mp/wavy/output.h" + +#endif /* mp/wavy.h */ + diff --git a/src/mp/wavy/core.pre.h b/src/mp/wavy/core.pre.h new file mode 100644 index 0000000..e10a118 --- /dev/null +++ b/src/mp/wavy/core.pre.h @@ -0,0 +1,138 @@ +// +// mp::wavy::core +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef MP_WAVY_CORE_H__ +#define MP_WAVY_CORE_H__ + +#include "mp/functional.h" +#include "mp/memory.h" +#include "mp/pthread.h" +#include "mp/functional.h" +#include +#include +#include +#include +#include +#include +#include + +namespace mp { +namespace wavy { + + +class core { +public: + core(); + ~core(); + + void add_thread(size_t num); + + void end(); + bool is_end() const; + + void join(); + void detach(); + + + struct handler { + handler(int fd) : m_fd(fd) { } + virtual ~handler() { ::close(m_fd); } + virtual void read_event() = 0; + + int fd() const { return m_fd; } + + template + shared_ptr shared_self() + { + return static_pointer_cast(*m_shared_self); + } + + private: + int m_fd; + shared_ptr* m_shared_self; + friend class core; + }; + + typedef function connect_callback_t; + void connect(int socket_family, int socket_type, int protocol, + const sockaddr* addr, socklen_t addrlen, + int timeout_msec, connect_callback_t callback); + + + typedef function listen_callback_t; + void listen(int lsock, listen_callback_t callback); + + + typedef function timer_callback_t; + void timer(const timespec* interval, timer_callback_t callback); + + + template + void add(int fd); +MP_ARGS_BEGIN + template + void add(int fd, MP_ARGS_PARAMS); +MP_ARGS_END + + template + void submit(F f); +MP_ARGS_BEGIN + template + void submit(F f, MP_ARGS_PARAMS); +MP_ARGS_END + +private: + void add_impl(int fd, handler* newh); + + typedef function task_t; + void submit_impl(task_t f); + +private: + class impl; + const std::auto_ptr m_impl; + + core(const core&); +}; + +typedef core::handler handler; + + +template +void core::add(int fd) + { add_impl(fd, new Handler(fd)); } +MP_ARGS_BEGIN +template +void core::add(int fd, MP_ARGS_PARAMS) + { add_impl(fd, new Handler(fd, MP_ARGS_FUNC)); } +MP_ARGS_END + +template +inline void core::submit(F f) + { submit_impl(task_t(f)); } +MP_ARGS_BEGIN +template +inline void core::submit(F f, MP_ARGS_PARAMS) + { submit_impl(bind(f, 
MP_ARGS_FUNC)); } +MP_ARGS_END + + +} // namespace wavy +} // namespace mp + +#endif /* mp/wavy/core.h */ + diff --git a/src/mp/wavy/output.h b/src/mp/wavy/output.h new file mode 100644 index 0000000..93867ab --- /dev/null +++ b/src/mp/wavy/output.h @@ -0,0 +1,73 @@ +// +// mp::wavy::output +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_WAVY_OUTPUT_H__ +#define MP_WAVY_OUTPUT_H__ + +#include +#include +#include + +namespace mp { +namespace wavy { + + +class output { +public: + output(); + ~output(); + + void add_thread(size_t num); + + void end(); + + void join(); + void detach(); + +public: + typedef void (*finalize_t)(void* user); + struct request { + request() : finalize(NULL), user(NULL) { } + request(finalize_t f, void* u) : finalize(f), user(u) { } + finalize_t finalize; + void* user; + }; + + void write(int fd, const char* buf, size_t buflen); + void writev(int fd, const iovec* vec, size_t veclen); + + void write(int fd, const char* buf, size_t buflen, request req); + void write(int fd, const char* buf, size_t buflen, finalize_t finalize, void* user); + void writev(int fd, const iovec* vec, size_t veclen, request req); + void writev(int fd, const iovec* vec, size_t veclen, finalize_t finalize, void* user); + + void writev(int fd, const iovec* bufvec, const request* reqvec, size_t veclen); + +private: + class impl; + const std::auto_ptr m_impl; + + output(const output&); +}; + + +} // 
namespace wavy +} // namespace mp + +#endif /* mp/wavy/output.h */ + diff --git a/src/mp/wavy/singleton.pre.h b/src/mp/wavy/singleton.pre.h new file mode 100644 index 0000000..7cc1cd7 --- /dev/null +++ b/src/mp/wavy/singleton.pre.h @@ -0,0 +1,214 @@ +// +// mp::wavy +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_WAVY_SINGLETON_H__ +#define MP_WAVY_SINGLETON_H__ + +#include "mp/wavy/core.h" +#include "mp/wavy/output.h" + +namespace mp { +namespace wavy { + + +template +struct singleton { + + typedef core::handler handler; + typedef output::finalize_t finalize_t; + typedef output::request request; + + static void initialize(size_t core_thread, size_t output_thread); + + static void add_core_thread(size_t num); + static void add_output_thread(size_t num); + + static void join(); + static void detach(); + static void end(); + + static void write(int fd, const char* buf, size_t buflen); + static void writev(int fd, const iovec* vec, size_t veclen); + + static void write(int fd, const char* buf, size_t buflen, request req); + static void write(int fd, const char* buf, size_t buflen, finalize_t finalize, void* user); + static void writev(int fd, const iovec* vec, size_t veclen, request req); + static void writev(int fd, const iovec* vec, size_t veclen, finalize_t finalize, void* user); + + static void writev(int fd, const iovec* bufvec, const request* reqvec, size_t veclen); + + + typedef 
core::connect_callback_t connect_callback_t; + static void connect( + int socket_family, int socket_type, int protocol, + const sockaddr* addr, socklen_t addrlen, + int timeout_msec, connect_callback_t callback); + + + typedef core::listen_callback_t listen_callback_t; + static void listen(int lsock, listen_callback_t callback); + + + typedef core::timer_callback_t timer_callback_t; + static void timer(const timespec* interval, timer_callback_t callback); + + + template + static void add(int fd); +MP_ARGS_BEGIN + template + static void add(int fd, MP_ARGS_PARAMS); +MP_ARGS_END + + template + static void submit(F f); +MP_ARGS_BEGIN + template + static void submit(F f, MP_ARGS_PARAMS); +MP_ARGS_END + +private: + static core* s_core; + static output* s_output; + + singleton(); +}; + +template +core* singleton::s_core; + +template +output* singleton::s_output; + +template +void singleton::initialize(size_t core_thread, size_t output_thread) +{ + s_core = new core(); + s_output = new output(); + add_core_thread(core_thread); + add_output_thread(output_thread); +} + +template +void singleton::add_core_thread(size_t num) + { s_core->add_thread(num); } + +template +void singleton::add_output_thread(size_t num) + { s_output->add_thread(num); } + +template +void singleton::join() +{ + s_core->join(); + s_output->join(); +} + +template +void singleton::detach() +{ + s_core->detach(); + s_output->detach(); +} + +template +void singleton::end() +{ + s_core->end(); + s_output->end(); +} + +template +inline void singleton::write(int fd, const char* buf, size_t buflen) + { s_output->write(fd, buf, buflen); } + +template +inline void singleton::writev(int fd, const iovec* vec, size_t veclen) + { s_output->writev(fd, vec, veclen); } + +template +inline void singleton::write(int fd, const char* buf, size_t buflen, request req) + { s_output->write(fd, buf, buflen, req); } + +template +inline void singleton::write(int fd, const char* buf, size_t buflen, finalize_t finalize, void* user) 
+ { s_output->write(fd, buf, buflen, finalize, user); } + +template +inline void singleton::writev(int fd, const iovec* vec, size_t veclen, request req) + { s_output->writev(fd, vec, veclen, req); } + +template +inline void singleton::writev(int fd, const iovec* vec, size_t veclen, finalize_t finalize, void* user) + { s_output->writev(fd, vec, veclen, finalize, user); } + +template +inline void singleton::writev(int fd, const iovec* bufvec, const request* reqvec, size_t veclen) + { s_output->writev(fd, bufvec, reqvec, veclen); } + + +template +inline void singleton::connect( + int socket_family, int socket_type, int protocol, + const sockaddr* addr, socklen_t addrlen, + int timeout_msec, connect_callback_t callback) +{ + s_core->connect(socket_family, socket_type, protocol, + addr, addrlen, timeout_msec, callback); +} + + +template +inline void singleton::listen(int lsock, listen_callback_t callback) + { s_core->listen(lsock, callback); } + + +template +inline void singleton::timer( + const timespec* interval, timer_callback_t callback) + { s_core->timer(interval, callback); } + + +template +template +inline void singleton::add(int fd) + { return s_core->add(fd); } +MP_ARGS_BEGIN +template +template +inline void singleton::add(int fd, MP_ARGS_PARAMS) + { return s_core->add(fd, MP_ARGS_FUNC); } +MP_ARGS_END + +template +template +inline void singleton::submit(F f) + { s_core->submit(f); } +MP_ARGS_BEGIN +template +template +inline void singleton::submit(F f, MP_ARGS_PARAMS) + { s_core->submit(f, MP_ARGS_FUNC); } +MP_ARGS_END + + +} // namespace wavy +} // namespace mp + +#endif /* mp/wavy/singleton.h */ + diff --git a/src/mpsrc/Makefile.am b/src/mpsrc/Makefile.am new file mode 100644 index 0000000..887b7be --- /dev/null +++ b/src/mpsrc/Makefile.am @@ -0,0 +1,17 @@ +noinst_LIBRARIES = libmpio.a + +libmpio_a_CPPFLAGS = -I.. 
+ +libmpio_a_SOURCES = \ + wavy_core.cc \ + wavy_connect.cc \ + wavy_listen.cc \ + wavy_output.cc \ + wavy_timer.cc + +noinst_HEADERS = \ + wavy_core.h \ + wavy_edge.h \ + wavy_edge_epoll.h \ + wavy_edge_kqueue.h + diff --git a/src/mpsrc/wavy_connect.cc b/src/mpsrc/wavy_connect.cc new file mode 100644 index 0000000..a141193 --- /dev/null +++ b/src/mpsrc/wavy_connect.cc @@ -0,0 +1,145 @@ +// +// mp::wavy::core::connect +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "wavy_core.h" +#include +#include +#include +#include + +namespace mp { +namespace wavy { + + +class core::impl::connext_thread { +public: + struct pack { + int socket_family; + int socket_type; + int protocol; + socklen_t addrlen; + int timeout_msec; + core* c; + sockaddr addr[0]; + }; + + connext_thread(core* c, + int socket_family, int socket_type, int protocol, + const sockaddr* addr, socklen_t addrlen, + int timeout_msec, connect_callback_t& callback) : + m((pack*)::malloc(sizeof(pack)+addrlen)), + m_callback(callback) + { + if(!m) { throw std::bad_alloc(); } + m->socket_family = socket_family; + m->socket_type = socket_type; + m->protocol = protocol; + m->addrlen = addrlen; + m->timeout_msec = timeout_msec; + m->c = c; + ::memcpy(m->addr, addr, addrlen); + } + + void operator() () + { + int err = 0; + int fd = ::socket(m->socket_family, m->socket_type, m->protocol); + if(fd < 0) { + err = errno; + goto out; + } + + if(::fcntl(fd, F_SETFL, O_NONBLOCK) < 0) { + goto errno_error; + } + + if(::connect(fd, m->addr, m->addrlen) >= 0) { + goto out; + } + + if(errno != EINPROGRESS) { + goto errno_error; + } + + while(true) { + struct pollfd pf = {fd, POLLOUT, 0}; + int ret = ::poll(&pf, 1, m->timeout_msec); + if(ret < 0) { + if(errno == EINTR) { continue; } + goto errno_error; + } + + if(ret == 0) { + errno = ETIMEDOUT; + goto specific_error; + } + + { + int value = 0; + int len = sizeof(value); + if(::getsockopt(fd, SOL_SOCKET, SO_ERROR, + &value, (socklen_t*)&len) < 0) { + goto errno_error; + } + if(value != 0) { + err = value; + goto specific_error; + } + goto out; + } + } + + errno_error: + err = errno; + + specific_error: + ::close(fd); + fd = -1; + + out: + try { + m->c->submit(m_callback, fd, err); + } catch (...) 
{ + ::free(m); + throw; + } + + ::free(m); + return; + } + +private: + pack* m; + connect_callback_t m_callback; +}; + + +void core::connect(int socket_family, int socket_type, int protocol, + const sockaddr* addr, socklen_t addrlen, + int timeout_msec, connect_callback_t callback) +{ + impl::connext_thread t(this, + socket_family, socket_type, protocol, + addr, addrlen, timeout_msec, callback); + submit(t); +} + + +} // namespace wavy +} // namespace mp + diff --git a/src/mpsrc/wavy_core.cc b/src/mpsrc/wavy_core.cc new file mode 100644 index 0000000..f97d0ee --- /dev/null +++ b/src/mpsrc/wavy_core.cc @@ -0,0 +1,212 @@ +// +// mp::wavy::core +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "wavy_core.h" +#include "mp/object_callback.h" +#include "mp/utility.h" +#include +#include +#include + +#ifndef MP_WAVY_TASK_QUEUE_LIMIT +#define MP_WAVY_TASK_QUEUE_LIMIT 16 +#endif + +namespace mp { +namespace wavy { + + +core::core() : m_impl(new impl()) { } + +core::impl::impl() : + m_off(0), + m_num(0), + m_pollable(true), + m_end_flag(false) +{ + struct rlimit rbuf; + if(::getrlimit(RLIMIT_NOFILE, &rbuf) < 0) { + throw system_error(errno, "getrlimit() failed"); + } + m_state = new shared_handler[rbuf.rlim_cur]; +} + + +core::~core() { } + +core::impl::~impl() +{ + end(); + { + pthread_scoped_lock lk(m_mutex); + m_cond.broadcast(); + } + for(workers_t::iterator it(m_workers.begin()); + it != m_workers.end(); ++it) { + delete *it; + } + delete[] m_state; +} + + +void core::end() { m_impl->end(); } +void core::impl::end() +{ + m_end_flag = true; + { + pthread_scoped_lock lk(m_mutex); + m_cond.broadcast(); + } +} + +bool core::is_end() const { return m_impl->is_end(); } +bool core::impl::is_end() const +{ + return m_end_flag; +} + +void core::join() { m_impl->join(); } +void core::impl::join() +{ + for(workers_t::iterator it(m_workers.begin()); + it != m_workers.end(); ++it) { + (*it)->join(); + } +} + +void core::detach() { m_impl->detach(); } +void core::impl::detach() +{ + for(workers_t::iterator it(m_workers.begin()); + it != m_workers.end(); ++it) { + (*it)->detach(); + } +} + +void core::add_thread(size_t num) { m_impl->add_thread(num); } +void core::impl::add_thread(size_t num) +{ + for(size_t i=0; i < num; ++i) { + m_workers.push_back(NULL); + try { + m_workers.back() = new pthread_thread(this); + } catch (...) 
{ + m_workers.pop_back(); + throw; + } + m_workers.back()->run(); + } +} + + +void core::submit_impl(task_t f) + { m_impl->submit_impl(f); } + +void core::impl::submit_impl(task_t& f) +{ + pthread_scoped_lock lk(m_mutex); + m_task_queue.push(f); + m_cond.signal(); +} + +void core::add_impl(int fd, handler* newh) + { m_impl->add_impl(fd, newh); } + +void core::impl::add_impl(int fd, handler* newh) +{ + try { + mp::set_nonblock(fd); + } catch (...) { + delete newh; + throw; + } + m_state[fd].reset(newh); + newh->m_shared_self = &m_state[fd]; + m_edge.add_notify(fd, EVEDGE_READ); +} + +void core::impl::operator() () +{ + retry: + while(true) { + pthread_scoped_lock lk(m_mutex); + + while(m_task_queue.size() > MP_WAVY_TASK_QUEUE_LIMIT || !m_pollable) { + if(m_end_flag) { return; } + + if(!m_task_queue.empty()) { + task_t ev = m_task_queue.front(); + m_task_queue.pop(); + if(!m_task_queue.empty()) { m_cond.signal(); } + lk.unlock(); + try { + ev(); + } catch (...) { } + goto retry; + } + + m_cond.wait(m_mutex); + } + + if(m_num == m_off) { + m_pollable = false; + lk.unlock(); + + retry_poll: + if(m_end_flag) { return; } + + int num = m_edge.wait(&m_backlog, 1000); + if(num < 0) { + if(errno == EINTR || errno == EAGAIN) { + goto retry_poll; + } else { + throw system_error(errno, "wavy core event failed"); + } + } else if(num == 0) { + goto retry_poll; + } + + lk.relock(m_mutex); + m_off = 0; + m_num = num; + + m_pollable = true; + m_cond.signal(); + } + + int fd = m_backlog[m_off]; + ++m_off; + lk.unlock(); + + try { + m_state[fd]->read_event(); + } catch (...) 
{ + m_edge.shot_remove(fd, EVEDGE_READ); + m_state[fd]->m_shared_self = NULL; + m_state[fd].reset(); + goto retry; + } + + m_edge.shot_reactivate(fd, EVEDGE_READ); + } +} + + +} // namespace wavy +} // namespace mp + diff --git a/src/mpsrc/wavy_core.h b/src/mpsrc/wavy_core.h new file mode 100644 index 0000000..f182854 --- /dev/null +++ b/src/mpsrc/wavy_core.h @@ -0,0 +1,91 @@ +// +// mp::wavy::core +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef WAVY_CORE_H__ +#define WAVY_CORE_H__ + +#include "mp/wavy/core.h" +#include "mp/pthread.h" +#include "wavy_edge.h" + +namespace mp { +namespace wavy { + + +class core::impl { +public: + impl(); + ~impl(); + +public: + void add_thread(size_t num); + + void end(); + bool is_end() const; + + void join(); + void detach(); + + class connext_thread; + + class listen_handler; + void listen(int lsock, listen_callback_t callback); + + class timer_thread; + +public: + inline void add_impl(int fd, handler* newh); + inline void submit_impl(task_t& f); + +public: + void operator() (); + +private: + volatile size_t m_off; + volatile size_t m_num; + volatile bool m_pollable; + + edge::backlog m_backlog; + + typedef shared_ptr shared_handler; + shared_handler* m_state; + + edge m_edge; + + pthread_mutex m_mutex; + pthread_cond m_cond; + + volatile bool m_end_flag; + + typedef std::queue task_queue_t; + task_queue_t m_task_queue; + +private: + typedef std::vector workers_t; + workers_t m_workers; + +private: + impl(const impl&); +}; + + +} // namespace wavy +} // namespace mp + +#endif /* wavy_core.h */ + diff --git a/src/mpsrc/wavy_edge.h b/src/mpsrc/wavy_edge.h new file mode 100644 index 0000000..ca36cf2 --- /dev/null +++ b/src/mpsrc/wavy_edge.h @@ -0,0 +1,38 @@ +#ifndef MP_WAVY_EDGE_H__ +#define MP_WAVY_EDGE_H__ + +#include "mp/pp.h" + +#ifndef MP_WAVY_EDGE +# if defined(HAVE_SYS_EPOLL_H) +# define MP_WAVY_EDGE epoll +# elif defined(HAVE_SYS_EVENT_H) +# define MP_WAVY_EDGE kqueue +# elif defined(HAVE_PORT_H) +# define MP_WAVY_EDGE eventport +# else +# if defined(__linux__) +# define MP_WAVY_EDGE epoll +# elif defined(__APPLE__) && defined(__MACH__) +# define MP_WAVY_EDGE kqueue +# elif defined(__FreeBSD__) || defined(__NetBSD__) +# define MP_WAVY_EDGE kqueue +# elif defined(__SunOS__) // FIXME Solaris 10 +# define MP_WAVY_EDGE kqueue +# else +# define MP_WAVY_EDGE select +# endif +# endif +#endif + +#define MP_WAVY_EDGE_HEADER(sys) \ + MP_PP_HEADER(mpsrc, 
wavy_edge_, sys, ) + +#ifndef MP_WAVY_EDGE_BACKLOG_SIZE +#define MP_WAVY_EDGE_BACKLOG_SIZE 256 +#endif + +#include MP_WAVY_EDGE_HEADER(MP_WAVY_EDGE) + +#endif /* wavy_edge.h */ + diff --git a/src/mpsrc/wavy_edge_epoll.h b/src/mpsrc/wavy_edge_epoll.h new file mode 100644 index 0000000..bd2c1ab --- /dev/null +++ b/src/mpsrc/wavy_edge_epoll.h @@ -0,0 +1,126 @@ +// +// mp::wavy::edge +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef MP_WAVY_EDGE_EPOLL_H__ +#define MP_WAVY_EDGE_EPOLL_H__ + +#include "mp/exception.h" +#include +#include +#include +#include + +namespace mp { +namespace wavy { + + +static const short EVEDGE_READ = EPOLLIN; +static const short EVEDGE_WRITE = EPOLLOUT; + + +class edge { +public: + edge() : m_ep(epoll_create(MP_WAVY_EDGE_BACKLOG_SIZE)) + { + if(m_ep < 0) { + throw system_error(errno, "failed to initialize epoll"); + } + } + + ~edge() + { + ::close(m_ep); + } + + int add_notify(int fd, short event) + { + struct epoll_event ev; + ::memset(&ev, 0, sizeof(ev)); // FIXME + ev.events = event | EPOLLONESHOT; + ev.data.fd = fd; + return epoll_ctl(m_ep, EPOLL_CTL_ADD, fd, &ev); + } + + int shot_reactivate(int fd, short event) + { + struct epoll_event ev; + ::memset(&ev, 0, sizeof(ev)); // FIXME + ev.events = event | EPOLLONESHOT; + ev.data.fd = fd; + return epoll_ctl(m_ep, EPOLL_CTL_MOD, fd, &ev); + } + + int shot_remove(int fd, short event) + { + return epoll_ctl(m_ep, EPOLL_CTL_DEL, 
fd, NULL); + } + + int remove(int fd, short event) + { + return epoll_ctl(m_ep, EPOLL_CTL_DEL, fd, NULL); + } + + struct backlog { + backlog() + { + buf = (struct epoll_event*)::calloc( + sizeof(struct epoll_event), + MP_WAVY_EDGE_BACKLOG_SIZE); + if(!buf) { throw std::bad_alloc(); } + } + + ~backlog() + { + ::free(buf); + } + + int operator[] (int n) + { + return buf[n].data.fd; + } + + private: + struct epoll_event* buf; + friend class edge; + backlog(const backlog&); + }; + + int wait(backlog* result) + { + return wait(result, -1); + } + + int wait(backlog* result, int timeout_msec) + { + return epoll_wait(m_ep, result->buf, + MP_WAVY_EDGE_BACKLOG_SIZE, timeout_msec); + } + +private: + int m_ep; + +private: + edge(const edge&); +}; + + +} // namespace wavy +} // namespace mp + +#endif /* wavy_edge_kqueue.h */ + diff --git a/src/mpsrc/wavy_edge_kqueue.h b/src/mpsrc/wavy_edge_kqueue.h new file mode 100644 index 0000000..53d7bbe --- /dev/null +++ b/src/mpsrc/wavy_edge_kqueue.h @@ -0,0 +1,124 @@ +// +// mp::wavy::edge +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef MP_WAVY_EDGE_KQUEUE_H__ +#define MP_WAVY_EDGE_KQUEUE_H__ + +#include "mp/exception.h" +#include +#include +#include +#include + +namespace mp { +namespace wavy { + + +static const short EVEDGE_READ = EVFILT_READ; +static const short EVEDGE_WRITE = EVFILT_WRITE; + + +class edge { +public: + edge() : m_kq(kqueue()) + { + if(m_kq < 0) { + throw system_error(errno, "failed to initialize kqueue"); + } + } + + ~edge() + { + ::close(m_kq); + } + + int add_notify(int fd, short event) + { + struct kevent kev; + EV_SET(&kev, fd, event, EV_ADD|EV_ONESHOT, 0, 0, NULL); + return kevent(m_kq, &kev, 1, NULL, 0, NULL); + } + + int shot_reactivate(int fd, short event) + { + return add_notify(fd, event); + } + + int shot_remove(int fd, short event) + { return 0; } + + int remove(int fd, short event) + { + struct kevent kev; + EV_SET(&kev, fd, event, EV_DELETE, 0, 0, NULL); + return kevent(m_kq, &kev, 1, NULL, 0, NULL); + } + + struct backlog { + backlog() + { + buf = (struct kevent*)::calloc( + sizeof(struct kevent), + MP_WAVY_EDGE_BACKLOG_SIZE); + if(!buf) { throw std::bad_alloc(); } + } + + ~backlog() + { + ::free(buf); + } + + int operator[] (int n) const + { + return buf[n].ident; + } + + private: + struct kevent* buf; + friend class edge; + backlog(const backlog&); + }; + + int wait(backlog* result) + { + return kevent(m_kq, NULL, 0, result->buf, + MP_WAVY_EDGE_BACKLOG_SIZE, NULL); + } + + int wait(backlog* result, int timeout_msec) + { + struct timespec ts; + ts.tv_sec = timeout_msec / 1000; + ts.tv_nsec = (timeout_msec % 1000) * 1000000; + return kevent(m_kq, NULL, 0, result->buf, + MP_WAVY_EDGE_BACKLOG_SIZE, &ts); + } + +private: + int m_kq; + +private: + edge(const edge&); +}; + + +} // namespace wavy +} // namespace mp + +#endif /* wavy_edge_kqueue.h */ + diff --git a/src/mpsrc/wavy_listen.cc b/src/mpsrc/wavy_listen.cc new file mode 100644 index 0000000..18e2e6b --- /dev/null +++ b/src/mpsrc/wavy_listen.cc @@ -0,0 +1,72 @@ +// +// mp::wavy::core::listen +// 
+// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "wavy_core.h" +#include "mp/exception.h" + +namespace mp { +namespace wavy { + + +class core::impl::listen_handler : public handler { +public: + listen_handler(int fd, core* c, listen_callback_t callback) : + handler(fd), m_core(c), m_callback(callback) { } + + ~listen_handler() { } + + void read_event() + { + while(true) { + int err = 0; + int sock = ::accept(fd(), NULL, NULL); + if(sock < 0) { + if(errno == EAGAIN || errno == EINTR) { + return; + } else { + err = errno; + } + } else if(sock == 0) { + err = errno; + } + + try { + m_core->submit(m_callback, sock, err); + } catch(...) { } + + if(err) { + throw system_error(errno, "mp::wvy::accept: accept failed"); + } + } + } + +private: + core* m_core; + listen_callback_t m_callback; +}; + + +void core::listen(int lsock, listen_callback_t callback) +{ + add(lsock, this, callback); +} + + +} // namespace wavy +} // namespace mp + diff --git a/src/mpsrc/wavy_output.cc b/src/mpsrc/wavy_output.cc new file mode 100644 index 0000000..eeb7567 --- /dev/null +++ b/src/mpsrc/wavy_output.cc @@ -0,0 +1,502 @@ +// +// mp::wavy::output +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "mp/wavy/output.h" +#include "mp/pthread.h" +#include "wavy_edge.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef MP_WAVY_WRITEV_LIMIT +#define MP_WAVY_WRITEV_LIMIT 1024 +#endif + +//#ifndef MP_WAVY_WRITE_QUEUE_LIMIT +//#define MP_WAVY_WRITE_QUEUE_LIMIT 32 +//#endif + +namespace mp { +namespace wavy { + + +class output::impl { +public: + impl(); + ~impl(); + + void add_thread(size_t num); + + void end(); + + void join(); + void detach(); + +public: + void writev(int fd, const iovec* bufvec, const request* reqvec, size_t veclen); + +private: + class context { + public: + context(); + ~context(); + + public: + pthread_mutex& mutex(); +#ifdef MP_WAVY_WRITE_QUEUE_LIMIT + void wait_cond(); +#endif + + bool push(const iovec* bufvec, + const request* reqvec, size_t veclen); + bool empty() const; + size_t size() const; + + iovec* vec(); + size_t veclen() const; + + bool skip_zero(); + + bool consumed(size_t num); + void clear(); + + private: + typedef std::vector bufvec_t; + typedef std::vector reqvec_t; + bufvec_t m_bufvec; + reqvec_t m_reqvec; + pthread_mutex m_mutex; +#ifdef MP_WAVY_WRITE_QUEUE_LIMIT + pthread_cond m_cond; + volatile bool m_wait; +#endif + + private: + context(const context&); + }; + + context* m_fdctx; + + class worker : public pthread_thread { + public: + worker(context* fdctx, volatile bool& end_flag); + ~worker(); + + public: + bool try_write_initial(int fd); + void watch(int fd); + + public: + void operator() (); + + private: + void initial_remove(int fd); + + void 
try_write(int fd); + void success_remove(int fd); + void failed_remove(int fd); + + private: + context* m_fdctx; + volatile bool& m_end_flag; + edge::backlog m_backlog; + edge m_edge; + + private: + worker(); + worker(const worker&); + }; + + volatile bool m_end_flag; + +private: + worker* worker_for(int fd); + + typedef std::vector workers_t; + workers_t m_workers; + +private: + impl(const impl&); +}; + + +output::output() : m_impl(new impl()) { } + +output::impl::impl() : + m_end_flag(false) +{ + struct rlimit rbuf; + if(::getrlimit(RLIMIT_NOFILE, &rbuf) < 0) { + throw system_error(errno, "getrlimit() failed"); + } + m_fdctx = new context[rbuf.rlim_cur]; +} + + +output::~output() { } + +output::impl::~impl() +{ + end(); + for(workers_t::iterator it(m_workers.begin()); + it != m_workers.end(); ++it) { + delete *it; + } + delete[] m_fdctx; +} + +void output::end() { m_impl->end(); } +void output::impl::end() +{ + m_end_flag = true; +} + +void output::join() { m_impl->join(); } +void output::impl::join() +{ + for(workers_t::iterator it(m_workers.begin()); + it != m_workers.end(); ++it) { + (*it)->join(); + } +} + +void output::detach() { m_impl->detach(); } +void output::impl::detach() +{ + for(workers_t::iterator it(m_workers.begin()); + it != m_workers.end(); ++it) { + (*it)->detach(); + } +} + +void output::add_thread(size_t num) { m_impl->add_thread(num); } +void output::impl::add_thread(size_t num) +{ + for(size_t i=0; i < num; ++i) { + m_workers.push_back(NULL); + try { + m_workers.back() = new worker(m_fdctx, m_end_flag); + } catch (...) 
{ + m_workers.pop_back(); + throw; + } + m_workers.back()->run(); + } +} + +output::impl::worker* output::impl::worker_for(int fd) +{ + return m_workers[fd % m_workers.size()]; +} + + +output::impl::context::context() /*: m_wait(false)*/ { } + +output::impl::context::~context() { clear(); } + +inline bool output::impl::context::skip_zero() +{ + size_t offset = 0; + for(; offset < veclen() && vec()[offset].iov_len == 0; ++offset) { } + return consumed(offset); +} + +inline bool output::impl::context::consumed(size_t num) +{ + if(num == 0) { return m_bufvec.empty(); } + for(size_t i=0; i < num; ++i) { + if(m_reqvec[i].finalize) { + (*m_reqvec[i].finalize)(m_reqvec[i].user); + } + } + m_bufvec.erase(m_bufvec.begin(), m_bufvec.begin()+num); + m_reqvec.erase(m_reqvec.begin(), m_reqvec.begin()+num); +#ifdef MP_WAVY_WRITE_QUEUE_LIMIT + if(size() > MP_WAVY_WRITE_QUEUE_LIMIT) { + return false; + } + if(m_wait) { + m_cond.broadcast(); + m_wait = false; + } + return m_bufvec.empty(); +#else + return m_bufvec.empty(); +#endif +} + +inline void output::impl::context::clear() +{ + consumed(m_bufvec.size()); +} + +bool output::impl::context::push(const iovec* bufvec, + const request* reqvec, size_t veclen) +{ + bool watch_needed = m_bufvec.empty(); + m_bufvec.insert(m_bufvec.end(), bufvec, bufvec+veclen); + m_reqvec.insert(m_reqvec.end(), reqvec, reqvec+veclen); + return watch_needed; +} + +pthread_mutex& output::impl::context::mutex() +{ + return m_mutex; +} + +#ifdef MP_WAVY_WRITE_QUEUE_LIMIT +void output::impl::context::wait_cond() +{ + m_wait = true; + m_cond.wait(m_mutex); +} +#endif + +iovec* output::impl::context::vec() +{ + return &m_bufvec.front(); +} + +size_t output::impl::context::veclen() const +{ + return m_bufvec.size(); +} + +bool output::impl::context::empty() const +{ + return m_bufvec.empty(); +} + +size_t output::impl::context::size() const +{ + return m_bufvec.size(); +} + + +output::impl::worker::worker(context* fdctx, volatile bool& end_flag) : + 
pthread_thread(this), + m_fdctx(fdctx), + m_end_flag(end_flag) +{ } + +output::impl::worker::~worker() { } + +void output::impl::worker::watch(int fd) +{ + if(m_edge.add_notify(fd, EVEDGE_WRITE) < 0) { + // FIXME + failed_remove(fd); + } +} + +void output::impl::worker::operator() () +{ + while(!m_end_flag) { + int num = m_edge.wait(&m_backlog, 1000); + if(num < 0) { + if(errno == EINTR || errno == EAGAIN) { + continue; + } else { + throw system_error(errno, "wavy output event failed"); + } + } else if(num == 0) { + continue; + } + for(int i=0; i < num; ++i) { + int fd = m_backlog[i]; + try_write(fd); + } + } +} + +void output::impl::worker::try_write(int fd) +{ + context& ctx(m_fdctx[fd]); + pthread_scoped_lock lk(ctx.mutex()); + + if(ctx.skip_zero()) { + success_remove(fd); + return; + } + + ssize_t wl = ::writev(fd, ctx.vec(), + std::min(ctx.veclen(), (size_t)MP_WAVY_WRITEV_LIMIT)); + if(wl < 0) { + if(errno == EAGAIN || errno == EINTR) { + return; + } else { + failed_remove(fd); + return; + } + } else if(wl == 0) { + failed_remove(fd); + return; + } + + size_t i; + for(i=0; i < ctx.veclen(); ++i) { + if(static_cast(wl) >= ctx.vec()[i].iov_len) { + wl -= ctx.vec()[i].iov_len; + } else { + ctx.vec()[i].iov_base = (void*)(((char*)ctx.vec()[i].iov_base) + wl); + ctx.vec()[i].iov_len -= wl; + break; + } + } + if(ctx.consumed(i)) { + success_remove(fd); + return; + } + if(m_edge.shot_reactivate(fd, EVEDGE_WRITE) < 0) { + // FIXME + //failed_remove(fd); + } +} + +bool output::impl::worker::try_write_initial(int fd) +{ + context& ctx(m_fdctx[fd]); + + if(ctx.skip_zero()) { + return true; + } + + ssize_t wl = ::writev(fd, ctx.vec(), + std::min(ctx.veclen(), (size_t)MP_WAVY_WRITEV_LIMIT)); + if(wl < 0) { + if(errno == EAGAIN || errno == EINTR) { + return false; + } else { + initial_remove(fd); + return true; + } + } else if(wl == 0) { + initial_remove(fd); + return true; + } + + size_t i; + for(i=0; i < ctx.veclen(); ++i) { + if(static_cast(wl) >= ctx.vec()[i].iov_len) { 
+ wl -= ctx.vec()[i].iov_len; + } else { + ctx.vec()[i].iov_base = (void*)(((char*)ctx.vec()[i].iov_base) + wl); + ctx.vec()[i].iov_len -= wl; + break; + } + } + return ctx.consumed(i); +} + +inline void output::impl::worker::initial_remove(int fd) +{ + m_fdctx[fd].clear(); + ::shutdown(fd, SHUT_RD); // FIXME shutdown() only break socket +} + +inline void output::impl::worker::failed_remove(int fd) +{ + m_fdctx[fd].clear(); + m_edge.shot_remove(fd, EVEDGE_WRITE); + // break fd positively + // input side will catch the exception. + ::shutdown(fd, SHUT_RD); // FIXME shutdown() only break socket +} + +inline void output::impl::worker::success_remove(int fd) +{ + m_edge.shot_remove(fd, EVEDGE_WRITE); // ignore error +} + + +void output::write(int fd, const char* buf, size_t buflen) +{ + struct iovec bufvec = {(void*)buf, buflen}; + request reqvec(NULL, NULL); + m_impl->writev(fd, &bufvec, &reqvec, 1); +} + +void output::writev(int fd, const iovec* vec, size_t veclen) +{ + request reqvec[veclen]; + memset(reqvec, 0, sizeof(request)*veclen); + m_impl->writev(fd, vec, reqvec, veclen); +} + +void output::write(int fd, const char* buf, size_t buflen, request req) +{ + struct iovec bufvec = {(void*)buf, buflen}; + m_impl->writev(fd, &bufvec, &req, 1); +} + +void output::write(int fd, const char* buf, size_t buflen, finalize_t finalize, void* user) +{ + struct iovec bufvec = {(void*)buf, buflen}; + request req(finalize, user); + m_impl->writev(fd, &bufvec, &req, 1); +} + +void output::writev(int fd, const iovec* vec, size_t veclen, finalize_t finalize, void* user) +{ + request reqvec[veclen]; + memset(reqvec, 0, sizeof(request)*(veclen-1)); + reqvec[veclen-1] = request(finalize, user); + m_impl->writev(fd, vec, reqvec, veclen); +} + +void output::writev(int fd, const iovec* vec, size_t veclen, request req) +{ + request reqvec[veclen]; + memset(reqvec, 0, sizeof(request)*(veclen-1)); + reqvec[veclen-1] = req; + m_impl->writev(fd, vec, reqvec, veclen); +} + +void 
output::writev(int fd, const iovec* bufvec, const request* reqvec, size_t veclen) +{ + m_impl->writev(fd, bufvec, reqvec, veclen); +} + +void output::impl::writev(int fd, const iovec* bufvec, const request* reqvec, size_t veclen) +{ + context& ctx(m_fdctx[fd]); + pthread_scoped_lock lk(ctx.mutex()); + if(ctx.push(bufvec, reqvec, veclen)) { + //worker_for(fd)->watch(fd); + if(!worker_for(fd)->try_write_initial(fd)) { + worker_for(fd)->watch(fd); + } + } else { +#ifdef MP_WAVY_WRITE_QUEUE_LIMIT + // FIXME sender or receiver must not wait to flush to avoid deadlock. + while(ctx.size() > MP_WAVY_WRITE_QUEUE_LIMIT) { + ctx.wait_cond(); + } +#endif + } +} + + +} // namespace wavy +} // namespace mp + diff --git a/src/mpsrc/wavy_timer.cc b/src/mpsrc/wavy_timer.cc new file mode 100644 index 0000000..c45fcb1 --- /dev/null +++ b/src/mpsrc/wavy_timer.cc @@ -0,0 +1,59 @@ +// +// mp::wavy::core::timer +// +// Copyright (C) 2008 FURUHASHI Sadayuki +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "wavy_core.h" +#include + +namespace mp { +namespace wavy { + + +class core::impl::timer_thread { +public: + timer_thread(core* c, + const timespec* interval, + timer_callback_t callback) : + m_interval(*interval), + m_core(c), m_callback(callback) { } + + void operator() () + { + while(!m_core->is_end()) { + nanosleep(&m_interval, NULL); + m_core->submit(m_callback); + } + } + +private: + const timespec m_interval; + core* m_core; + timer_callback_t m_callback; + timer_thread(); +}; + +void core::timer(const timespec* interval, timer_callback_t callback) +{ + add_thread(1); + impl::timer_thread t(this, interval, callback); + submit(t); +} + + +} // namespace wavy +} // namespace mp + diff --git a/src/rpc/Makefile.am b/src/rpc/Makefile.am new file mode 100644 index 0000000..5cda167 --- /dev/null +++ b/src/rpc/Makefile.am @@ -0,0 +1,40 @@ + +noinst_LIBRARIES = libkumo_rpc.a libkumo_cluster.a + +libkumo_rpc_a_CPPFLAGS = -I.. + +libkumo_cluster_a_CPPFLAGS = -I.. + +libkumo_rpc_a_SOURCES = \ + address.cc \ + session.cc + +libkumo_cluster_a_SOURCES = \ + $(libkumo_rpc_a_SOURCES) \ + server.cc \ + cluster.cc + +noinst_HEADERS = \ + address.h \ + client.h \ + client_tmpl.h \ + cluster.h \ + connection.h \ + message.h \ + request.h \ + protocol.h \ + responder.h \ + responder_impl.h \ + retry.h \ + rpc.h \ + server.h \ + session.h \ + session_impl.h \ + transport.h \ + transport_impl.h \ + types.h \ + vrefbuffer.h \ + wavy.h \ + weak_responder.h \ + weak_responder_impl.h + diff --git a/src/rpc/address.cc b/src/rpc/address.cc new file mode 100644 index 0000000..ef67b2e --- /dev/null +++ b/src/rpc/address.cc @@ -0,0 +1,106 @@ +#include "rpc/address.h" +#include +#include + +namespace rpc { + + +address::address(const struct sockaddr_in& addr) +{ +#ifdef KUMO_IPV6 + m_serial_length = 6; + memcpy(&m_serial_address[0], &addr.sin_port, 2); + memcpy(&m_serial_address[2], &addr.sin_addr.s_addr, 4); +#else + m_serial = addr.sin_addr.s_addr; + m_serial <<= 16; + 
m_serial |= addr.sin_port; +#endif +} + +#ifdef KUMO_IPV6 +address::address(const struct sockaddr_in6& addr) +{ + m_serial_length = 22; + memcpy(&m_serial_address[0], &addr.sin6_port, 2); + memcpy(&m_serial_address[2], addr.sin6_addr.s6_addr, 16); + memcpy(&m_serial_address[18], &addr.sin6_scope_id, 4); +} +#endif + +address::address(const char* ptr, unsigned int len) +{ +#ifdef KUMO_IPV6 + if(len != 6 && len != 22) { + throw std::runtime_error("unknown address type"); + } + + memcpy(m_serial_address, ptr, len); + m_serial_length = len; + +#else + if(len != 6) { + throw std::runtime_error("unknown address type"); + } + + m_serial = 0; + memcpy(&m_serial, ptr, len); // FIXME +#endif +} + + +void address::getaddr(sockaddr* addrbuf) const +{ +#ifdef KUMO_IPV6 + if(m_serial_length == 6) { + sockaddr_in* addr = reinterpret_cast(addrbuf); + + memset(addr, 0, sizeof(sockaddr_in)); + addr->sin_family = AF_INET; + addr->sin_port = raw_port(); + addr->sin_addr.s_addr = *((uint32_t*)&m_serial_address[2]); + + } else { + sockaddr_in6* addr = reinterpret_cast(addrbuf); + + memset(addr, 0, sizeof(sockaddr_in6)); + addr->sin6_family = AF_INET6; + addr->sin6_port = raw_port(); + memcpy(addr->sin6_addr.s6_addr, &m_serial_address[2], 16); + addr->sin6_scope_id = *((uint32_t*)&m_serial_address[18]); + } + +#else + sockaddr_in* addr = reinterpret_cast(addrbuf); + + memset(addr, 0, sizeof(sockaddr_in)); + addr->sin_family = AF_INET; + addr->sin_port = raw_port(); + addr->sin_addr.s_addr = (uint32_t)(m_serial >> 16); +#endif +} + + +std::ostream& operator<< (std::ostream& stream, const address& addr) +{ +#ifdef KUMO_IPV6 + if(addr.m_serial_length == 6) { + uint32_t sa = *(uint32_t*)&addr.m_serial_address[2]; + char buf[16]; + return stream << ::inet_ntop(AF_INET, &sa, buf, sizeof(buf)) << ':' << ntohs(addr.raw_port()); + } else { + unsigned char sa[16]; + char buf[41]; + memcpy(sa, &addr.m_serial_address[2], sizeof(sa)); + return stream << '[' << ::inet_ntop(AF_INET6, sa, buf, 
sizeof(buf)) << "]:" << ntohs(addr.raw_port()); + } +#else + uint32_t sa = (uint32_t)(addr.m_serial >> 16); + char buf[16]; + return stream << ::inet_ntop(AF_INET, &sa, buf, sizeof(buf)) << ':' << ntohs(addr.raw_port()); +#endif +} + + +} // namespace rpc + diff --git a/src/rpc/address.h b/src/rpc/address.h new file mode 100644 index 0000000..da9fe9d --- /dev/null +++ b/src/rpc/address.h @@ -0,0 +1,213 @@ +#ifndef RPC_ADDRESS_H__ +#define RPC_ADDRESS_H__ + +#include +#include +#include +#include +#include +#include + +#include + +namespace rpc { + + +class address { +public: + address(); + address(const struct sockaddr_in& addr); +#ifdef KUMO_IPV6 + address(const struct sockaddr_in6& addr); +#endif + address(const char* ptr, unsigned int len); +// address(const address& o); + +public: + unsigned int dump_size() const; + const char* dump() const; + + static const unsigned int MAX_DUMP_SIZE = 22; + + bool connectable() const; + +private: + // +--+----+ + // | 2| 4 | + // +--+----+ + // port network byte order + // IPv4 address + // + // +--+----------------+----+ + // | 2| 16 | 4 | + // +--+----------------+----+ + // port network byte order + // IPv6 address + // scope id +#ifdef KUMO_IPV6 + char m_serial_address[22]; + unsigned int m_serial_length; // 6 or 22 +#else + uint64_t m_serial; +#endif + +public: + socklen_t addrlen() const; + void getaddr(sockaddr* addrbuf) const; + uint16_t port() const; + void set_port(uint16_t p); +private: + uint16_t raw_port() const; + +public: + bool operator== (const address& addr) const; + bool operator!= (const address& addr) const; + bool operator< (const address& addr) const; + bool operator> (const address& addr) const; + + friend std::ostream& operator<< (std::ostream& stream, const address& addr); +}; + +std::ostream& operator<< (std::ostream& stream, const address& addr); + + +inline address::address() : +#ifdef KUMO_IPV6 + m_serial_length(0) +{ + *((uint16_t*)&m_serial_address[0]) = 0; +} +#else + m_serial(0) +{ } +#endif 
+ +//inline address::address(const address& o) : +// m_serial_length(o.m_serial_length) +//{ +// memcpy(m_serial_address, o.m_serial_address, m_serial_length); +//} + +inline unsigned int address::dump_size() const +{ +#ifdef KUMO_IPV6 + return m_serial_length; +#else + return 6; +#endif +} +inline const char* address::dump() const +{ +#ifdef KUMO_IPV6 + return m_serial_address; +#else + return (char*)&m_serial; +#endif +} + +inline uint16_t address::port() const +{ + return ntohs(raw_port()); +} + +inline void address::set_port(uint16_t p) +{ +#ifdef KUMO_IPV6 + *((uint16_t*)m_serial_address) = htons(p); +#else + m_serial &= 0x0000ffffffffffffULL; + m_serial |= htons(p); +#endif +} + +inline bool address::connectable() const +{ + return raw_port() != 0; +} + +inline socklen_t address::addrlen() const +{ +#ifdef KUMO_IPV6 + return m_serial_length == 6 ? + sizeof(sockaddr_in) : sizeof(sockaddr_in6); +#else + return sizeof(sockaddr_in); +#endif +} + +inline bool address::operator== (const address& addr) const +{ +#ifdef KUMO_IPV6 + return m_serial_length == addr.m_serial_length && + memcmp(m_serial_address, addr.m_serial_address, m_serial_length) == 0; +#else + return m_serial == addr.m_serial; +#endif +} + +inline bool address::operator!= (const address& addr) const +{ + return !(*this == addr); +} + +inline bool address::operator< (const address& addr) const +{ +#ifdef KUMO_IPV6 + if(m_serial_length == addr.m_serial_length) { + return memcmp(m_serial_address, addr.m_serial_address, m_serial_length) < 0; + } else { + return m_serial_length < addr.m_serial_length; + } +#else + return m_serial < addr.m_serial; +#endif +} + +inline bool address::operator> (const address& addr) const +{ +#ifdef KUMO_IPV6 + if(m_serial_length == addr.m_serial_length) { + return memcmp(m_serial_address, addr.m_serial_address, m_serial_length) > 0; + } else { + return m_serial_length > addr.m_serial_length; + } +#else + return m_serial > addr.m_serial; +#endif +} + +inline uint16_t 
address::raw_port() const +{ +#ifdef KUMO_IPV6 + return *((uint16_t*)&m_serial_address[0]); +#else + return (uint16_t)m_serial; +#endif +} + + +#ifdef MSGPACK_OBJECT_HPP__ +inline address& operator>> (msgpack::object o, address& v) +{ + using namespace msgpack; + if(o.type != type::RAW) { throw type_error(); } + v = address(o.via.raw.ptr, o.via.raw.size); + return v; +} + +template +inline msgpack::packer& operator<< (msgpack::packer& o, const address& v) +{ + using namespace msgpack; + o.pack_raw(v.dump_size()); + o.pack_raw_body(v.dump(), v.dump_size()); + return o; +} +#endif + + +} // namespace rpc + + +#endif /* rpc/address.h */ + diff --git a/src/rpc/client.h b/src/rpc/client.h new file mode 100644 index 0000000..542c2f9 --- /dev/null +++ b/src/rpc/client.h @@ -0,0 +1,97 @@ +#ifndef RPC_CLIENT_H__ +#define RPC_CLIENT_H__ + +#include "rpc/rpc.h" +#include "rpc/protocol.h" +#include "log/mlogger.h" // FIXME +#include +#include + +namespace rpc { + + +template +class client : public session_manager, public transport_manager { +public: + typedef mp::shared_ptr shared_session; + typedef mp::weak_ptr weak_session; + + typedef mp::function callback_t; + +public: + client(unsigned int connect_timeout_msec, + unsigned short connect_retry_limit); + + virtual ~client(); + + virtual void transport_lost(shared_session& s); + + virtual void connect_failed(shared_session s, address addr, int error) + { + transport_lost(s); + } + + virtual void dispatch( + shared_session from, weak_responder response, + method_id method, msgobj param, auto_zone z) = 0; + +public: + // step callback timeout count + void step_timeout(); + + // get/create RPC stub instance for the address. + // if the session is not exist, connect to the session + shared_session get_session(const address& addr); + + // get/create RPC stub instance for the address. + // if the session is not exist, don't connect to the session + // and returns unbound RPC stub instance. 
+ shared_session create_session(const address& addr); + + // add new connection and new managed Session and bind them. + shared_session add(int fd, const address& addr); + +protected: + // connect session to the address and return true if + // it is not bound. + bool async_connect(const address& addr, shared_session& s); + +private: + template + shared_session get_session_impl(const address& addr); + +private: + mp::pthread_rwlock m_sessions_rwlock; + typedef std::multimap sessions_t; + sessions_t m_sessions; + + struct connect_pack { + shared_session session; + address addr; + }; + + void connect_callback(address addr, shared_session s, int fd, int err); + +protected: + unsigned int m_connect_timeout_msec; + unsigned short m_connect_retry_limit; + +public: + virtual void dispatch_request( + basic_shared_session& s, weak_responder response, + method_id method, msgobj param, auto_zone z); + + virtual void transport_lost_notify(basic_shared_session& s); + +private: + client(); + client(const client&); +}; + + +} // namespace rpc + +#include "rpc/client_tmpl.h" + +#endif /* rpc/client.h */ + diff --git a/src/rpc/client_tmpl.h b/src/rpc/client_tmpl.h new file mode 100644 index 0000000..bf68b7a --- /dev/null +++ b/src/rpc/client_tmpl.h @@ -0,0 +1,219 @@ +#ifndef RPC_CLIENT_TMPL_H__ +#define RPC_CLIENT_TMPL_H__ + +#include +#include +#include +#include + +namespace rpc { + + +template +client::client( + unsigned int connect_timeout_msec, + unsigned short connect_retry_limit) : + m_connect_timeout_msec(connect_timeout_msec), + m_connect_retry_limit(connect_retry_limit) +{ } + +template +client::~client() { } + + +template +template +typename client::shared_session +client::get_session_impl(const address& addr) +{ + shared_session s; + + { + pthread_scoped_rdlock rdlk(m_sessions_rwlock); + + std::pair pair = + m_sessions.equal_range(addr); + + while(pair.first != pair.second) { + s = pair.first->second.lock(); + if(s && !s->is_lost()) { return s; } + ++pair.first; + 
//m_sessions.erase(pair.first++); + } + } + + // ほとんどの場合rwlockだけでヒットする + { + pthread_scoped_wrlock wrlk(m_sessions_rwlock); + + std::pair pair = + m_sessions.equal_range(addr); + + while(pair.first != pair.second) { + s = pair.first->second.lock(); + if(s && !s->is_lost()) { return s; } + //++pair.first; + m_sessions.erase(pair.first++); + } + + LOG_TRACE("no session exist, creating ",addr); + s.reset(new Session(this)); + m_sessions.insert( typename sessions_t::value_type( + addr, weak_session(s)) ); + } + + if(!CONNECT) { + return s; + } + + async_connect(addr, s); + return s; +} + + +template +typename client::shared_session +client::get_session(const address& addr) +{ + LOG_TRACE("get session ",addr); + return get_session_impl(addr); +} + + +template +typename client::shared_session +client::create_session(const address& addr) +{ + LOG_TRACE("create session ",addr); + return get_session_impl(addr); +} + + +template +typename client::shared_session +client::add(int fd, const address& addr) +{ + shared_session s(new Session(this)); + wavy::add(fd, s, this); + + pthread_scoped_wrlock lk(m_sessions_rwlock); + m_sessions.insert( typename sessions_t::value_type(addr, s) ); + return s; +} + + +template +bool client::async_connect( + const address& addr, shared_session& s) +{ + // rough check + if(!s->is_lost() && s->is_bound()) { return false; } + + LOG_INFO("connecting to ",addr); + char addrbuf[addr.addrlen()]; + addr.getaddr((sockaddr*)&addrbuf); + + using namespace mp::placeholders; + wavy::connect(PF_INET, SOCK_STREAM, 0, + (sockaddr*)addrbuf, sizeof(addrbuf), + m_connect_timeout_msec, + mp::bind( + &client::connect_callback, + this, addr, s, _1, _2)); + + s->increment_connect_retried_count(); + return true; +} + +template +void client::connect_callback( + address addr, shared_session s, int fd, int err) +{ + if(fd >= 0) { +#ifdef USE_TCP_NODELAY + // XXX + int on = 1; + ::setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); // ignore error +#endif + 
LOG_INFO("connect success ",addr," fd(",fd,")"); + try { + basic_shared_session bs(mp::static_pointer_cast(s)); + wavy::add(fd, bs, this); + } catch (...) { + ::close(fd); + throw; + } + return; + } + +//error: + LOG_INFO("connect failed ",addr,": ",strerror(err)); + if(s->connect_retried_count() > m_connect_retry_limit) { + connect_failed(s, addr, err); + return; + } + + // retry connect + // FIXME: retry only when err == ETIMEDOUT? + async_connect(addr, s); +} + + +template +void client::transport_lost(shared_session& s) +{ + msgpack::object res; + res.type = msgpack::type::NIL; + msgpack::object err; + err.type = msgpack::type::POSITIVE_INTEGER; + err.via.u64 = protocol::NODE_LOST_ERROR; + + s->force_lost(res, err); +} + + +template +void client::dispatch_request( + basic_shared_session& s, weak_responder response, + method_id method, msgobj param, auto_zone z) +{ + dispatch(mp::static_pointer_cast(s), + response, method, param, z); +} + + +template +void client::step_timeout() +{ + LOG_TRACE("step timeout ..."); + + pthread_scoped_wrlock lk(m_sessions_rwlock); + for(typename sessions_t::iterator it(m_sessions.begin()), + it_end(m_sessions.end()); it != it_end; ) { + shared_session s(it->second.lock()); + if(s && !s->is_lost()) { + wavy::submit(&basic_session::step_timeout, + s, + mp::static_pointer_cast(s)); + ++it; + } else { + m_sessions.erase(it++); + } + } + + LOG_TRACE("step timeout done"); +} + + +template +void client::transport_lost_notify(basic_shared_session& s) +{ + shared_session x(mp::static_pointer_cast(s)); + transport_lost(x); +} + + +} // namespace rpc + +#endif /* rpc/client.h */ + diff --git a/src/rpc/cluster.cc b/src/rpc/cluster.cc new file mode 100644 index 0000000..25c6b9a --- /dev/null +++ b/src/rpc/cluster.cc @@ -0,0 +1,250 @@ +#include "rpc/cluster.h" + +namespace rpc { + + +cluster_transport::cluster_transport(int fd, + basic_shared_session s, transport_manager* srv) : + basic_transport(fd, s, srv), + connection(fd), + 
m_process_state(NULL) +{ + send_init(); + s->bind_transport(this); +} + +cluster_transport::cluster_transport(int fd, + transport_manager* srv) : + basic_transport(fd, basic_shared_session(), srv), // null session + connection(fd), + m_process_state(NULL) +{ } + +cluster_transport::~cluster_transport() +{ + if(m_session) { + m_session->unbind_transport(this, m_session); + } +} + +void cluster_transport::rebind(basic_shared_session s) +{ + if(m_session) { + m_session->unbind_transport(this, m_session); + } + m_session = s; + s->bind_transport(this); +} + +void cluster_transport::send_init() +{ + msgpack::sbuffer buf; + rpc_initmsg param( + get_server()->m_self_addr, + get_server()->m_self_id); + msgpack::pack(buf, param); + + wavy::request req(&::free, buf.data()); + wavy::write(fd(), buf.data(), buf.size(), req); + buf.release(); + LOG_TRACE("sent init message"); +} + +cluster* cluster_transport::get_server() + { return get_server(get_manager()); } + +cluster* cluster_transport::get_server(transport_manager* srv) + { return static_cast(srv); } + + + +node::node(session_manager* mgr) : + session(mgr), m_role(-1) { } + +node::~node() { } + +bool node::set_role(role_type role_id) +{ + //if(m_role == -1) { m_role = role_id; return true; } + //else { return false; } + return __sync_bool_compare_and_swap(&m_role, -1, role_id); +} + +void cluster_transport::init_message(msgobj msg, auto_zone z) +{ + rpc_message rpc(msg.convert()); + + if(!rpc.is_cluster_init()) { + // server node + LOG_DEBUG("enter subsys state ",msg); + if(m_session) { throw msgpack::type_error(); } + + cluster::subsys* sub = + static_cast(&get_server()->subsystem()); + rebind( sub->add_session() ); + + m_process_state = &cluster_transport::subsys_state; + + // re-process this message + submit_message(msg, z); + return; + } + + // cluster node + rpc_initmsg init(msg.convert()); + + LOG_TRACE("receive init message: ",(uint16_t)init.role_id()," ",init.addr()); + + if(!m_session) { + 
if(!init.addr().connectable()) { + throw std::runtime_error("invalid address"); + } + + send_init(); + rebind( get_server()->create_session(init.addr()) ); + } + + node* n = static_cast(m_session.get()); + if(n->set_role(init.role_id())) { + n->m_addr = init.addr(); + // FIXME submit? + wavy::submit(&cluster::new_node, get_server(), + init.addr(), init.role_id(), + mp::static_pointer_cast(m_session)); + } + + m_process_state = &cluster_transport::cluster_state; +} + +void cluster_transport::subsys_state(msgobj msg, msgpack::zone* newz) +{ + auto_zone z(newz); +// LOG_TRACE("receive rpc message: ",msg); + rpc_message rpc(msg.convert()); + + if(rpc.is_request()) { + rpc_request msgreq(msg.convert()); + weak_responder response(m_session, msgreq.msgid()); + get_server()->subsystem_dispatch( + mp::static_pointer_cast(m_session), + response, msgreq.method(), msgreq.param(), z); + + } else { + rpc_response msgres(msg.convert()); + basic_transport::process_response( + msgres.result(), msgres.error(), msgres.msgid(), z); + } +} + +void cluster_transport::cluster_state(msgobj msg, msgpack::zone* newz) +{ + auto_zone z(newz); +// LOG_TRACE("receive rpc message: ",msg); + rpc_message rpc(msg.convert()); + + if(rpc.is_request()) { + rpc_request msgreq(msg.convert()); + weak_responder response(m_session, msgreq.msgid()); + get_server()->cluster_dispatch( + mp::static_pointer_cast(m_session), + response, msgreq.method(), msgreq.param(), z); + + } else { + rpc_response msgres(msg.convert()); + basic_transport::process_response( + msgres.result(), msgres.error(), msgres.msgid(), z); + } +} + + + +cluster::cluster(role_type self_id, + const address& self_addr, + unsigned int connect_timeout_msec, + unsigned short connect_retry_limit) : + client_t(connect_timeout_msec, connect_retry_limit), + m_self_id(self_id), + m_self_addr(self_addr), + m_subsystem(this) { } + +cluster::~cluster() { } + +void cluster::accepted(int fd) +{ + wavy::add(fd, (client_t*)this); +} + + +shared_node 
cluster::get_node(const address& addr) +{ + shared_node n( get_session(addr) ); + if(!n->addr().connectable()) { + n->m_addr = addr; + } + return n; +} + +void cluster::transport_lost(shared_node& n) +{ + if(n->connect_retried_count() > m_connect_retry_limit) { + LOG_DEBUG("give up to reconnect ",n->addr()); + client_t::transport_lost(n); + + if(n->is_role_set()) { + // node is lost + lost_node(n->addr(), n->role()); + } + + } else if(n->addr().connectable()) { + LOG_DEBUG("reconnect to ",n->addr()); + async_connect(n->addr(), n); + + } else { + // FIXME non-connectable node? + LOG_DEBUG("lost node is not connectable ",n->addr()); + client_t::transport_lost(n); + } +} + + + +cluster::subsys::subsys(cluster* srv) : + m_srv(srv) { } + +cluster::subsys::~subsys() { } + +basic_shared_session cluster::subsys::add_session() +{ + basic_shared_session s(new peer(this)); + void* k = (void*)s.get(); + + pthread_scoped_lock lk(m_peers_mutex); + m_peers.insert( peers_t::value_type(k, basic_weak_session(s)) ); + return s; +} + + + +// connection::submit_message is hooked. +// transport::process_request won't be called. + +void cluster::dispatch( + shared_node from, weak_responder response, + method_id method, msgobj param, auto_zone z) +{ + throw std::logic_error("cluster::dispatch called"); +} + +void cluster::subsys::dispatch( + shared_peer from, weak_responder response, + method_id method, msgobj param, auto_zone z) +{ + throw std::logic_error("cluster::subsys::dispatch called"); +} + + +// FIXME step_timeout: +// remove that is_role_set() == false? 
+ +} // namespace rpc + diff --git a/src/rpc/cluster.h b/src/rpc/cluster.h new file mode 100644 index 0000000..4b4673e --- /dev/null +++ b/src/rpc/cluster.h @@ -0,0 +1,241 @@ +#ifndef RPC_CLUSTER_H__ +#define RPC_CLUSTER_H__ + +#include "rpc/client.h" +#include "rpc/server.h" +#include +#include +#include + +namespace rpc { + + +class cluster; +class cluster_transport; + + +class node : public session { +public: + node(session_manager* mgr); + ~node(); + +public: + const address& addr() const { return m_addr; } + bool is_role_set() const { return m_role >= 0; } + role_type role() const { return m_role; } + +private: + address m_addr; + friend class cluster; + +private: + inline bool set_role(role_type role_id); + friend class cluster_transport; + short m_role; + +private: + node(); + node(const node&); +}; + +typedef mp::shared_ptr shared_node; +typedef mp::weak_ptr weak_node; + + +class cluster_transport : public basic_transport, public connection { +public: + // cluster::get_node + cluster_transport(int fd, basic_shared_session s, transport_manager* srv); + + // cluster::accepted + cluster_transport(int fd, transport_manager* srv); + + ~cluster_transport(); + + void submit_message(msgobj msg, auto_zone& z); + +private: + void send_init(); + void rebind(basic_shared_session s); + cluster* get_server(); + cluster* get_server(transport_manager* srv); + +private: + static const short PEER_NOT_SET = -1; + static const short PEER_SERVER = -2; + + void (cluster_transport::*m_process_state)(msgobj msg, msgpack::zone* newz); + + void init_message(msgobj msg, auto_zone z); + void subsys_state(msgobj msg, msgpack::zone* newz); + void cluster_state(msgobj msg, msgpack::zone* newz); + +private: + cluster_transport(); + cluster_transport(const cluster_transport&); +}; + +inline void cluster_transport::submit_message(msgobj msg, auto_zone& z) +{ + if(!m_process_state) { + init_message(msg, z); + } else { + // FIXME better performance? 
+ //(this->*m_process_state)(msg, z.release()); + wavy::submit(m_process_state, + shared_self(), + msg, z.get()); + z.release(); + } +} + + +class cluster : protected client { +public: + typedef client client_t; + + typedef rpc::shared_peer shared_session; + typedef rpc::weak_peer weak_session; + + cluster(role_type self_id, + const address& self_addr, + unsigned int connect_timeout_msec, + unsigned short connect_retry_limit); + + virtual ~cluster(); + + // called when new node is connected. + virtual void new_node(address addr, role_type id, shared_node n) { } + + // called when node is lost. + virtual void lost_node(address addr, role_type id) { } + + + virtual void cluster_dispatch( + shared_node from, weak_responder response, + method_id method, msgobj param, auto_zone z) = 0; + + virtual void subsystem_dispatch( + shared_peer from, weak_responder response, + method_id method, msgobj param, auto_zone z) + { + throw msgpack::type_error(); + } + +public: + // step timeout count. + void step_timeout(); + + // add accepted connection + void accepted(int fd); + + // get/create RPC stub instance for the address. + shared_node get_node(const address& addr); + + // return self address; + const address& addr() const; + + // get server interface. + // it manages non-cluster clients. 
+ server& subsystem(); + +private: + void transport_lost(shared_node& s); + +private: + role_type m_self_id; + address m_self_addr; + friend class cluster_transport; + +private: + virtual void dispatch( + shared_node from, weak_responder response, + method_id method, msgobj param, auto_zone z); + +private: + class subsys : public server { + public: + subsys(cluster* srv); + ~subsys(); + + public: + void dispatch( + shared_peer from, weak_responder response, + method_id method, msgobj param, auto_zone z); + + basic_shared_session add_session(); + + private: + cluster* m_srv; + + private: + subsys(); + subsys(const subsys&); + }; + + subsys m_subsystem; + +private: + cluster(); + cluster(const cluster&); +}; + + +inline void cluster::step_timeout() +{ + client_t::step_timeout(); + m_subsystem.step_timeout(); +} + +inline const address& cluster::addr() const +{ + return m_self_addr; +} + +inline server& cluster::subsystem() +{ + return static_cast(m_subsystem); +} + + + +template +class request { +public: + request(shared_node from, msgobj param) : + m_from(from) + { + param.convert(&m_param); + } + +public: + Parameter& param() + { + return m_param; + } + + const Parameter& param() const + { + return m_param; + } + + shared_node& node() + { + return m_from; + } + +private: + Parameter m_param; + shared_node m_from; + +private: + request(); + request(const request&); +}; + + + +} // namespace rpc + +#endif /* rpc/cluster.h */ + diff --git a/src/rpc/connection.h b/src/rpc/connection.h new file mode 100644 index 0000000..fa6487f --- /dev/null +++ b/src/rpc/connection.h @@ -0,0 +1,158 @@ +#ifndef RPC_CONNECTION_H__ +#define RPC_CONNECTION_H__ + +#include "log/mlogger.h" //FIXME +#include "rpc/types.h" +#include "rpc/protocol.h" +#include "rpc/wavy.h" +#include +#include +#include +#include +#include +#include +#include + +#ifndef RPC_INITIAL_BUFFER_SIZE +#define RPC_INITIAL_BUFFER_SIZE (64*1024) +#endif + +#ifndef RPC_BUFFER_RESERVATION_SIZE +#define 
RPC_BUFFER_RESERVATION_SIZE (8*1024) +#endif + +namespace rpc { + + +template +class connection : public mp::wavy::handler { +public: + connection(int fd); + virtual ~connection(); + +public: + // from wavy: readable notification + void read_event(); + + void submit_message(msgobj msg, auto_zone& z); + + void process_message(msgobj msg, msgpack::zone* newz); + + void process_request(method_id method, msgobj param, msgid_t msgid, auto_zone& z); + + void dispatch_request(method_id method, msgobj param, responder& response, auto_zone& z) + { + throw msgpack::type_error(); + } + + void process_response(msgobj result, msgobj error, msgid_t msgid, auto_zone& z) + { + throw msgpack::type_error(); + } + +private: + msgpack::unpacker m_pac; + +private: + connection(); + connection(const connection&); +}; + + +template +connection::connection(int fd) : + mp::wavy::handler(fd), + m_pac(RPC_INITIAL_BUFFER_SIZE) { } + +template +connection::~connection() { } + + +template +void connection::connection::read_event() +try { + m_pac.reserve_buffer(RPC_BUFFER_RESERVATION_SIZE); + + ssize_t rl = ::read(fd(), m_pac.buffer(), m_pac.buffer_capacity()); + if(rl < 0) { + if(errno == EAGAIN || errno == EINTR) { + return; + } else { + throw std::runtime_error("read error"); + } + } else if(rl == 0) { + throw std::runtime_error("connection closed"); + } + + m_pac.buffer_consumed(rl); + + while(m_pac.execute()) { + msgobj msg = m_pac.data(); + std::auto_ptr z( m_pac.release_zone() ); + m_pac.reset(); + static_cast(this)->submit_message(msg, z); + } + +} catch(msgpack::type_error& e) { + LOG_ERROR("rpc packet: type error"); + throw; +} catch(std::exception& e) { + LOG_WARN("rpc packet: ", e.what()); + throw; +} catch(...) 
{ + LOG_ERROR("rpc packet: unknown error"); + throw; +} + +template +inline void connection::submit_message(msgobj msg, auto_zone& z) +{ + wavy::submit(&IMPL::process_message, + shared_self(), msg, z.get()); + z.release(); + //static_cast(this)->process_message(msg, z.release()); +} + +template +void connection::process_message(msgobj msg, msgpack::zone* newz) +try { + auto_zone z(newz); + rpc_message rpc(msg.convert()); + + if(rpc.is_request()) { + rpc_request msgreq(msg.convert()); + static_cast(this)->process_request( + msgreq.method(), msgreq.param(), msgreq.msgid(), z); + + } else { + rpc_response msgres(msg.convert()); + static_cast(this)->process_response( + msgres.result(), msgres.error(), msgres.msgid(), z); + } + +} catch(msgpack::type_error& e) { + LOG_ERROR("rpc packet: type error"); + throw; +} catch(std::exception& e) { + LOG_WARN("rpc packet: ", e.what()); + throw; +} catch(...) { + LOG_ERROR("rpc packet: unknown error"); + throw; +} + + +template +void connection::process_request(method_id method, msgobj param, + msgid_t msgid, auto_zone& z) +{ + responder response(fd(), msgid); + static_cast(this)->dispatch_request( + method, param, response, z); +} + + +} // namespace rpc + +#endif /* rpc/connection.h */ + diff --git a/src/rpc/message.h b/src/rpc/message.h new file mode 100644 index 0000000..4dba4cc --- /dev/null +++ b/src/rpc/message.h @@ -0,0 +1,19 @@ +#ifndef RPC_MESSAGE_H__ +#define RPC_MESSAGE_H__ + +#include + +namespace rpc { + + +template +struct message { + typedef Session session_type; + typedef rpc::method method; +}; + + +} // namespace rpc + +#endif /* rpc/message.h */ + diff --git a/src/rpc/protocol.h b/src/rpc/protocol.h new file mode 100644 index 0000000..b903603 --- /dev/null +++ b/src/rpc/protocol.h @@ -0,0 +1,152 @@ +#ifndef RPC_PROTOCOL_H__ +#define RPC_PROTOCOL_H__ + +#include +#include "rpc/types.h" +#include "rpc/address.h" + +namespace rpc { + + +namespace protocol { + using msgpack::define; + using msgpack::type::tuple; + 
using msgpack::type::raw_ref; + + static const int TRANSPORT_LOST_ERROR = 1; + static const int NODE_LOST_ERROR = 2; + static const int TIMEOUT_ERROR = 3; + static const int UNKNOWN_ERROR = 4; + static const int PROTOCOL_ERROR = 5; + static const int SERVER_ERROR = 6; +} + + +typedef uint16_t protocol_id; +typedef uint16_t version_id; + +struct method_id { + method_id() { } + method_id(uint32_t id) : m(id) { } + + uint32_t get() const { return m; } + + void msgpack_unpack(uint32_t id) { m = id; } + + template + void msgpack_pack(Packer& pk) const { pk.pack(m); } + +private: + uint32_t m; +}; + + +template +struct method : public method_id { + static const uint16_t protocol = Protocol; + static const uint16_t version = Version; + static const uint32_t id = Protocol << 16 | Version; + method() : method_id(id) { } +}; + + +typedef uint16_t rpc_type_t; + +namespace rpc_type { + static const rpc_type_t MESSAGE_REQUEST = 0; + static const rpc_type_t MESSAGE_RESPONSE = 1; + static const rpc_type_t CLUSTER_INIT = 2; +} // namespace rpc_type + + +struct rpc_message : msgpack::define< msgpack::type::tuple > { + rpc_message() { } + bool is_request() const { return get<0>() == rpc_type::MESSAGE_REQUEST; } + //bool is_response() const { return get<0>() == rpc_type::MESSAGE_RESPONSE; } + bool is_cluster_init() const { return get<0>() == rpc_type::CLUSTER_INIT; } +}; + + +template +struct rpc_request : msgpack::define< + msgpack::type::tuple > { + + typedef rpc_request this_t; + + rpc_request() { } + + rpc_request( + method_id method, + typename msgpack::type::tuple_type::transparent_reference params, + msgid_t msgid) : + this_t::define_type(typename this_t::msgpack_type( + rpc_type::MESSAGE_REQUEST, + msgid, + method, + params + )) { } + + msgid_t msgid() const { return this_t::msgpack_type::template get<1>(); } + + method_id method() const { return this_t::msgpack_type::template get<2>(); } + + typename msgpack::type::tuple_type::const_reference + param() const { return 
this_t::msgpack_type::template get<3>(); } +}; + + +template +struct rpc_response : msgpack::define< + msgpack::type::tuple > { + + typedef rpc_response this_t; + + rpc_response() { } + + rpc_response( + typename msgpack::type::tuple_type::transparent_reference res, + typename msgpack::type::tuple_type::transparent_reference err, + msgid_t msgid) : + this_t::define_type(typename this_t::msgpack_type( + rpc_type::MESSAGE_RESPONSE, + msgid, + err, + res + )) { } + + msgid_t msgid() const { return this_t::msgpack_type::template get<1>(); } + + typename msgpack::type::tuple_type::const_reference + error() const { return this_t::msgpack_type::template get<2>(); } + + typename msgpack::type::tuple_type::const_reference + result() const { return this_t::msgpack_type::template get<3>(); } +}; + + +struct rpc_initmsg : msgpack::define< + msgpack::type::tuple > { + + typedef rpc_initmsg this_t; + + rpc_initmsg() { } + + rpc_initmsg( + const address& addr, + role_type id) : + this_t::define_type(msgpack_type( + rpc_type::CLUSTER_INIT, + msgpack::type::raw_ref(addr.dump(), addr.dump_size()), + id + )) { } + + address addr() const { return address(get<1>().ptr, get<1>().size); } + + role_type role_id() const { return get<2>(); } +}; + + +} // namespace rpc + +#endif /* rpc/protocol.h */ + diff --git a/src/rpc/request.h b/src/rpc/request.h new file mode 100644 index 0000000..7b701f3 --- /dev/null +++ b/src/rpc/request.h @@ -0,0 +1,49 @@ +#ifndef RPC_REQUEST_H__ +#define RPC_REQUEST_H__ + +namespace rpc { + + +template +class request; + + +template +class request { +public: + request(basic_shared_session from, msgobj param) : + m_from(from) + { + param.convert(&m_param); + } + +public: + Parameter& param() + { + return m_param; + } + + const Parameter& param() const + { + return m_param; + } + + basic_shared_session& session() + { + return m_from; + } + +private: + Parameter m_param; + basic_shared_session m_from; + +private: + request(); +}; + + + +} // namespace rpc + +#endif 
/* rpc/request.h */ + diff --git a/src/rpc/responder.h b/src/rpc/responder.h new file mode 100644 index 0000000..567ccc0 --- /dev/null +++ b/src/rpc/responder.h @@ -0,0 +1,52 @@ +#ifndef RPC_RESPONDER_H__ +#define RPC_RESPONDER_H__ + +#include "rpc/types.h" + +namespace rpc { + + +class responder { +public: + responder(int fd, msgid_t msgid); + + ~responder(); + + template + void result(Result res); + + template + void result(Result res, auto_zone z); + + template + void error(Error err); + + template + void error(Error err, auto_zone z); + + void null(); + + void send_response(const char* buf, size_t buflen, auto_zone z); + + void send_responsev(const struct iovec* vb, size_t count, auto_zone z); + +private: + template + void call(Result& res, Error& err); + + template + void call(Result& res, Error& err, auto_zone z); + +private: + int m_fd; + msgid_t m_msgid; + +private: + responder(); +}; + + +} // namespace rpc + +#endif /* rpc/responder.h */ + diff --git a/src/rpc/responder_impl.h b/src/rpc/responder_impl.h new file mode 100644 index 0000000..d054ec3 --- /dev/null +++ b/src/rpc/responder_impl.h @@ -0,0 +1,93 @@ +#ifndef RPC_RESPONDER_IMPL_H__ +#define RPC_RESPONDER_IMPL_H__ + +#include +#include "rpc/vrefbuffer.h" +#include "rpc/wavy.h" + +namespace rpc { + + +inline responder::responder(int fd, msgid_t msgid) : + m_fd(fd), m_msgid(msgid) { } + +inline responder::~responder() { } + +template +void responder::result(Result res) +{ + msgpack::type::nil err; + call(res, err); +} + +template +void responder::result(Result res, auto_zone z) +{ + msgpack::type::nil err; + call(res, err, z); +} + +template +void responder::error(Error err) +{ + msgpack::type::nil res; + call(res, err); +} + +template +void responder::error(Error err, auto_zone z) +{ + msgpack::type::nil res; + call(res, err, z); +} + +inline void responder::null() +{ + msgpack::type::nil res; + msgpack::type::nil err; + call(res, err); +} + +template +inline void responder::call(Result& res, Error& 
err) +{ + msgpack::sbuffer buf; // FIXME use vrefbuffer? + rpc_response msgres(res, err, m_msgid); + msgpack::pack(buf, msgres); + + wavy::request req(&::free, buf.data()); + wavy::write(m_fd, buf.data(), buf.size(), req); + buf.release(); +} + +template +inline void responder::call(Result& res, Error& err, auto_zone z) +{ + vrefbuffer* buf = z->allocate(); + rpc_response msgres(res, err, m_msgid); + msgpack::pack(*buf, msgres); + + wavy::request req(&mp::object_delete, z.get()); + wavy::writev(m_fd, buf->vector(), buf->vector_size(), req); + z.release(); +} + +inline void responder::send_response(const char* buf, size_t buflen, auto_zone z) +{ + wavy::request req(&mp::object_delete, z.get()); + wavy::write(m_fd, buf, buflen, req); + z.release(); +} + +inline void responder::send_responsev(const struct iovec* vb, size_t count, auto_zone z) +{ + wavy::request req(&mp::object_delete, z.get()); + wavy::writev(m_fd, vb, count, req); + z.release(); +} + + +} // namespace rpc + +#endif /* rpc/responder_impl.h */ + diff --git a/src/rpc/retry.h b/src/rpc/retry.h new file mode 100644 index 0000000..49b22a8 --- /dev/null +++ b/src/rpc/retry.h @@ -0,0 +1,59 @@ +#ifndef RPC_RETRY_H__ +#define RPC_RETRY_H__ + +namespace rpc { + + +template +class retry { +public: + retry(Parameter param) : + m_limit(0), m_param(param) { } + + void set_callback(rpc::callback_t callback) + { + m_callbck = callback; + } + +public: + bool retry_incr(unsigned short limit) + { + return ++m_limit <= limit; + } + + template + void call(Session s, rpc::shared_zone& life, unsigned short timeout_steps = 10) + { + s->call(m_param, life, m_callbck, timeout_steps); + } + + unsigned short num_retried() const + { + return m_limit; + } + + Parameter& param() + { + return m_param; + } + + const Parameter& param() const + { + return m_param; + } + +private: + unsigned short m_limit; + Parameter m_param; + rpc::callback_t m_callbck; + +private: + retry(); + retry(const retry&); +}; + + +} // namespace rpc + 
+#endif /* rpc/retry.h */ + diff --git a/src/rpc/rpc.h b/src/rpc/rpc.h new file mode 100644 index 0000000..ea6019d --- /dev/null +++ b/src/rpc/rpc.h @@ -0,0 +1,24 @@ +#ifndef RPC_RPC_H__ +#define RPC_RPC_H__ + +#include "rpc/types.h" + +#include "rpc/address.h" +#include "rpc/responder.h" +#include "rpc/connection.h" + +#include "rpc/weak_responder.h" +#include "rpc/session.h" +#include "rpc/transport.h" + +#include "rpc/responder_impl.h" +#include "rpc/session_impl.h" +#include "rpc/transport_impl.h" +#include "rpc/weak_responder_impl.h" + +#include "rpc/retry.h" +#include "rpc/request.h" +#include "rpc/message.h" + +#endif /* rpc/rpc.h */ + diff --git a/src/rpc/server.cc b/src/rpc/server.cc new file mode 100644 index 0000000..8bb5d94 --- /dev/null +++ b/src/rpc/server.cc @@ -0,0 +1,76 @@ +#include "rpc/server.h" +#include "rpc/protocol.h" +#include +#include +#include +#include + +namespace rpc { + + +server::server() { } + +server::~server() { } + + +shared_peer server::accepted(int fd) +{ +#ifdef USE_TCP_NODELAY + // XXX + int on = 1; + ::setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); // ignore error +#endif + basic_shared_session s(new peer(this)); + wavy::add(fd, s, this); + void* k = (void*)s.get(); + + pthread_scoped_lock lk(m_peers_mutex); + m_peers.insert( peers_t::value_type(k, basic_weak_session(s)) ); + return mp::static_pointer_cast(s); +} + + +void server::dispatch_request( + basic_shared_session& s, weak_responder response, + method_id method, msgobj param, auto_zone z) +{ + dispatch(mp::static_pointer_cast(s), + response, method, param, z); +} + + +void server::step_timeout() +{ + pthread_scoped_lock lk(m_peers_mutex); + for(peers_t::iterator it(m_peers.begin()), it_end(m_peers.end()); + it != it_end; ) { + basic_shared_session p(it->second.lock()); + if(p && !p->is_lost()) { + wavy::submit(&basic_session::step_timeout, p.get(), p); + ++it; + } else { + m_peers.erase(it++); + } + } +} + +void 
server::transport_lost_notify(basic_shared_session& s) +{ + msgpack::object res; + res.type = msgpack::type::NIL; + msgpack::object err; + err.type = msgpack::type::POSITIVE_INTEGER; + err.via.u64 = protocol::NODE_LOST_ERROR; + + void* k = (void*)s.get(); + { + pthread_scoped_lock lk(m_peers_mutex); + m_peers.erase(k); + } + + s->force_lost(res, err); +} + + +} // namespace rpc + diff --git a/src/rpc/server.h b/src/rpc/server.h new file mode 100644 index 0000000..faaffff --- /dev/null +++ b/src/rpc/server.h @@ -0,0 +1,100 @@ +#ifndef RPC_SERVER_H__ +#define RPC_SERVER_H__ + +#include "rpc/rpc.h" +#include "log/mlogger.h" // FIXME +#include +#include +#include + +namespace rpc { + + +class server; + +class peer : public basic_session { +public: + peer(session_manager* mgr = NULL); +}; + +typedef mp::shared_ptr shared_peer; +typedef mp::weak_ptr weak_peer; + + +class server : public session_manager, public transport_manager { +public: + typedef shared_peer shared_session; + typedef weak_peer weak_session; + + server(); + + virtual ~server(); + + virtual void dispatch( + shared_peer from, weak_responder response, + method_id method, msgobj param, auto_zone z) = 0; + +public: + // step timeout count. + void step_timeout(); + + // add accepted connection + shared_peer accepted(int fd); + + // apply function to all connected sessions. 
+ // F is required to implement + // void operator() (shared_peer); + template + void for_each_peer(F f); + +protected: + mp::pthread_mutex m_peers_mutex; + typedef std::map peers_t; + peers_t m_peers; + +public: + virtual void dispatch_request( + basic_shared_session& s, weak_responder response, + method_id method, msgobj param, auto_zone z); + + virtual void transport_lost_notify(basic_shared_session& s); + +private: + server(const server&); +}; + + +inline peer::peer(session_manager* mgr) : + basic_session(mgr) { } + + +namespace detail { + template + struct server_each_peer { + server_each_peer(F f) : m(f) { } + inline void operator() (std::pair& x) + { + basic_shared_session s(x.second.lock()); + if(s && !s->is_lost()) { + m(mp::static_pointer_cast(s)); + } + } + private: + F m; + server_each_peer(); + }; +} // namespace detail + +template +void server::for_each_peer(F f) +{ + pthread_scoped_lock lk(m_peers_mutex); + detail::server_each_peer e(f); + std::for_each(m_peers.begin(), m_peers.end(), e); +} + + +} // namespace rpc + +#endif /* rpc/client.h */ + diff --git a/src/rpc/session.cc b/src/rpc/session.cc new file mode 100644 index 0000000..31c95ec --- /dev/null +++ b/src/rpc/session.cc @@ -0,0 +1,305 @@ +#include "rpc/rpc.h" +#include "rpc/protocol.h" +#include "log/mlogger.h" //FIXME +#include + +namespace rpc { + + +inline bool basic_session::callback_table::out( + msgid_t msgid, callback_entry* result) +{ + pthread_scoped_lock lk(m_callbacks_mutex[msgid % PARTITION_NUM]); + + callbacks_t& cbs(m_callbacks[msgid % PARTITION_NUM]); + callbacks_t::iterator it(cbs.find(msgid)); + if(it == cbs.end()) { + return false; + } + + *result = it->second; + cbs.erase(it); + + return true; +} + +template +inline void basic_session::callback_table::for_each_clear(F f) +{ + for(size_t i=0; i < PARTITION_NUM; ++i) { + pthread_scoped_lock lk(m_callbacks_mutex[i]); + callbacks_t& cbs(m_callbacks[i]); + std::for_each(cbs.begin(), cbs.end(), f); + cbs.clear(); + } +} + 
+template +inline void basic_session::callback_table::erase_if(F f) +{ + for(size_t i=0; i < PARTITION_NUM; ++i) { + pthread_scoped_lock lk(m_callbacks_mutex[i]); + callbacks_t& cbs(m_callbacks[i]); + for(callbacks_t::iterator it(cbs.begin()), it_end(cbs.end()); + it != it_end; ) { + if(f(*it)) { + cbs.erase(it++); + } else { + ++it; + } + } + //cbs.erase(std::remove_if(cbs.begin(), cbs.end(), f), cbs.end()); + } +} + + +basic_session::~basic_session() +{ + // FIXME + //if(mp::iothreads::is_end()) { return; } + + msgpack::object res; + res.type = msgpack::type::NIL; + msgpack::object err; + err.type = msgpack::type::POSITIVE_INTEGER; + err.via.u64 = protocol::TRANSPORT_LOST_ERROR; + + force_lost(res, err); +} + +session::~session() +{ + cancel_pendings(); +} + + +void basic_session::process_response( + basic_shared_session& self, + msgobj result, msgobj error, + msgid_t msgid, auto_zone z) +{ + callback_entry e; + LOG_DEBUG("process callback this=",(void*)this," id=",msgid," result:",result," error:",error); + if(!m_cbtable.out(msgid, &e)) { + LOG_DEBUG("callback not found id=",msgid); + return; + } + e.callback(self, result, error, z); +} + + +void basic_session::send_data(const char* buf, size_t buflen, + void (*finalize)(void*), void* data) +{ + pthread_scoped_lock lk(m_binds_mutex); + if(m_binds.empty()) { + throw std::runtime_error("session not bound"); + } + // ad-hoc load balancing + m_binds[m_msgid_rr % m_binds.size()]->send_data( + buf, buflen, finalize, data); +} + +void basic_session::send_datav(vrefbuffer* buf, + void (*finalize)(void*), void* data) +{ + pthread_scoped_lock lk(m_binds_mutex); + if(m_binds.empty()) { + throw std::runtime_error("session not bound"); + } + // ad-hoc load balancing + m_binds[m_msgid_rr % m_binds.size()]->send_datav( + buf, finalize, data); +} + + +bool basic_session::bind_transport(basic_transport* t) +{ + m_connect_retried_count = 0; + + pthread_scoped_lock lk(m_binds_mutex); + + bool ret = m_binds.empty() ? 
true : false; + m_binds.push_back(t); + + return ret; +} + +bool session::bind_transport(basic_transport* t) +{ + bool ret = basic_session::bind_transport(t); + + pending_queue_t pendings; + { + pthread_scoped_lock lk(m_pending_queue_mutex); + pendings.swap(m_pending_queue); + } + + for(pending_queue_t::iterator it(pendings.begin()), + it_end(pendings.end()); it != it_end; ++it) { + t->send_datav(*it, + &mp::object_delete, *it); + } + pendings.clear(); + + return ret; +} + + +bool basic_session::unbind_transport(basic_transport* t, basic_shared_session& self) +{ + pthread_scoped_lock lk(m_binds_mutex); + + binds_t::iterator remove_from = + std::remove(m_binds.begin(), m_binds.end(), t); + m_binds.erase(remove_from, m_binds.end()); + + if(m_binds.empty()) { + if(m_manager) { + wavy::submit(&session_manager::transport_lost_notify, m_manager, self); + } + return true; + } + return false; +} + +bool session::unbind_transport(basic_transport* t, basic_shared_session& self) +{ + return basic_session::unbind_transport(t, self); +} + + +void basic_session::shutdown() +{ + pthread_scoped_lock lk(m_binds_mutex); + + basic_shared_session self; + for(binds_t::iterator it(m_binds.begin()), it_end(m_binds.end()); + it != it_end; ++it) { + basic_shared_session b = (*it)->shutdown(); + if(b) { self = b; } + } + m_binds.clear(); + + if(m_manager && self) { + wavy::submit(&session_manager::transport_lost_notify, m_manager, self); + } +} + + +namespace { + struct each_callback_submit { + each_callback_submit(msgobj r, msgobj e) : + res(r), err(e) { } + template + void operator() (T& pair) const + { + basic_shared_session nulls; + pair.second.callback_submit(nulls, res, err); + } + private: + msgobj res; + msgobj err; + each_callback_submit(); + }; +} + +void basic_session::force_lost(msgobj res, msgobj err) +{ + m_lost = true; + m_cbtable.for_each_clear(each_callback_submit(res, err)); +} + + +basic_session::callback_entry::callback_entry() { } + 
+basic_session::callback_entry::callback_entry( + callback_t callback, shared_zone life, + unsigned short timeout_steps) : + m_timeout_steps(timeout_steps), + m_callback(callback), + m_life(life) { } + +void basic_session::callback_entry::callback(basic_shared_session& s, + msgobj res, msgobj err, auto_zone& z) +{ + // msgpack::zone::push_finalizer is not thread-safe + // m_life may null. see {basic_,}session::call + //m_life->push_finalizer(&mp::object_delete, z.release()); + shared_zone life(z.release()); + if(m_life) { life->allocate(m_life); } + callback_real(s, res, err, life); +} + +void basic_session::callback_entry::callback(basic_shared_session& s, + msgobj res, msgobj err) +{ + shared_zone life = m_life; + if(!life) { life.reset(new msgpack::zone()); } + callback_real(s, res, err, life); +} + +void basic_session::callback_entry::callback_submit( + basic_shared_session& s, msgobj res, msgobj err) +{ + shared_zone life = m_life; + if(!life) { life.reset(new msgpack::zone()); } + wavy::submit(m_callback, s, res, err, life); +} + +void basic_session::callback_entry::callback_real(basic_shared_session& s, + msgobj res, msgobj err, shared_zone life) +try { + m_callback(s, res, err, life); +} catch (std::exception& e) { + LOG_ERROR("response callback error: ",e.what()); +} catch (...) 
{ + LOG_ERROR("response callback error: unknown error"); +} + + +namespace { + struct remove_if_step_timeout { + remove_if_step_timeout(basic_shared_session s) : + self(s) + { + res.type = msgpack::type::NIL; + err.type = msgpack::type::POSITIVE_INTEGER; + err.via.u64 = protocol::TIMEOUT_ERROR; + } + template + bool operator() (T& pair) + { + if(!pair.second.step_timeout()) { + LOG_DEBUG("callback timeout id=",pair.first); + pair.second.callback_submit(self, res, err); // client::step_timeout; + //pair.second.callback(self, res, err); // client::step_timeout; // FIXME XXX + return true; + } + return false; + } + private: + basic_shared_session& self; + msgobj res; + msgobj err; + remove_if_step_timeout(); + }; +} + +void basic_session::step_timeout(basic_shared_session self) +{ + m_cbtable.erase_if(remove_if_step_timeout(self)); +} + +bool basic_session::callback_entry::step_timeout() +{ + if(m_timeout_steps > 0) { + --m_timeout_steps; // FIXME atomic? + return true; + } + return false; +} + + +} // namespace rpc + diff --git a/src/rpc/session.h b/src/rpc/session.h new file mode 100644 index 0000000..400354d --- /dev/null +++ b/src/rpc/session.h @@ -0,0 +1,201 @@ +#ifndef RPC_SESSION_H__ +#define RPC_SESSION_H__ + +#include "rpc/address.h" +#include "rpc/connection.h" +#include "rpc/vrefbuffer.h" +#include +#include +#include + +namespace rpc { + + +class basic_transport; + +struct session_manager { + session_manager() { } + + virtual ~session_manager() { } + + virtual void dispatch_request( + basic_shared_session& s, weak_responder response, + method_id method, msgobj param, auto_zone z) = 0; + + virtual void transport_lost_notify(basic_shared_session& s) = 0; +}; + + +class basic_session { +public: + basic_session(session_manager* mgr = NULL); + virtual ~basic_session(); + + typedef std::auto_ptr auto_zone; + +public: + // step callback timeout count. + void step_timeout(basic_shared_session self); + + // return true if this session is connected. 
+ bool is_bound() const; + + // call remote procedure. + // if this session is not bound, exception will be thrown. + // Message is requred to inherit rpc::message. + template + void call(Message& params, + shared_zone life, callback_t callback, + unsigned short timeout_steps); + + // get session manager + session_manager* get_manager(); + + void send_data(const char* buf, size_t buflen, + void (*finalize)(void*), void* data); + + void send_datav(vrefbuffer* buf, + void (*finalize)(void*), void* data); + +public: + // called from client::async_connect and user. + // the number of retried times is reset when bind_transport() + // is called. + unsigned short increment_connect_retried_count(); + + // return number of connect retried times. + unsigned short connect_retried_count(); + + // called from user. + // close this session. + void shutdown(); + +public: + // call all registered callback functions with specified arguments + // and set is_lost == true + void force_lost(msgobj res, msgobj err); + +public: + // return true if the destructor of this session is already running or + // force_lost() is called. + bool is_lost() const; + + // turn off the lost flag. + // use this function carefully. + void revive(); + +public: + // called from transport + void process_request( + basic_shared_session& s, + method_id method, msgobj param, + msgid_t msgid, auto_zone z); + + // process callback. 
+ void process_response( + basic_shared_session& self, + msgobj result, msgobj error, + msgid_t msgid, auto_zone z); + + virtual bool bind_transport(basic_transport* t); + virtual bool unbind_transport(basic_transport* t, basic_shared_session& self); + +protected: + template + msgid_t pack(vrefbuffer& buffer, Message& param); + +private: + class callback_entry { + public: + callback_entry(); + callback_entry(callback_t callback, shared_zone life, + unsigned short timeout_steps); + public: + void callback(basic_shared_session& s, msgobj res, msgobj err, auto_zone& z); + void callback(basic_shared_session& s, msgobj res, msgobj err); + inline void callback_submit(basic_shared_session& s, msgobj res, msgobj err); + inline bool step_timeout(); // Note: NOT thread-safe + private: + inline void callback_real(basic_shared_session& s, + msgobj res, msgobj err, shared_zone z); + private: + unsigned short m_timeout_steps; + callback_t m_callback; + shared_zone m_life; + }; + +protected: + msgid_t m_msgid_rr; + + class callback_table { + public: + callback_table() { } + ~callback_table() { } + public: + void insert(msgid_t msgid, const callback_entry& entry); + bool out(msgid_t msgid, callback_entry* result); + template void for_each_clear(F f); + template void erase_if(F f); + public: + static const size_t PARTITION_NUM = 4; // FIXME + typedef std::map callbacks_t; + mp::pthread_mutex m_callbacks_mutex[PARTITION_NUM]; + callbacks_t m_callbacks[PARTITION_NUM]; + private: + callback_table(const callback_table&); + }; + callback_table m_cbtable; + + mp::pthread_mutex m_binds_mutex; + typedef std::vector binds_t; + binds_t m_binds; + + bool m_lost; + unsigned short m_connect_retried_count; + + session_manager* m_manager; + +private: + basic_session(); + basic_session(const basic_session&); +}; + + +class session : public basic_session { +public: + session(session_manager* mgr = NULL); + virtual ~session(); + +public: + // call remote procedure. 
+ // if this session is not connected, the request will + // be kept till connected. + // Message is requred to inherit rpc::message. + template + void call(Message& param, + shared_zone life, callback_t callback, + unsigned short timeout_steps); + + // clear all pending requests. + void cancel_pendings(); + +public: + virtual bool bind_transport(basic_transport* t); + virtual bool unbind_transport(basic_transport* t, basic_shared_session& self); + +private: + mp::pthread_mutex m_pending_queue_mutex; + typedef std::vector pending_queue_t; + pending_queue_t m_pending_queue; + void clear_pending_queue(pending_queue_t& queue); + +private: + session(); + session(const session&); +}; + + +} // namespace rpc + +#endif /* rpc/session.h */ + diff --git a/src/rpc/session_impl.h b/src/rpc/session_impl.h new file mode 100644 index 0000000..6bb192e --- /dev/null +++ b/src/rpc/session_impl.h @@ -0,0 +1,158 @@ +#ifndef RPC_SESSION_IMPL_H__ +#define RPC_SESSION_IMPL_H__ + +namespace rpc { + + +inline void basic_session::callback_table::insert( + msgid_t msgid, const callback_entry& entry) +{ + pthread_scoped_lock lk(m_callbacks_mutex[msgid % PARTITION_NUM]); + std::pair pair = + m_callbacks[msgid % PARTITION_NUM].insert( + callbacks_t::value_type(msgid, entry)); + if(!pair.second) { + pair.first->second = entry; + } +} + + +inline basic_session::basic_session(session_manager* mgr) : + m_msgid_rr(0), // FIXME randomize? + m_lost(false), + m_connect_retried_count(0), + m_manager(mgr) { } + + +inline bool basic_session::is_bound() const + { return !m_binds.empty(); } + +inline bool basic_session::is_lost() const + { return m_lost; } + +inline void basic_session::revive() + { m_lost = false; } + + +inline unsigned short basic_session::increment_connect_retried_count() + { return ++m_connect_retried_count; } // FIXME atomic? 
+ +inline unsigned short basic_session::connect_retried_count() + { return m_connect_retried_count; } + + +inline session::session(session_manager* mgr) : + basic_session(mgr) +{ } + +inline session_manager* basic_session::get_manager() +{ + return m_manager; +} + + +inline void basic_session::process_request( + basic_shared_session& s, + method_id method, msgobj param, + msgid_t msgid, auto_zone z) +{ + weak_responder response(s, msgid); + get_manager()->dispatch_request(s, response, method, param, z); +} + + +template +msgid_t basic_session::pack(vrefbuffer& buffer, Message& param) +{ + msgid_t msgid = __sync_add_and_fetch(&m_msgid_rr, 1); + rpc_request msgreq(typename Message::method(), param, msgid); + msgpack::pack(buffer, msgreq); + return msgid; +} + + +template +void basic_session::call( + Message& param, + shared_zone life, callback_t callback, + unsigned short timeout_steps) +{ + LOG_DEBUG("send request method=",Message::method::id); + if(is_lost()) { throw std::runtime_error("lost session"); } + //if(!life) { life.reset(new msgpack::zone()); } + + std::auto_ptr buf(new vrefbuffer()); + msgid_t msgid = pack(*buf, param); + + pthread_scoped_lock blk(m_binds_mutex); + if(m_binds.empty()) { + //throw std::runtime_error("session not bound"); + // FIXME XXX forget the error for robustness. 
+ // FIXME XXX wait timeout: + m_cbtable.insert(msgid, callback_entry(callback, life, timeout_steps)); + buf.release(); + + } else { + m_cbtable.insert(msgid, callback_entry(callback, life, timeout_steps)); + // ad-hoc load balancing + m_binds[m_msgid_rr % m_binds.size()]->send_datav(buf.get(), + &mp::object_delete, buf.get()); + buf.release(); + } +} + + +template +void session::call( + Message& param, + shared_zone life, callback_t callback, + unsigned short timeout_steps) +{ + LOG_DEBUG("send request method=",Message::method::id); + if(is_lost()) { throw std::runtime_error("lost session"); } + //if(!life) { life.reset(new msgpack::zone()); } + + std::auto_ptr buf(new vrefbuffer()); + msgid_t msgid = pack(*buf, param); + + m_cbtable.insert(msgid, callback_entry(callback, life, timeout_steps)); + + pthread_scoped_lock blk(m_binds_mutex); + if(m_binds.empty()) { + { + pthread_scoped_lock plk(m_pending_queue_mutex); + LOG_TRACE("push pending queue ",m_pending_queue.size()+1); + m_pending_queue.push_back(buf.get()); + } + buf.release(); + // FIXME clear pending queue if it is too big + // FIXME or throw exception + + } else { + // ad-hoc load balancing + m_binds[m_msgid_rr % m_binds.size()]->send_datav(buf.get(), + &mp::object_delete, buf.get()); + buf.release(); + } +} + +inline void session::cancel_pendings() +{ + pthread_scoped_lock lk(m_pending_queue_mutex); + clear_pending_queue(m_pending_queue); +} + +inline void session::clear_pending_queue(pending_queue_t& queue) +{ + for(pending_queue_t::iterator it(queue.begin()), + it_end(queue.end()); it != it_end; ++it) { + delete *it; + } + queue.clear(); +} + + +} // namespace rpc + +#endif /* rpc/session_impl.h */ + diff --git a/src/rpc/transport.h b/src/rpc/transport.h new file mode 100644 index 0000000..3932625 --- /dev/null +++ b/src/rpc/transport.h @@ -0,0 +1,77 @@ +#ifndef RPC_TRANSPORT_H__ +#define RPC_TRANSPORT_H__ + +#include "rpc/types.h" +#include "rpc/connection.h" + +namespace rpc { + + +struct 
transport_manager { + virtual ~transport_manager() { } +}; + + +class basic_transport { +public: + basic_transport(int fd, basic_shared_session s, + transport_manager* mgr = NULL); + ~basic_transport(); + +public: + // get transport manager + transport_manager* get_manager(); + + // called from basic_session::shutdown() + basic_shared_session shutdown(); + +public: + void process_request(method_id method, msgobj param, + msgid_t msgid, auto_zone& z); + + void process_response(msgobj res, msgobj err, + msgid_t msgid, auto_zone& z); + +public: + void send_data(const char* buf, size_t buflen, + void (*finalize)(void*), void* data); + + void send_datav(vrefbuffer* buf, + void (*finalize)(void*), void* data); + +protected: + int m_fd; + basic_shared_session m_session; + +private: + transport_manager* m_manager; + +private: + basic_transport(); + basic_transport(const basic_transport&); +}; + + +class transport : public basic_transport, public connection { +public: + transport(int fd, basic_shared_session& s, + transport_manager* mgr = NULL); + + virtual ~transport(); + + void process_request(method_id method, msgobj param, + msgid_t msgid, auto_zone& z); + + void process_response(msgobj res, msgobj err, + msgid_t msgid, auto_zone& z); + +private: + transport(); + transport(const transport&); +}; + + +} // namespace rpc + +#endif /* rpc/transport.h */ + diff --git a/src/rpc/transport_impl.h b/src/rpc/transport_impl.h new file mode 100644 index 0000000..60894d1 --- /dev/null +++ b/src/rpc/transport_impl.h @@ -0,0 +1,92 @@ +#ifndef RPC_TRANSPORT_IMPL_H__ +#define RPC_TRANSPORT_IMPL_H__ + +namespace rpc { + + +inline basic_transport::basic_transport(int fd, + basic_shared_session s, transport_manager* mgr) : + m_fd(fd), + m_session(s), + m_manager(mgr) { } + +inline basic_transport::~basic_transport() { } + + +inline transport_manager* basic_transport::get_manager() +{ + return m_manager; +} + +inline basic_shared_session basic_transport::shutdown() +{ + ::shutdown(m_fd, 
SHUT_RD); // FIXME + return m_session; +} + +inline transport::transport(int fd, basic_shared_session& s, + transport_manager* mgr) : + basic_transport(fd, s, mgr), + connection(fd) +{ + m_session->bind_transport(this); +} + +inline transport::~transport() +{ + if(m_session) { + m_session->unbind_transport(this, m_session); + } +} + + +inline void basic_transport::process_request(method_id method, msgobj param, + msgid_t msgid, auto_zone& z) +{ + if(!m_session) { + throw std::runtime_error("session unbound"); + } + m_session->process_request(m_session, method, param, msgid, z); +} + +inline void transport::process_request(method_id method, msgobj param, + msgid_t msgid, auto_zone& z) +{ + basic_transport::process_request(method, param, msgid, z); +} + + +inline void basic_transport::process_response(msgobj result, msgobj error, + msgid_t msgid, auto_zone& z) +{ + m_session->process_response(m_session, result, error, msgid, z); +} + +inline void transport::process_response(msgobj res, msgobj err, + msgid_t msgid, auto_zone& z) +{ + basic_transport::process_response(res, err, msgid, z); +} + + +inline void basic_transport::send_data( + const char* buf, size_t buflen, + void (*finalize)(void*), void* data) +{ + wavy::request req(finalize, data); + wavy::write(m_fd, buf, buflen, req); +} + +inline void basic_transport::send_datav( + vrefbuffer* buf, + void (*finalize)(void*), void* data) +{ + wavy::request req(finalize, data); + wavy::writev(m_fd, buf->vector(), buf->vector_size(), req); +} + + +} // namespace rpc + +#endif /* rpc/transport_impl.h */ + diff --git a/src/rpc/types.h b/src/rpc/types.h new file mode 100644 index 0000000..2f4e3b9 --- /dev/null +++ b/src/rpc/types.h @@ -0,0 +1,40 @@ +#ifndef RPC_TYPES_H__ +#define RPC_TYPES_H__ + +#include +#include +#include +#include + +namespace rpc { + + +typedef msgpack::object msgobj; +typedef std::auto_ptr auto_zone; +typedef mp::shared_ptr shared_zone; + + +typedef uint32_t msgid_t; +typedef uint8_t role_type; + + 
+class transport; +class session; +class basic_transport; +class basic_session; + + +typedef mp::shared_ptr basic_shared_session; +typedef mp::weak_ptr basic_weak_session; + +typedef mp::function callback_t; + + +using mp::pthread_scoped_lock; +using mp::pthread_scoped_rdlock; +using mp::pthread_scoped_wrlock; + + +} // namespace rpc + +#endif /* rpc/types.h */ diff --git a/src/rpc/vrefbuffer.h b/src/rpc/vrefbuffer.h new file mode 100644 index 0000000..858eab6 --- /dev/null +++ b/src/rpc/vrefbuffer.h @@ -0,0 +1,89 @@ +#ifndef RPC_VREFBUFFER_H__ +#define RPC_VREFBUFFER_H__ + +#include +#include +#include +#include +#include + +namespace rpc { + + +class vrefbuffer { +public: + vrefbuffer(); + ~vrefbuffer(); + +public: + void append_ref(const char* buf, size_t len); + void append_copy(const char* buf, size_t len); + + void write(const char* buf, size_t len); + + size_t vector_size() const; + const struct iovec* vector() const; + +public: + typedef std::vector vec_t; + vec_t m_vec; + + msgpack::zone m_zone; + +private: + vrefbuffer(const vrefbuffer&); +}; + + +inline vrefbuffer::vrefbuffer() +{ + m_vec.reserve(4); // FIXME sizeof(struct iovec) * 4 < 72 +} + +inline vrefbuffer::~vrefbuffer() { } + + +inline void vrefbuffer::append_ref(const char* buf, size_t len) +{ + struct iovec v = {(void*)buf, len}; + m_vec.push_back(v); +} + +inline void vrefbuffer::append_copy(const char* buf, size_t len) +{ + char* m = (char*)m_zone.malloc(len); + memcpy(m, buf, len); + if(!m_vec.empty() && ((const char*)m_vec.back().iov_base) + + m_vec.back().iov_len == m) { + m_vec.back().iov_len += len; + } else { + append_ref(m, len); + } +} + + +inline void vrefbuffer::write(const char* buf, size_t len) +{ + if(len > 32) { // FIXME + append_ref(buf, len); + } else { + append_copy(buf, len); + } +} + + +inline size_t vrefbuffer::vector_size() const +{ + return m_vec.size(); +} + +inline const struct iovec* vrefbuffer::vector() const +{ + return &m_vec[0]; +} + + +} // namespace rpc + +#endif 
/* rpc/vrefbuffer.h */ + diff --git a/src/rpc/wavy.h b/src/rpc/wavy.h new file mode 100644 index 0000000..413d8cf --- /dev/null +++ b/src/rpc/wavy.h @@ -0,0 +1,19 @@ +#ifndef RPC_WAVY_H__ +#define RPC_WAVY_H__ + +#include +#include +#include + +namespace rpc { + + +struct rpc_wavy { }; + +typedef mp::wavy::singleton wavy; + + +} // namespace rpc + +#endif /* rpc/wavy.h */ + diff --git a/src/rpc/weak_responder.h b/src/rpc/weak_responder.h new file mode 100644 index 0000000..5a77eff --- /dev/null +++ b/src/rpc/weak_responder.h @@ -0,0 +1,60 @@ +#ifndef RPC_WEAK_RESPONDER_FWD_H__ +#define RPC_WEAK_RESPONDER_FWD_H__ + +#include "rpc/types.h" + +namespace rpc { + + +class weak_responder { +public: + weak_responder(basic_weak_session s, msgid_t msgid); + + ~weak_responder(); + + template + void result(Result res); + + template + void result(Result res, auto_zone& z); + + template + void result(Result res, shared_zone& life); + + template + void error(Error err); + + template + void error(Error err, auto_zone& z); + + template + void error(Error err, shared_zone& life); + + void null(); + +private: + template + void call(Result& res, Error& err); + + template + void call(Result& res, Error& err, auto_zone& z); + + template + void call(Result& res, Error& err, shared_zone& life); + + template + void call_impl(Result& res, Error& err, ZoneType& life); + +private: + basic_weak_session m_session; + const msgid_t m_msgid; + +private: + weak_responder(); +}; + + +} // namespace rpc + +#endif /* rpc/weak_responder_fwd.h */ + diff --git a/src/rpc/weak_responder_impl.h b/src/rpc/weak_responder_impl.h new file mode 100644 index 0000000..9cd2675 --- /dev/null +++ b/src/rpc/weak_responder_impl.h @@ -0,0 +1,134 @@ +#ifndef RPC_WEAK_RESPONDER_H__ +#define RPC_WEAK_RESPONDER_H__ + +#include + +namespace rpc { + + +inline weak_responder::weak_responder(basic_weak_session s, msgid_t msgid) : + m_session(s), m_msgid(msgid) { } + +inline weak_responder::~weak_responder() { } + + +template 
+void weak_responder::result(Result res) +{ + LOG_TRACE("send response data with Success id=",m_msgid); + msgpack::type::nil err; + call(res, err); +} + +template +void weak_responder::result(Result res, auto_zone& z) +{ + LOG_TRACE("send response data with Success id=",m_msgid); + msgpack::type::nil err; + call(res, err, z); +} + +template +void weak_responder::result(Result res, shared_zone& life) +{ + LOG_TRACE("send response data with Success id=",m_msgid); + msgpack::type::nil err; + call(res, err, life); +} + +template +void weak_responder::error(Error err) +{ + LOG_TRACE("send response data with Error id=",m_msgid); + msgpack::type::nil res; + call(res, err); +} + +template +void weak_responder::error(Error err, auto_zone& z) +{ + LOG_TRACE("send response data with Error id=",m_msgid); + msgpack::type::nil res; + call(res, err, z); +} + +template +void weak_responder::error(Error err, shared_zone& life) +{ + LOG_TRACE("send response data with Error id=",m_msgid); + msgpack::type::nil res; + call(res, err, life); +} + +inline void weak_responder::null() +{ + LOG_TRACE("send response data with null id=",m_msgid); + msgpack::type::nil res; + msgpack::type::nil err; + call(res, err); +} + + +namespace detail { + template + struct zone_keeper { + zone_keeper(ZoneType& z) : m(z) { } + ~zone_keeper() { } + vrefbuffer buf; + private: + ZoneType m; + zone_keeper(); + zone_keeper(const zone_keeper&); + }; +} + +template +void weak_responder::call(Result& res, Error& err) +{ + msgpack::sbuffer buf; // FIXME use vrefbuffer? 
+ rpc_response msgres(res, err, m_msgid); + msgpack::pack(buf, msgres); + + basic_shared_session s(m_session.lock()); + if(!s) { throw std::runtime_error("lost session"); } + + s->send_data((const char*)buf.data(), buf.size(), + &::free, + reinterpret_cast(buf.data())); + buf.release(); +} + +template +inline void weak_responder::call(Result& res, Error& err, auto_zone& z) +{ + call_impl(res, err, z); +} + +template +inline void weak_responder::call(Result& res, Error& err, shared_zone& z) +{ + call_impl(res, err, z); +} + +template +void weak_responder::call_impl(Result& res, Error& err, ZoneType& life) +{ + std::auto_ptr > zk(new detail::zone_keeper(life)); + + rpc_response msgres(res, err, m_msgid); + msgpack::pack(zk->buf, msgres); + + basic_shared_session s(m_session.lock()); + if(!s) { throw std::runtime_error("lost session"); } + + s->send_datav(&zk->buf, + &mp::object_delete >, + reinterpret_cast(zk.get())); + zk.release(); +} + + +} // namespace rpc + +#endif /* rpc/weak_responder.h */ + diff --git a/t/00_load.t b/t/00_load.t new file mode 100644 index 0000000..3edecb0 --- /dev/null +++ b/t/00_load.t @@ -0,0 +1,8 @@ +# -*- mode: cperl -*- +use Test::More tests => 1; + +BEGIN { + use_ok('t::TestUtil'); +} + +diag( "load t::TestUtil" ); diff --git a/t/01_set.t b/t/01_set.t new file mode 100644 index 0000000..37cca90 --- /dev/null +++ b/t/01_set.t @@ -0,0 +1,48 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; + +=pod +== getとset == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. 
このテストを実行する +=cut + +plan tests => 1 * blocks; +#filters { kv => 'eval' }; + +run { + my $block = shift; + + my $mc0 = create_memcache_client(); + $mc0->set($block->key, $block->val); + undef $mc0; + + my $mc = create_memcache_client(); + is $mc->get($block->key), $block->val, $block->name; +} + +__END__ +=== alpha +--- key: curry +--- val: daisuki + +=== alnum +--- key: curry8 +--- val: 1ban + +=== num +--- key: 1 +--- val: 2 + +=== zero +--- key: 0 +--- val: 0 + +=== ja +--- key: キー +--- val: 表 + diff --git a/t/02_delete.t b/t/02_delete.t new file mode 100644 index 0000000..30174d6 --- /dev/null +++ b/t/02_delete.t @@ -0,0 +1,52 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; + +=pod +== delete == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. このテストを実行する +=cut + +plan tests => 1 * blocks; +#filters { kv => 'eval' }; + +run { + my $block = shift; + + my $mc0 = create_memcache_client(); + $mc0->set($block->key, $block->val); + undef $mc0; + + my $mc1 = create_memcache_client(); + $mc1->delete($block->key); + undef $mc1; + + my $mc = create_memcache_client(); + is $mc->get($block->key), undef, $block->name; +} + +__END__ +=== alpha +--- key: curry +--- val: daisuki + +=== alnum +--- key: curry8 +--- val: 1ban + +=== num +--- key: 1 +--- val: 2 + +=== zero +--- key: 0 +--- val: 0 + +=== ja +--- key: キー +--- val: 表 + diff --git a/t/03_get_multi.t b/t/03_get_multi.t new file mode 100644 index 0000000..626b5ec --- /dev/null +++ b/t/03_get_multi.t @@ -0,0 +1,47 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; + +=pod +== get_multi == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. 
このテストを実行する +=cut + +plan tests => 1 * blocks; +filters { data => 'yaml' }; + +run { + my $block = shift; + + my %data = %{$block->data}; + my @keys = keys(%data); + + my $mc0 = create_memcache_client(); + foreach my $key (@keys) { + $mc0->set($key, $data{$key}); + } + undef $mc0; + + my $mc = create_memcache_client(); + my %result = %{$mc->get_multi(@keys)}; + is %result, %data, $block->name; +} + +# ruby -e 'c="a"; r=[]; 100.times { r << ["k#{c}", "v#{c}"]; c.succ! }; puts "{#{r.map{|k,v|"#{k}: #{v}"}.join(", ")}}"' + +__END__ +=== few +--- data +--- {ka: va, kb: vb, kc: vc} + +=== many +--- data +--- {ka: va, kb: vb, kc: vc, kd: vd, ke: ve, kf: vf, kg: vg, kh: vh, ki: vi, kj: vj, kk: vk, kl: vl, km: vm, kn: vn, ko: vo, kp: vp, kq: vq, kr: vr, ks: vs, kt: vt, ku: vu, kv: vv, kw: vw, kx: vx, ky: vy, kz: vz, kaa: vaa, kab: vab, kac: vac, kad: vad, kae: vae, kaf: vaf, kag: vag, kah: vah, kai: vai, kaj: vaj, kak: vak, kal: val, kam: vam, kan: van, kao: vao, kap: vap, kaq: vaq, kar: var, kas: vas, kat: vat, kau: vau, kav: vav, kaw: vaw, kax: vax, kay: vay, kaz: vaz, kba: vba, kbb: vbb, kbc: vbc, kbd: vbd, kbe: vbe, kbf: vbf, kbg: vbg, kbh: vbh, kbi: vbi, kbj: vbj, kbk: vbk, kbl: vbl, kbm: vbm, kbn: vbn, kbo: vbo, kbp: vbp, kbq: vbq, kbr: vbr, kbs: vbs, kbt: vbt, kbu: vbu, kbv: vbv, kbw: vbw, kbx: vbx, kby: vby, kbz: vbz, kca: vca, kcb: vcb, kcc: vcc, kcd: vcd, kce: vce, kcf: vcf, kcg: vcg, kch: vch, kci: vci, kcj: vcj, kck: vck, kcl: vcl, kcm: vcm, kcn: vcn, kco: vco, kcp: vcp, kcq: vcq, kcr: vcr, kcs: vcs, kct: vct, kcu: vcu, kcv: vcv} + +=== manymany +--- data +--- {ka: va, kb: vb, kc: vc, kd: vd, ke: ve, kf: vf, kg: vg, kh: vh, ki: vi, kj: vj, kk: vk, kl: vl, km: vm, kn: vn, ko: vo, kp: vp, kq: vq, kr: vr, ks: vs, kt: vt, ku: vu, kv: vv, kw: vw, kx: vx, ky: vy, kz: vz, kaa: vaa, kab: vab, kac: vac, kad: vad, kae: vae, kaf: vaf, kag: vag, kah: vah, kai: vai, kaj: vaj, kak: vak, kal: val, kam: vam, kan: van, kao: vao, kap: vap, kaq: vaq, kar: var, kas: vas, kat: vat, 
kau: vau, kav: vav, kaw: vaw, kax: vax, kay: vay, kaz: vaz, kba: vba, kbb: vbb, kbc: vbc, kbd: vbd, kbe: vbe, kbf: vbf, kbg: vbg, kbh: vbh, kbi: vbi, kbj: vbj, kbk: vbk, kbl: vbl, kbm: vbm, kbn: vbn, kbo: vbo, kbp: vbp, kbq: vbq, kbr: vbr, kbs: vbs, kbt: vbt, kbu: vbu, kbv: vbv, kbw: vbw, kbx: vbx, kby: vby, kbz: vbz, kca: vca, kcb: vcb, kcc: vcc, kcd: vcd, kce: vce, kcf: vcf, kcg: vcg, kch: vch, kci: vci, kcj: vcj, kck: vck, kcl: vcl, kcm: vcm, kcn: vcn, kco: vco, kcp: vcp, kcq: vcq, kcr: vcr, kcs: vcs, kct: vct, kcu: vcu, kcv: vcv, kcw: vcw, kcx: vcx, kcy: vcy, kcz: vcz, kda: vda, kdb: vdb, kdc: vdc, kdd: vdd, kde: vde, kdf: vdf, kdg: vdg, kdh: vdh, kdi: vdi, kdj: vdj, kdk: vdk, kdl: vdl, kdm: vdm, kdn: vdn, kdo: vdo, kdp: vdp, kdq: vdq, kdr: vdr, kds: vds, kdt: vdt, kdu: vdu, kdv: vdv, kdw: vdw, kdx: vdx, kdy: vdy, kdz: vdz, kea: vea, keb: veb, kec: vec, ked: ved, kee: vee, kef: vef, keg: veg, keh: veh, kei: vei, kej: vej, kek: vek, kel: vel, kem: vem, ken: ven, keo: veo, kep: vep, keq: veq, ker: ver, kes: ves, ket: vet, keu: veu, kev: vev, kew: vew, kex: vex, key: vey, kez: vez, kfa: vfa, kfb: vfb, kfc: vfc, kfd: vfd, kfe: vfe, kff: vff, kfg: vfg, kfh: vfh, kfi: vfi, kfj: vfj, kfk: vfk, kfl: vfl, kfm: vfm, kfn: vfn, kfo: vfo, kfp: vfp, kfq: vfq, kfr: vfr, kfs: vfs, kft: vft, kfu: vfu, kfv: vfv, kfw: vfw, kfx: vfx, kfy: vfy, kfz: vfz, kga: vga, kgb: vgb, kgc: vgc, kgd: vgd, kge: vge, kgf: vgf, kgg: vgg, kgh: vgh, kgi: vgi, kgj: vgj, kgk: vgk, kgl: vgl, kgm: vgm, kgn: vgn, kgo: vgo, kgp: vgp, kgq: vgq, kgr: vgr} diff --git a/t/04_replace_on_add_server.t b/t/04_replace_on_add_server.t new file mode 100644 index 0000000..ba18108 --- /dev/null +++ b/t/04_replace_on_add_server.t @@ -0,0 +1,45 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; +use t::ManyData; +use Data::Dumper; + +=pod +== Serverの追加 1 == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. このテストを実行する + 6.1. Serverを1台起動する + 6.2. 
kumoctl manager attachを実行する + +== Serverの追加 2 == + ... + 6.1. Serverを2台起動する + ... + +== Serverの追加 N == + ... + 6.1. ServerをN台起動する + ... +=cut + +plan tests => 1 * blocks; +filters { num => 'eval' }; + +foreach my $block (blocks()) { + set_many_data($block->num); +} + +wait_user_operation("Serverを追加してkumoctl localhost attachを実行"); + +run { + my $block = shift; + is get_many_data($block->num), 0, $block->name; +} + +__END__ +=== 1000entries +--- num: 1000 + diff --git a/t/05_replication.t b/t/05_replication.t new file mode 100644 index 0000000..c5cbe2c --- /dev/null +++ b/t/05_replication.t @@ -0,0 +1,38 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; +use t::ManyData; + +=pod +== レプリケーション 1 == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. このテストを実行する + 6.1. Serverを1台落とす + +== レプリケーション 2 == + ... + 6.1. Serverを2台落とす + ... +=cut + +plan tests => 1 * blocks; +filters { num => 'eval' }; + +foreach my $block (blocks()) { + set_many_data($block->num); +} + +wait_user_operation("Serverを落とす"); + +run { + my $block = shift; + is get_many_data($block->num), 0, $block->name; +} + +__END__ +=== 1000entries +--- num: 1000 + diff --git a/t/06_getset_on_fault_server.t b/t/06_getset_on_fault_server.t new file mode 100644 index 0000000..f8deab6 --- /dev/null +++ b/t/06_getset_on_fault_server.t @@ -0,0 +1,57 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; +use t::ManyData; + +=pod +== サーバー障害時のgetとset 1 == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. このテストを実行する + 6.1. Serverを1台落とす + +== サーバー障害時のgetとset 2 == + ... + 6.1. Serverを2台落とす + ... 
+=cut + +plan tests => 1 * blocks; +#filters { kv => 'eval' }; + +wait_user_operation("Serverを落とす"); + +run { + my $block = shift; + + my $mc0 = create_memcache_client(); + $mc0->set($block->key, $block->val); + undef $mc0; + + my $mc = create_memcache_client(); + is $mc->get($block->key), $block->val, $block->name; +} + +__END__ +=== alpha +--- key: curry +--- val: daisuki + +=== alnum +--- key: curry8 +--- val: 1ban + +=== num +--- key: 1 +--- val: 2 + +=== zero +--- key: 0 +--- val: 0 + +=== ja +--- key: キー +--- val: 表 + diff --git a/t/07_get_multi_on_fault_server.t b/t/07_get_multi_on_fault_server.t new file mode 100644 index 0000000..5ac9b68 --- /dev/null +++ b/t/07_get_multi_on_fault_server.t @@ -0,0 +1,56 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; +use t::ManyData; + +=pod +== サーバー障害時のget_multi 1 == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. このテストを実行する + 6.1. Serverを1台落とす + +== サーバー障害時のget_multi 2 == + ... + 6.1. Serverを2台落とす + ... +=cut + +plan tests => 1 * blocks; +filters { data => 'yaml' }; + +wait_user_operation("Serverを落とす"); + +run { + my $block = shift; + + my %data = %{$block->data}; + my @keys = keys(%data); + + my $mc0 = create_memcache_client(); + foreach my $key (@keys) { + $mc0->set($key, $data{$key}); + } + undef $mc0; + + my $mc = create_memcache_client(); + my %result = %{$mc->get_multi(@keys)}; + is %result, %data, $block->name; +} + +# ruby -e 'c="a"; r=[]; 100.times { r << ["k#{c}", "v#{c}"]; c.succ! 
}; puts "{#{r.map{|k,v|"#{k}: #{v}"}.join(", ")}}"' + +__END__ +=== few +--- data +--- {ka: va, kb: vb, kc: vc} + +=== many +--- data +--- {ka: va, kb: vb, kc: vc, kd: vd, ke: ve, kf: vf, kg: vg, kh: vh, ki: vi, kj: vj, kk: vk, kl: vl, km: vm, kn: vn, ko: vo, kp: vp, kq: vq, kr: vr, ks: vs, kt: vt, ku: vu, kv: vv, kw: vw, kx: vx, ky: vy, kz: vz, kaa: vaa, kab: vab, kac: vac, kad: vad, kae: vae, kaf: vaf, kag: vag, kah: vah, kai: vai, kaj: vaj, kak: vak, kal: val, kam: vam, kan: van, kao: vao, kap: vap, kaq: vaq, kar: var, kas: vas, kat: vat, kau: vau, kav: vav, kaw: vaw, kax: vax, kay: vay, kaz: vaz, kba: vba, kbb: vbb, kbc: vbc, kbd: vbd, kbe: vbe, kbf: vbf, kbg: vbg, kbh: vbh, kbi: vbi, kbj: vbj, kbk: vbk, kbl: vbl, kbm: vbm, kbn: vbn, kbo: vbo, kbp: vbp, kbq: vbq, kbr: vbr, kbs: vbs, kbt: vbt, kbu: vbu, kbv: vbv, kbw: vbw, kbx: vbx, kby: vby, kbz: vbz, kca: vca, kcb: vcb, kcc: vcc, kcd: vcd, kce: vce, kcf: vcf, kcg: vcg, kch: vch, kci: vci, kcj: vcj, kck: vck, kcl: vcl, kcm: vcm, kcn: vcn, kco: vco, kcp: vcp, kcq: vcq, kcr: vcr, kcs: vcs, kct: vct, kcu: vcu, kcv: vcv} + +=== manymany +--- data +--- {ka: va, kb: vb, kc: vc, kd: vd, ke: ve, kf: vf, kg: vg, kh: vh, ki: vi, kj: vj, kk: vk, kl: vl, km: vm, kn: vn, ko: vo, kp: vp, kq: vq, kr: vr, ks: vs, kt: vt, ku: vu, kv: vv, kw: vw, kx: vx, ky: vy, kz: vz, kaa: vaa, kab: vab, kac: vac, kad: vad, kae: vae, kaf: vaf, kag: vag, kah: vah, kai: vai, kaj: vaj, kak: vak, kal: val, kam: vam, kan: van, kao: vao, kap: vap, kaq: vaq, kar: var, kas: vas, kat: vat, kau: vau, kav: vav, kaw: vaw, kax: vax, kay: vay, kaz: vaz, kba: vba, kbb: vbb, kbc: vbc, kbd: vbd, kbe: vbe, kbf: vbf, kbg: vbg, kbh: vbh, kbi: vbi, kbj: vbj, kbk: vbk, kbl: vbl, kbm: vbm, kbn: vbn, kbo: vbo, kbp: vbp, kbq: vbq, kbr: vbr, kbs: vbs, kbt: vbt, kbu: vbu, kbv: vbv, kbw: vbw, kbx: vbx, kby: vby, kbz: vbz, kca: vca, kcb: vcb, kcc: vcc, kcd: vcd, kce: vce, kcf: vcf, kcg: vcg, kch: vch, kci: vci, kcj: vcj, kck: vck, kcl: vcl, kcm: vcm, kcn: vcn, kco: vco, 
kcp: vcp, kcq: vcq, kcr: vcr, kcs: vcs, kct: vct, kcu: vcu, kcv: vcv, kcw: vcw, kcx: vcx, kcy: vcy, kcz: vcz, kda: vda, kdb: vdb, kdc: vdc, kdd: vdd, kde: vde, kdf: vdf, kdg: vdg, kdh: vdh, kdi: vdi, kdj: vdj, kdk: vdk, kdl: vdl, kdm: vdm, kdn: vdn, kdo: vdo, kdp: vdp, kdq: vdq, kdr: vdr, kds: vds, kdt: vdt, kdu: vdu, kdv: vdv, kdw: vdw, kdx: vdx, kdy: vdy, kdz: vdz, kea: vea, keb: veb, kec: vec, ked: ved, kee: vee, kef: vef, keg: veg, keh: veh, kei: vei, kej: vej, kek: vek, kel: vel, kem: vem, ken: ven, keo: veo, kep: vep, keq: veq, ker: ver, kes: ves, ket: vet, keu: veu, kev: vev, kew: vew, kex: vex, key: vey, kez: vez, kfa: vfa, kfb: vfb, kfc: vfc, kfd: vfd, kfe: vfe, kff: vff, kfg: vfg, kfh: vfh, kfi: vfi, kfj: vfj, kfk: vfk, kfl: vfl, kfm: vfm, kfn: vfn, kfo: vfo, kfp: vfp, kfq: vfq, kfr: vfr, kfs: vfs, kft: vft, kfu: vfu, kfv: vfv, kfw: vfw, kfx: vfx, kfy: vfy, kfz: vfz, kga: vga, kgb: vgb, kgc: vgc, kgd: vgd, kge: vge, kgf: vgf, kgg: vgg, kgh: vgh, kgi: vgi, kgj: vgj, kgk: vgk, kgl: vgl, kgm: vgm, kgn: vgn, kgo: vgo, kgp: vgp, kgq: vgq, kgr: vgr} diff --git a/t/08_replace_on_recover_server.t b/t/08_replace_on_recover_server.t new file mode 100644 index 0000000..fa26e29 --- /dev/null +++ b/t/08_replace_on_recover_server.t @@ -0,0 +1,41 @@ +# -*- coding: utf-8; mode: cperl -*- +use Test::Base; +use t::TestUtil; +use t::ManyData; + +=pod +== サーバー復旧時の再配置 1 == + 1. Managerを起動する + 2. Serverを起動する + 3. kumoctl manager attachを実行する + 4. Gatewayを起動する + 5. このテストを実行する + 6.1. Serverを1台落とす + 6.3. 落としたServerを再起動する + 6.4. kumoctl localhost attachを実行 + +== サーバー復旧時の再配置 2 == + ... + 6.1. Serverを2台落とす + ... 
+=cut + +plan tests => 1 * blocks; +filters { num => 'eval' }; + +foreach my $block (blocks()) { + set_many_data($block->num); +} + +wait_user_operation("Serverを落とす"); +wait_user_operation("Serverを復旧させてkumoctl localhost attachを実行"); + +run { + my $block = shift; + is get_many_data($block->num), 0, $block->name; +} + +__END__ +=== 1000entries +--- num: 1000 + diff --git a/t/09_server_fault_on_fault_manager.t b/t/09_server_fault_on_fault_manager.t new file mode 100644 index 0000000..b6c154c --- /dev/null +++ b/t/09_server_fault_on_fault_manager.t @@ -0,0 +1,11 @@ +=pod +== Manager障害時のServerの障害 == + 1. Managerを起動する + 2. Managerをもう1台起動する + 3. Managerを2台指定してServerを起動する + 4. kumoctl manager1 attachを実行する + 5. Gatewayを起動する + 6. Managerを1台落とす + 7. 04_から08_のテストを実行する +=cut + diff --git a/t/10_recover_manager.t b/t/10_recover_manager.t new file mode 100644 index 0000000..f02a187 --- /dev/null +++ b/t/10_recover_manager.t @@ -0,0 +1,12 @@ +=pod +== Manager復旧 == + 1. Managerを起動する + 2. Managerをもう1台起動する + 3. Managerを2台指定してServerを起動する + 4. kumoctl manager1 attachを実行する + 5. Gatewayを起動する + 6. Managerを1台落とす + 7. 落としたManagerを再起動する + 8. 
04_から08_のテストを実行する +=cut + diff --git a/t/ManyData.pm b/t/ManyData.pm new file mode 100644 index 0000000..09b1e4e --- /dev/null +++ b/t/ManyData.pm @@ -0,0 +1,48 @@ +package t::ManyData; + +use strict; +use warnings; +use utf8; +use Exporter qw(import); +our @EXPORT = qw(set_many_data get_many_data wait_user_operation); +use Carp; +use t::TestUtil; + +# set_many_data(number_of_entries_to_set) +sub set_many_data($) { + my $mc = create_memcache_client(); + for(my $i=0; $i < $_[0]; ++$i) { + $mc->set($i, $i); + } + undef $mc; +} + +# get_many_data(number_of_entries_to_get) +sub get_many_data($) { + my $mc = create_memcache_client(); + for(my $i=0; $i < $_[0]; ++$i) { + if($mc->get($i) != $i) { + return -1; + } + } + undef $mc; + return 0; +} + +# wait_user_operation("message") +sub wait_user_operation($) { + print STDERR $_[0], " and press Enter key: "; + my $str = ; +} + +__END__ + +# for Emacsen +# Local Variables: +# mode: cperl +# cperl-indent-level: 4 +# indent-tabs-mode: nil +# coding: utf-8 +# End: + +# vi: set ts=4 sw=4 sts=0 : diff --git a/t/README b/t/README new file mode 100644 index 0000000..49f6f29 --- /dev/null +++ b/t/README @@ -0,0 +1,16 @@ +== == + +cpan install Test::More +cpan install Test::Base +cpan install Cache::Memcached::Fast +֤ۤʥ⥸塼ʤ衪פäƤ줿ŬƤ + +== how to test == + +cd kumofs +prove t/*.t + or +prove -v t/*.t + or +prove -v t/01_set.t + diff --git a/t/TestUtil.pm b/t/TestUtil.pm new file mode 100644 index 0000000..48aa012 --- /dev/null +++ b/t/TestUtil.pm @@ -0,0 +1,46 @@ +package t::TestUtil; + +use strict; +use warnings; +use utf8; +use Exporter qw(import); +our @EXPORT = qw(y d create_memcache_client); +use Carp; +use Data::Dumper; +$Data::Dumper::Indent = 1; +$Data::Dumper::Deepcopy = 1; +$Data::Dumper::Sortkeys = 1; +use YAML::Syck; +$YAML::Syck::ImplicitTyping = 1; +$YAML::Syck::SingleQuote = 1; + +use Cache::Memcached::Fast; + +sub y(@) { + print YAML::Syck::Dump(\@_); +} +sub d(@) { + my $d = Dumper(\@_); + $d =~ 
s/\\x{([0-9a-z]+)}/chr(hex($1))/ge; + print $d; +} + +# create_memcache_client({ servers => [ '127.0.0.1:11211' ] }) +sub create_memcache_client { + my $opt = shift; + $opt->{servers} = [ '127.0.0.1:11211' ] unless exists $opt->{servers}; + return Cache::Memcached::Fast->new($opt); +} + + +__END__ + +# for Emacsen +# Local Variables: +# mode: cperl +# cperl-indent-level: 4 +# indent-tabs-mode: nil +# coding: utf-8 +# End: + +# vi: set ts=4 sw=4 sts=0 : diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..b12298b --- /dev/null +++ b/test/Makefile @@ -0,0 +1,15 @@ +CXXFLAGS = -Wall -g -O0 + +LDFLAGS = -lpthread -lm -Wl,-Bstatic -lmemcached -Wl,-Bdynamic + +TESTS = test_get test_delete test_multi test_nowrite test_set test_set_get_delete test_bench check_set_get_delete membench + +%: %.c + $(CC) $< $(CXXFLAGS) $(LDFLAGS) -o $@ + +all: $(TESTS) + +.PHONY: clean +clean: + $(RM) $(TESTS) + diff --git a/test/check_set_get_delete.c b/test/check_set_get_delete.c new file mode 100644 index 0000000..c8f67a9 --- /dev/null +++ b/test/check_set_get_delete.c @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmemcached/memcached.h" + +#define KEY_PREFIX "key" +#define VAL_PREFIX "val" + +void usage(void) +{ + printf("usage: ./test \n"); + exit(1); +} + +void pexit(const char* msg) +{ + perror(msg); + exit(1); +} + +int main(int argc, char* argv[]) +{ + if(argc < 4) { usage(); } + + char* host = argv[1]; + + unsigned short port = atoi(argv[2]); + if(port == 0) { usage(); } + + uint32_t num = atoi(argv[3]); + if(num == 0) { usage(); } + + memcached_st* mc = memcached_create(NULL); + if(mc == NULL) { pexit("memcached_create"); } + + memcached_server_add(mc, host, port); + +// memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1); + + char kbuf[strlen(KEY_PREFIX) + 11]; + char vbuf[strlen(VAL_PREFIX) + 11]; + +while(1) { + printf("s"); fflush(stdout); + uint32_t i; + for(i=0; i < num; 
++i) { + int klen = sprintf(kbuf, KEY_PREFIX "%d", i); + int vlen = sprintf(vbuf, VAL_PREFIX "%d", i); + memcached_return ret = memcached_set(mc, kbuf, klen, vbuf, vlen, 0, 0); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "set failed %d '%s'\n", ret, kbuf); + } + } + + printf("g"); fflush(stdout); + for(i=0; i < num; ++i) { + int klen = sprintf(kbuf, KEY_PREFIX "%d", i); + int vlen = sprintf(vbuf, VAL_PREFIX "%d", i); + size_t vallen; + uint32_t flags; + memcached_return rc; + char* val = memcached_get(mc, kbuf, klen, &vallen, &flags, &rc); + if(!val || vallen != vlen || memcmp(val, vbuf, vlen) != 0) { + fprintf(stderr, "false NEGATIVE get '%s'\n", kbuf); + } + } + + printf("d"); fflush(stdout); + for(i=0; i < num; ++i) { + int klen = sprintf(kbuf, KEY_PREFIX "%d", i); + memcached_return ret = memcached_delete(mc, kbuf, klen, 0); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "delete not performed %d '%s'\n", ret, kbuf); + } + } + + printf("g"); fflush(stdout); + for(i=0; i < num; ++i) { + int klen = sprintf(kbuf, KEY_PREFIX "%d", i); + //int vlen = sprintf(vbuf, VAL_PREFIX "%d", i); + size_t vallen; + uint32_t flags; + memcached_return rc; + char* val = memcached_get(mc, kbuf, klen, &vallen, &flags, &rc); + if(val) { + fprintf(stderr, "false POSITIVE get '%s'\n", kbuf); + } + } + +} + + return 0; +} + diff --git a/test/membench.c b/test/membench.c new file mode 100644 index 0000000..fa45730 --- /dev/null +++ b/test/membench.c @@ -0,0 +1,419 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef USE_APR +#include +#else +#include +#endif + +extern char* optarg; +extern int optint, opterr, optopt; +const char* g_progname; + +static const char* g_host = "127.0.0.1"; +static unsigned short g_port = 11211; + +static unsigned long g_num_request; +static unsigned long g_num_thread; +static unsigned long g_keylen = 8; +static unsigned long g_vallen = 1024; +static bool g_binary = false; +static 
bool g_noset = false; +static bool g_noget = false; + +static pthread_mutex_t g_count_lock; +static pthread_cond_t g_count_cond; +static volatile int g_thread_count; +static pthread_mutex_t g_thread_lock; + +#define KEY_FILL 'k' + +static struct timeval g_timer; + +void reset_timer() +{ + gettimeofday(&g_timer, NULL); +} + +void show_timer() +{ + struct timeval endtime; + double sec; + unsigned long size_bytes = (g_keylen+g_vallen) * g_num_request * g_num_thread; + unsigned long requests = g_num_request * g_num_thread; + gettimeofday(&endtime, NULL); + sec = (endtime.tv_sec - g_timer.tv_sec) + + (double)(endtime.tv_usec - g_timer.tv_usec) / 1000 / 1000; + printf("%f sec\n", sec); + printf("%f MB\n", ((double)size_bytes)/1024/1024); + printf("%f Mbps\n", ((double)size_bytes)*8/sec/1000/1000); + printf("%f req/sec\n", ((double)requests)/sec); + printf("%f usec/req\n", ((double)sec)/requests*1000*1000); +} + + +static pthread_t* create_worker(void* (*func)(void*)) +{ + unsigned long i; + pthread_t* threads = malloc(sizeof(pthread_t)*g_num_thread); + + pthread_mutex_lock(&g_thread_lock); + g_thread_count = 0; + + for(i=0; i < g_num_thread; ++i) { + int err = pthread_create(&threads[i], NULL, func, NULL); + if(err != 0) { + fprintf(stderr, "failed to create thread: %s\n", strerror(err)); + exit(1); + } + } + + pthread_mutex_lock(&g_count_lock); + while(g_thread_count < g_num_thread) { + pthread_cond_wait(&g_count_cond, &g_count_lock); + } + pthread_mutex_unlock(&g_count_lock); + + return threads; +} + +static void start_worker() +{ + pthread_mutex_unlock(&g_thread_lock); +} + +static void join_worker(pthread_t* threads) +{ + unsigned long i; + for(i=0; i < g_num_thread; ++i) { + void* ret; + int err = pthread_join(threads[i], &ret); + if(err != 0) { + fprintf(stderr, "failed to join thread: %s\n", strerror(err)); + } + } +} + +static unsigned long wait_worker_ready() +{ + unsigned long index; + pthread_mutex_lock(&g_count_lock); + index = g_thread_count++; + 
pthread_cond_signal(&g_count_cond); + pthread_mutex_unlock(&g_count_lock); + pthread_mutex_lock(&g_thread_lock); + pthread_mutex_unlock(&g_thread_lock); + return index; +} + + +#ifdef USE_APR +typedef struct apr_memcache_st { + apr_pool_t* p; + apr_memcache_server_t* srv; + apr_memcache_t* mc; +} apr_memcache_st; + +static apr_memcache_st initialize_user() +{ + apr_memcache_st st; + + if(apr_pool_create(&st.p, NULL) != APR_SUCCESS) { + perror("apr_pool_create failed"); + exit(1); + } + + if(apr_memcache_create(st.p, 10, 0, &st.mc) != APR_SUCCESS) { + perror("apr_memcached_create failed"); + exit(1); + } + + if(apr_memcache_server_create(st.p, g_host, g_port, + 1, 1, 1, 600, &st.srv) != APR_SUCCESS) { + //32, 64, 128, 120, &st.srv) != APR_SUCCESS) { + perror("apr_memcache_server_create failed"); + exit(1); + } + + if(apr_memcache_add_server(st.mc, st.srv) != APR_SUCCESS) { + perror("apr_memcache_add_server failed"); + exit(1); + } + + return st; +} +#else +static memcached_st* initialize_user() +{ + memcached_st* st = memcached_create(NULL); + if(!st) { + perror("memcached_create failed"); + exit(1); + } + + memcached_server_add(st, g_host, g_port); + if(g_binary) { + memcached_behavior_set(st, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1); + } + + //memcached_behavior_set(st, MEMCACHED_BEHAVIOR_POLL_TIMEOUT, 20*1000); + //memcached_behavior_set(st, MEMCACHED_BEHAVIOR_CONNECT_TIMEOUT, 20*1000); + + return st; +} +#endif + +static char* malloc_keybuf() +{ + char* keybuf = malloc(g_keylen+1); + if(!keybuf) { + perror("malloc for key failed"); + exit(1); + } + memset(keybuf, KEY_FILL, g_keylen); + keybuf[g_keylen] = '\0'; + return keybuf; +} + +static char* malloc_valbuf() +{ + char* valbuf = malloc(g_vallen); + if(!valbuf) { + perror("malloc for value failed"); + exit(1); + } + memset(valbuf, 'v', g_vallen); + //memset(valbuf, 0, g_vallen); + return valbuf; +} + +static void pack_keynum(char* keybuf, uint32_t i) +{ + /* 0x40 - 0x4f is printable ascii character */ + unsigned 
char* prefix = (unsigned char*)keybuf + g_keylen - 8; + prefix[0] = ((i >> 0) & 0x0f) + 0x40; + prefix[1] = ((i >> 4) & 0x0f) + 0x40; + prefix[2] = ((i >> 8) & 0x0f) + 0x40; + prefix[3] = ((i >>12) & 0x0f) + 0x40; + prefix[4] = ((i >>16) & 0x0f) + 0x40; + prefix[5] = ((i >>20) & 0x0f) + 0x40; + prefix[6] = ((i >>24) & 0x0f) + 0x40; + prefix[7] = ((i >>28) & 0x0f) + 0x40; +} + +static void* worker_set(void* trash) +{ + unsigned long i, t; +#ifdef USE_APR + char errstr[256]; + apr_status_t ret; + apr_memcache_st st = initialize_user(); +#else + memcached_return ret; + memcached_st* st = initialize_user(); +#endif + char* keybuf = malloc_keybuf(); + char* valbuf = malloc_valbuf(); + + printf("s"); + t = wait_worker_ready(); + + for(i=t*g_num_request, t=i+g_num_request; i < t; ++i) { + pack_keynum(keybuf, i); +#ifdef USE_APR + ret = apr_memcache_set(st.mc, keybuf, valbuf, g_vallen, 0, 0); + if(ret != APR_SUCCESS) { + fprintf(stderr, "set failed: %s\n", apr_strerror(ret,errstr,sizeof(errstr))); + } +#else + ret = memcached_set(st, keybuf, g_keylen, valbuf, g_vallen, 0, 0); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "set failed: %s\n", memcached_strerror(st, ret)); + } +#endif + } + + free(keybuf); + free(valbuf); +#ifdef USE_APR + apr_pool_destroy(st.p); +#else + memcached_free(st); +#endif + return NULL; +} + +static void* worker_get(void* trash) +{ + unsigned long i, t; + size_t vallen; + uint32_t flags; + char* value; +#ifdef USE_APR + char errstr[256]; + apr_status_t ret; + apr_memcache_st st = initialize_user(); +#else + memcached_return ret; + memcached_st* st = initialize_user(); +#endif + char* keybuf = malloc_keybuf(); + + printf("g"); + t = wait_worker_ready(); + + for(i=t*g_num_request, t=i+g_num_request; i < t; ++i) { + pack_keynum(keybuf, i); +#ifdef USE_APR + ret = apr_memcache_getp(st.mc, st.p, keybuf, &value, &vallen, &flags); + if(ret != APR_SUCCESS) { + fprintf(stderr, "get failed: %s\n", apr_strerror(ret,errstr,sizeof(errstr))); + } else 
if(!value) { + fprintf(stderr, "get failed: key not found\n"); + } +#else + value = memcached_get(st, keybuf, g_keylen, + &vallen, &flags, &ret); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "get failed: %s\n", memcached_strerror(st, ret)); + } else if(!value) { + fprintf(stderr, "get failed: key not found\n"); + } + free(value); +#endif + } + + free(keybuf); +#ifdef USE_APR + apr_pool_destroy(st.p); +#else + memcached_free(st); +#endif + return NULL; +} + + +static void usage(const char* msg) +{ + printf("Usage: %s [options] \n" + " -l HOST=127.0.0.1 : memcached server address\n" + " -p PORT=11211 : memcached server port\n" + " -k SIZE=8 : size of key >= 8\n" + " -v SIZE=1024 : size of value\n" + " -b : use binary protocol\n" + " -x : omit to set values\n" + " -s : omit to get benchmark\n" + " -h : print this help message\n" + , g_progname); + if(msg) { printf("error: %s\n", msg); } + exit(1); +} + +static void parse_argv(int argc, char* argv[]) +{ + int c; + g_progname = argv[0]; + while((c = getopt(argc, argv, "hbxsl:p:k:v:")) != -1) { + switch(c) { + case 'l': + g_host = optarg; + break; + + case 'p': + g_port = atoi(optarg); + if(g_port == 0) { usage("invalid port number"); } + break; + + case 'k': + g_keylen = atoi(optarg); + if(g_keylen < 8) { usage("invalid key size"); } + break; + + case 'v': + g_vallen = atoi(optarg); + if(g_vallen == 0) { usage("invalid value size"); } + break; + + case 'b': + g_binary = true; + break; + + case 'x': + g_noset = true; + break; + + case 's': + g_noget = true; + break; + + case 'h': /* FALL THROUGH */ + case '?': /* FALL THROUGH */ + default: + usage(NULL); + } + } + + argc -= optind; + + if(argc != 2) { usage(NULL); } + + g_num_thread = atoi(argv[optind]); + g_num_request = atoi(argv[optind+1]) / g_num_thread; + + if(g_num_request == 0) { usage("invalid number of request"); } + + printf("number of threads : %lu\n", g_num_thread); + printf("number of requests : %lu\n", g_num_thread * g_num_request); + printf("requests 
per thread : %lu\n", g_num_request); + printf("size of key : %lu bytes\n", g_keylen); + printf("size of value : %lu bytes\n", g_vallen); +} + +int main(int argc, char* argv[]) +{ + pthread_t* threads; + + parse_argv(argc, argv); + +#ifdef USE_APR + apr_initialize(); +#endif + + signal(SIGPIPE, SIG_IGN); + + pthread_mutex_init(&g_count_lock, NULL); + pthread_cond_init(&g_count_cond, NULL); + pthread_mutex_init(&g_thread_lock, NULL); + + if(!g_noset) { + printf("----\n["); + threads = create_worker(worker_set); + reset_timer(); + printf("] ...\n"); + start_worker(); + join_worker(threads); + show_timer(); + } + + if(!g_noget) { + printf("----\n["); + threads = create_worker(worker_get); + reset_timer(); + printf("] ...\n"); + start_worker(); + join_worker(threads); + show_timer(); + } + + return 0; +} + diff --git a/test/test_bench.c b/test/test_bench.c new file mode 100644 index 0000000..16e207d --- /dev/null +++ b/test/test_bench.c @@ -0,0 +1,262 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char* optarg; +extern int optint, opterr, optopt; +const char* g_progname; + +static const char* g_host = "127.0.0.1"; +static unsigned short g_port = 11211; + +static uint32_t g_num_request; +static uint32_t g_num_thread; +static size_t g_keylen = 8; +static size_t g_vallen = 1024; +static bool g_binary = false; +static bool g_noset = false; +static pthread_mutex_t g_thread_lock; + +#define KEY_FILL 'k' + +static struct timeval g_timer; + +void reset_timer() +{ + gettimeofday(&g_timer, NULL); +} + +void show_timer() +{ + size_t size_bytes = (g_keylen+g_vallen) * g_num_request * g_num_thread; + size_t requests = g_num_request * g_num_thread; + + struct timeval endtime; + double sec; + gettimeofday(&endtime, NULL); + sec = (endtime.tv_sec - g_timer.tv_sec) + + (double)(endtime.tv_usec - g_timer.tv_usec) / 1000 / 1000; + printf("%f sec\n", sec); + printf("%f MB\n", 
((double)size_bytes)/1024/1024); + printf("%f Mbps\n", ((double)size_bytes)*8/sec/1000/1000); + printf("%f req/sec\n", ((double)requests)/sec); +} + + +static void usage() +{ + printf("Usage: %s [options] \n" + " -l HOST : memcached server address\n" + " -p PORT : memcached server port\n" + " -k SIZE=8 : size of key >= 8\n" + " -v SIZE=1024 : size of value\n" + " -b : use binary protocol\n" + " -x : omit to set initial values\n" + " -h : print this help message\n" + , g_progname); + exit(1); +} + +static void parse_argv(int argc, char* argv[]) +{ + g_progname = argv[0]; + int c; + while((c = getopt(argc, argv, "hbxl:p:k:v:")) != -1) { + switch(c) { + case 'l': + g_host = optarg; + break; + + case 'p': + g_port = atoi(optarg); + if(g_port == 0) { usage(); } + break; + + case 'k': + g_keylen = atoi(optarg); + if(g_keylen < 8) { usage(); } + break; + + case 'v': + g_vallen = atoi(optarg); + break; + + case 'b': + g_binary = true; + break; + + case 'x': + g_noset = true; + break; + + case 'h': /* FALL THROUGH */ + case '?': /* FALL THROUGH */ + default: + usage(); + } + } + + argc -= optind; + + if(argc != 2) { usage(); } + + g_num_thread = atoi(argv[optind]); + g_num_request = atoi(argv[optind+1]); + + printf("number of threads : %u\n", g_num_thread); + printf("requests per thread : %u\n", g_num_request); + printf("size of key : %lu bytes\n", g_keylen); + printf("size of value : %lu bytes\n", g_vallen); +} + + +static memcached_st* initialize_user() +{ + memcached_st* st = memcached_create(NULL); + if(!st) { + perror("memcached_create failed"); + exit(1); + } + + memcached_server_add(st, g_host, g_port); + if(g_binary) { + memcached_behavior_set(st, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1); + } + + return st; +} + +inline void pack_keynum(char* keybuf, uint32_t i) +{ + // 0x40 - 0x4f is printable ascii character + unsigned char* prefix = (unsigned char*)keybuf + g_keylen - 8; + prefix[0] = ((i >> 0) & 0x0f) + 0x40; + prefix[1] = ((i >> 4) & 0x0f) + 0x40; + prefix[2] = 
((i >> 8) & 0x0f) + 0x40; + prefix[3] = ((i >>12) & 0x0f) + 0x40; + prefix[4] = ((i >>16) & 0x0f) + 0x40; + prefix[5] = ((i >>20) & 0x0f) + 0x40; + prefix[6] = ((i >>24) & 0x0f) + 0x40; + prefix[7] = ((i >>28) & 0x0f) + 0x40; +} + +static void* bench_func(void* trash) +{ + printf("start thread ok.\n"); + + char* keybuf = malloc(g_keylen); + if(!keybuf) { + perror("malloc for key failed"); + exit(1); + } + memset(keybuf, KEY_FILL, g_keylen); + + memcached_st* st = initialize_user(); + + pthread_mutex_lock(&g_thread_lock); + pthread_mutex_unlock(&g_thread_lock); + + memcached_return ret; + uint32_t i; + for(i=0; i < g_num_request; ++i) { + pack_keynum(keybuf, i); + size_t vallen; + uint32_t flags; + char* value = memcached_get(st, keybuf, g_keylen, + &vallen, &flags, &ret); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "get failed: %s\n", + memcached_strerror(st, ret)); + } else if(!value) { + fprintf(stderr, "get failed: key not found\n"); + } + free(value); + } + + memcached_free(st); + return NULL; +} + +int main(int argc, char* argv[]) +{ + uint32_t i; + memcached_return ret; + + parse_argv(argc, argv); + + signal(SIGPIPE, SIG_IGN); + + + printf("starting threads ...\n"); + + pthread_mutex_init(&g_thread_lock, NULL); + pthread_mutex_lock(&g_thread_lock); + + pthread_t threads[g_num_thread]; + for(i=0; i < g_num_thread; ++i) { + int err = pthread_create(&threads[i], NULL, bench_func, NULL); + if(err != 0) { + fprintf(stderr, "failed to create thread: %s\n", strerror(err)); + exit(1); + } + } + + + if(!g_noset) { + printf("setting initial values ...\n"); + + memcached_st* st = initialize_user(); + + char* keybuf = malloc(g_keylen); + if(!keybuf) { + perror("malloc for key failed"); + exit(1); + } + memset(keybuf, KEY_FILL, g_keylen); + + char* valbuf = malloc(g_vallen); + if(!valbuf) { + perror("malloc for value failed"); + exit(1); + } + memset(valbuf, 0, g_vallen); + + for(i=0; i < g_num_request; ++i) { + pack_keynum(keybuf, i); + ret = memcached_set(st, 
keybuf, g_keylen, valbuf, g_vallen, 0, 0); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "put failed: %s\n", + memcached_strerror(st, ret)); + } + } + + free(valbuf); + memcached_free(st); + } + + printf("start benchmark ...\n"); + + reset_timer(); + pthread_mutex_unlock(&g_thread_lock); + + for(i=0; i < g_num_thread; ++i) { + void* ret; + int err = pthread_join(threads[i], &ret); + if(err != 0) { + fprintf(stderr, "failed to join thread: %s\n", strerror(err)); + } + } + show_timer(); + + return 0; +} + diff --git a/test/test_bench2.c b/test/test_bench2.c new file mode 100644 index 0000000..a848069 --- /dev/null +++ b/test/test_bench2.c @@ -0,0 +1,307 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern char* optarg; +extern int optint, opterr, optopt; +const char* g_progname; + +static enum { + TEST_GET, + TEST_SET, +} g_test; + +static const char* g_host = "127.0.0.1"; +static unsigned short g_port = 11211; + +static uint32_t g_num_request; +static uint32_t g_num_thread; +static size_t g_keylen = 16; +static size_t g_vallen = 400; +static bool g_binary = false; +static pthread_mutex_t g_thread_lock; + +#define KEY_FILL "k" +#define KEY_LENGTH (strlen(KEY_PREFIX)+8) + +static struct timeval g_timer; + +void reset_timer() +{ + gettimeofday(&g_timer, NULL); +} + +void show_timer() +{ + size_t size_bytes = (KEY_LENGTH+g_vallen) * g_num_request * g_num_thread; + size_t requests = g_num_request * g_num_thread; + + struct timeval endtime; + double sec; + gettimeofday(&endtime, NULL); + sec = (endtime.tv_sec - g_timer.tv_sec) + + (double)(endtime.tv_usec - g_timer.tv_usec) / 1000 / 1000; + printf("%f sec\n", sec); + printf("%f MB\n", ((double)size_bytes)/1024/1024); + printf("%f Mbps\n", ((double)size_bytes)*8/sec/1000/1000); + printf("%f req/sec\n", ((double)requests)/sec); +} + + +static void usage() +{ + printf( + "Usage: %s set [options] \n" + " set key0 up to key$() using 
threads.\n" + " must be a multile of .\n" + " -l HOST : memcached server address\n" + " -p PORT : memcached server port\n" + " -b : use binary protocol\n" + " -k : size of key >= 8\n" + " -v : size of value\n" + " -h : print this help message\n" + "\n" + "Usage: %s get [options] \n" + " each thread gets key0 up to key$().\n" + " the keys have to be set initially.\n" + " -l HOST : memcached server address\n" + " -p PORT : memcached server port\n" + " -b : use binary protocol\n" + " -k : size of key >= 8\n" + " -v : size of value\n" + " -h : print this help message\n" + , g_progname + , g_progname); + exit(1); +} + +static void parse_argv(int argc, char* argv[]) +{ + g_progname = argv[0]; + int c; + while((c = getopt(argc, argv, "l:p:bkvh")) != -1) { + switch(c) { + case 'l': + g_host = optarg; + break; + + case 'p': + g_port = atoi(optarg); + break; + + case 'b': + g_binary = true; + break; + + case 'k': + g_keylen = atoi(optarg); + break; + + case 'v': + g_vallen = atoi(optarg); + break; + + case 'h': /* FALL THROUGH */ + case '?': /* FALL THROUGH */ + default: + usage(); + } + } + + argc -= optind; + + if(argc != 3) { usage(); } + + if(strcmp(argv[1], "get") == 0) { + g_test = TEST_GET; + } else if(strcmp(argv[1], "set") == 0) { + g_test = TEST_SET; + } else { + usage(); + } + + g_num_thread = atoi(argv[optind]); + g_num_request = atoi(argv[optind+1]); + + if(g_test == TEST_SET && !g_num_request % g_num_thread != 0) { + usage(); + } + + if(g_test == TEST_SET) { + printf("set benchmark\n"); + printf("protocol type : %s\n", g_binary ? "binary" : "text"); + printf("number of threads : %u\n", g_num_thread); + printf("number of entries : %u\n", g_num_request); + printf("key size : %u\n", g_keylen); + printf("value size : %u\n", g_vallen); + } else { + printf("get benchmark\n"); + printf("protocol type : %s\n", g_binary ? 
"binary" : "text"); + printf("number of threads : %u\n", g_num_thread); + printf("requests per thread : %u\n", g_num_request); + printf("key size : %u\n", g_keylen); + printf("value size : %u\n", g_vallen); + } +} + + +static memcached_st* initialize_user() +{ + memcached_st* st = memcached_create(NULL); + if(!st) { + perror("memcached_create failed"); + exit(1); + } + + memcached_server_add(st, g_host, g_port); + if(g_binary) { + memcached_behavior_set(st, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1); + } + + return st; +} + +inline void fill_key_prefix(char* keybuf) +{ + memset(keybuf, KEY_FILL, g_keylen-8); +} + +inline void pack_keynum(char* keybuf, uint32_t i) +{ + // 0x40 - 0x4f is printable ascii character + unsigned char* prefix = (unsigned char*)keybuf + strlen(KEY_PREFIX); + prefix[0] = ((i >> 0) & 0x0f) + 0x40; + prefix[1] = ((i >> 4) & 0x0f) + 0x40; + prefix[2] = ((i >> 8) & 0x0f) + 0x40; + prefix[3] = ((i >>12) & 0x0f) + 0x40; + prefix[4] = ((i >>16) & 0x0f) + 0x40; + prefix[5] = ((i >>20) & 0x0f) + 0x40; + prefix[6] = ((i >>24) & 0x0f) + 0x40; + prefix[7] = ((i >>28) & 0x0f) + 0x40; +} + +static void* thread_set(void* segment) +{ + uint32_t s = *(uint32_t*)segment; + uint32_t begin = g_num_request / s; + uint32_t end = begin + s; + + char keybuf[KEY_LENGTH]; + fill_key_prefix(keybuf); + + memcached_st* st = initialize_user(); + + char* valbuf = malloc(g_vallen); + if(!valbuf) { + perror("malloc() failed"); + exit(1); + } + memset(valbuf, 0, g_vallen); + + pthread_mutex_lock(&g_thread_lock); + pthread_mutex_unlock(&g_thread_lock); + + uint32_t i; + for(i=begin; i < end; ++i) { + pack_keynum(keybuf, i); + memcached_return ret = + memcached_set(st, keybuf, sizeof(keybuf), valbuf, g_vallen, 0, 0); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "put failed: %s\n", memcached_strerror(st, ret)); + } + } + + free(valbuf); + memcached_free(st); + return NULL; +} + +static void* thread_get(void* trash) +{ + char keybuf[KEY_LENGTH]; + memcpy(keybuf, KEY_PREFIX, 
strlen(KEY_PREFIX)); + + memcached_st* st = initialize_user(); + + pthread_mutex_lock(&g_thread_lock); + pthread_mutex_unlock(&g_thread_lock); + + uint32_t i; + for(i=0; i < g_num_request; ++i) { + pack_keynum(keybuf, i); + size_t vallen; + uint32_t flags; + memcached_return ret; + char* value = memcached_get(st, keybuf, sizeof(keybuf), + &vallen, &flags, &ret); + if(ret != MEMCACHED_SUCCESS) { + fprintf(stderr, "get failed: %s\n", memcached_strerror(st, ret)); + } else if(!value) { + fprintf(stderr, "get failed: key not found\n"); + } + } + + memcached_free(st); + return NULL; +} + +int main(int argc, char* argv[]) +{ + uint32_t i; + memcached_return ret; + + parse_argv(argc, argv); + + signal(SIGPIPE, SIG_IGN); + + + printf("starting threads ...\n"); + + pthread_mutex_init(&g_thread_lock, NULL); + pthread_mutex_lock(&g_thread_lock); + + void* (func)(void*); + switch(g_test) { + case TEST_GET: + func = thread_get; break; + case TEST_SET: + func = thread_set; break; + } + + pthread_t threads[g_num_thread]; + for(i=0; i < g_num_thread; ++i) { + int err = pthread_create(&threads[i], NULL, func, (void*)&i); + if(err != 0) { + fprintf(stderr, "failed to create thread: %s\n", strerror(err)); + exit(1); + } + } + + pthread_yield(); + pthread_yield(); + printf("start benchmark ...\n"); + + reset_timer(); + pthread_mutex_unlock(&g_thread_lock); + + for(i=0; i < g_num_thread; ++i) { + void* ret; + int err = pthread_join(threads[i], &ret); + if(err != 0) { + fprintf(stderr, "failed to join thread: %s\n", strerror(err)); + } + } + show_timer(); + + return 0; +} + diff --git a/test/test_delete.c b/test/test_delete.c new file mode 100644 index 0000000..26768fb --- /dev/null +++ b/test/test_delete.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmemcached/memcached.h" + +#define KEY_PREFIX "key" +#define VAL_PREFIX "val" + +void usage(void) +{ + printf("usage: ./test \n"); + exit(1); +} + +void pexit(const char* 
msg) +{ + perror(msg); + exit(1); +} + +int main(int argc, char* argv[]) +{ + if(argc < 4) { usage(); } + + char* host = argv[1]; + + unsigned short port = atoi(argv[2]); + if(port == 0) { usage(); } + + uint32_t num = atoi(argv[3]); + if(num == 0) { usage(); } + + memcached_st* mc = memcached_create(NULL); + if(mc == NULL) { pexit("memcached_create"); } + + memcached_server_add(mc, host, port); + +// memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1); + + char kbuf[strlen(KEY_PREFIX) + 11]; + char vbuf[strlen(VAL_PREFIX) + 11]; + +while(1) { + uint32_t i; + for(i=0; i < num; ++i) { + int klen = sprintf(kbuf, KEY_PREFIX "%d", i); + int vlen = sprintf(vbuf, VAL_PREFIX "%d", i); + printf("set '%s' = '%s'\n", kbuf, vbuf); + memcached_set(mc, kbuf, klen, vbuf, vlen, 0, 0); + } + + for(i=0; i < num; ++i) { + int klen = sprintf(kbuf, KEY_PREFIX "%d", i); + printf("delete '%s'\n", kbuf); + memcached_delete(mc, kbuf, klen, 0); + } +} + + return 0; +} + diff --git a/test/test_get.c b/test/test_get.c new file mode 100644 index 0000000..9208ec2 --- /dev/null +++ b/test/test_get.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmemcached/memcached.h" + +#define KEY_PREFIX "key" +#define VAL_PREFIX "val" + +void usage(void) +{ + printf("usage: ./test \n"); + exit(1); +} + +void pexit(const char* msg) +{ + perror(msg); + exit(1); +} + +int main(int argc, char* argv[]) +{ + if(argc < 4) { usage(); } + + char* host = argv[1]; + + unsigned short port = atoi(argv[2]); + if(port == 0) { usage(); } + + uint32_t num = atoi(argv[3]); + if(num == 0) { usage(); } + + memcached_st* mc = memcached_create(NULL); + if(mc == NULL) { pexit("memcached_create"); } + + memcached_server_add(mc, host, port); + +// memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1); + + char kbuf[strlen(KEY_PREFIX) + 11]; + char vbuf[strlen(VAL_PREFIX) + 11]; + + uint32_t i; + for(i=0; i < num; ++i) { + int klen = 
sprintf(kbuf, KEY_PREFIX "%d", i); + int vlen = sprintf(vbuf, VAL_PREFIX "%d", i); + printf("set '%s' = '%s'\n", kbuf, vbuf); + memcached_set(mc, kbuf, klen, vbuf, vlen, 0, 0); + } + +while(1) { + for(i=0; i < num; ++i) { + int klen = sprintf(kbuf, KEY_PREFIX "%d", i); + int vlen = sprintf(vbuf, VAL_PREFIX "%d", i); + size_t vallen; + uint32_t flags; + memcached_return rc; + char* val = memcached_get(mc, kbuf, klen, &vallen, &flags, &rc); + if(!val) { + fprintf(stderr, "** key '%s' not found **\n", kbuf); + } else if(vallen != vlen || memcmp(val, vbuf, vlen) != 0) { + fprintf(stderr, "** key '%s' not match ** '", kbuf); + fwrite(val, vallen, 1, stderr); + fprintf(stderr, "'\n"); + } else { + printf("get '%s' = '", kbuf); + fwrite(val, vallen, 1, stdout); + printf("'\n"); + } + } +} + + return 0; +} + diff --git a/test/test_multi.c b/test/test_multi.c new file mode 100644 index 0000000..613ff41 --- /dev/null +++ b/test/test_multi.c @@ -0,0 +1,131 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmemcached/memcached.h" + +#define KEY_PREFIX "key" +#define VAL_PREFIX "val" + +void usage(void) +{ + printf("usage: ./test \n"); + exit(1); +} + +void pexit(const char* msg) +{ + perror(msg); + exit(1); +} + +int main(int argc, char* argv[]) +{ + if(argc < 4) { usage(); } + + char* host = argv[1]; + + unsigned short port = atoi(argv[2]); + if(port == 0) { usage(); } + + uint32_t num = atoi(argv[3]); + if(num == 0) { usage(); } + + memcached_st* mc = memcached_create(NULL); + if(mc == NULL) { pexit("memcached_create"); } + + memcached_server_add(mc, host, port); + + memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1); + //memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_POLL_TIMEOUT, 20*1000); + //memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_CONNECT_TIMEOUT, 20*1000); + + char kbuf[strlen(KEY_PREFIX) + 11]; + char vbuf[strlen(VAL_PREFIX) + 11]; + + uint32_t i; + for(i=0; i < num; ++i) { + int klen = sprintf(kbuf, 
KEY_PREFIX "%d", i); + int vlen = sprintf(vbuf, VAL_PREFIX "%d", i); + printf("set '%s' = '%s'\n", kbuf, vbuf); + memcached_set(mc, kbuf, klen, vbuf, vlen, 0, 0); + } + + static const int multi = 10; + + char rmkbuf[multi][strlen(KEY_PREFIX) + 11]; + char rmvbuf[multi][strlen(VAL_PREFIX) + 11]; + size_t mklen[multi]; + size_t mvlen[multi]; + int j; + char* mkbuf[multi]; + char* mvbuf[multi]; + for(j=0; j < multi; ++j) { + mkbuf[j] = rmkbuf[j]; + mvbuf[j] = rmvbuf[j]; + } +while(1) { + for(i=0; i < num-multi; ++i) { + for(j=0; j < multi; ++j) { + mklen[j] = sprintf(mkbuf[j], KEY_PREFIX "%d", i+j); + mvlen[j] = sprintf(mvbuf[j], VAL_PREFIX "%d", i+j); + } + memcached_return rc; + printf("get %s %s\n", mkbuf[0], mkbuf[multi-1]); + rc = memcached_mget(mc, mkbuf, mklen, multi); + if(rc != MEMCACHED_SUCCESS) { + fprintf(stderr, "mget failed %s\n", memcached_strerror(mc, rc)); + continue; + } + int n = 0; + while(1) { + size_t keylen = sizeof(kbuf); + size_t vallen; + uint32_t flags; + char* val = memcached_fetch(mc, kbuf, &keylen, &vallen, &flags, &rc); + if(val == NULL) { break; } + if(rc != MEMCACHED_SUCCESS) { + fprintf(stderr, "fetch failed %s\n", memcached_strerror(mc, rc)); + break; + } + int matched = 0; + for(j=0; j < multi; ++j) { + if(keylen == mklen[j] && memcmp(mkbuf[j], kbuf, keylen) == 0) { + if(vallen != mvlen[j] || memcmp(mvbuf[j], val, vallen) != 0) { + fprintf(stderr, "** key '%s' not match ** '", mkbuf[j]); + fwrite(val, vallen, 1, stderr); + fprintf(stderr, "'\n"); + } else { + printf("fetch '%s' = '", mkbuf[j]); + fwrite(val, vallen, 1, stdout); + printf("'\n"); + } + n += (j+1); + matched = 1; + break; + } + } + if(!matched) { + fprintf(stderr, "** unexpected key '"); + fwrite(kbuf, keylen, 1, stderr); + fprintf(stderr, "'\n"); + } + } + + int ok = 0; for(j=0; j < multi; ++j) { ok += (j+1); } + if(ok != n) { + fprintf(stderr, "** some keys are not found **\n"); + } else { + printf("mget ok\n"); + } + } +} + + return 0; +} + diff --git 
a/test/test_nowrite.c b/test/test_nowrite.c
new file mode 100644
index 0000000..d2fe8ad
--- /dev/null
+++ b/test/test_nowrite.c
@@ -0,0 +1,99 @@
+/*
+ * test_nowrite: reads key0 .. key<num-1> from a memcached server in an
+ * endless loop and checks that each value is "val<i>".  The program never
+ * stores anything itself, so a key is reported as not found unless some
+ * other client has set it.
+ *
+ * NOTE(review): the header names on the #include lines were missing from
+ * the patch text; the list below is what this file needs to compile.
+ */
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libmemcached/memcached.h"
+
+#define KEY_PREFIX "key"
+#define VAL_PREFIX "val"
+
+/* prefix + up to 10 decimal digits of a uint32_t + terminating NUL */
+#define KEY_BUFSIZE (sizeof(KEY_PREFIX) + 10)
+#define VAL_BUFSIZE (sizeof(VAL_PREFIX) + 10)
+
+/* Print the command line synopsis and exit with status 1. */
+void usage(void)
+{
+    printf("usage: ./test <host> <port> <num>\n");
+    exit(1);
+}
+
+/* Print msg followed by the errno description, then exit with status 1. */
+void pexit(const char* msg)
+{
+    perror(msg);
+    exit(1);
+}
+
+int main(int argc, char* argv[])
+{
+    if(argc < 4) { usage(); }
+
+    char* host = argv[1];
+
+    /* parse into a long first so that out-of-range input is rejected
+     * instead of being silently truncated to 16 bits */
+    long portnum = strtol(argv[2], NULL, 10);
+    if(portnum <= 0 || portnum > 65535) { usage(); }
+    unsigned short port = (unsigned short)portnum;
+
+    /* reject negative counts, which used to wrap to a huge uint32_t */
+    long count = strtol(argv[3], NULL, 10);
+    if(count <= 0 || count > INT32_MAX) { usage(); }
+    uint32_t num = (uint32_t)count;
+
+    memcached_st* mc = memcached_create(NULL);
+    if(mc == NULL) { pexit("memcached_create"); }
+
+    memcached_return rc = memcached_server_add(mc, host, port);
+    if(rc != MEMCACHED_SUCCESS) {
+        fprintf(stderr, "memcached_server_add failed: %s\n",
+                memcached_strerror(mc, rc));
+        exit(1);
+    }
+
+    /* uncomment to use the binary protocol */
+//  memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1);
+
+    char kbuf[KEY_BUFSIZE];
+    char vbuf[VAL_BUFSIZE];
+
+    uint32_t i;
+
+while(1) {
+    for(i=0; i < num; ++i) {
+        int klen = snprintf(kbuf, sizeof(kbuf), KEY_PREFIX "%" PRIu32, i);
+        int vlen = snprintf(vbuf, sizeof(vbuf), VAL_PREFIX "%" PRIu32, i);
+        size_t vallen;
+        uint32_t flags;
+        char* val = memcached_get(mc, kbuf, klen, &vallen, &flags, &rc);
+        if(!val) {
+            fprintf(stderr, "** key '%s' not found **\n", kbuf);
+        } else if(vallen != (size_t)vlen || memcmp(val, vbuf, vlen) != 0) {
+            fprintf(stderr, "** key '%s' not match ** '", kbuf);
+            fwrite(val, vallen, 1, stderr);
+            fprintf(stderr, "'\n");
+        } else {
+            printf("get '%s' = '", kbuf);
+            fwrite(val, vallen, 1, stdout);
+            printf("'\n");
+        }
+        /* memcached_get hands ownership of the buffer to the caller */
+        free(val);
+    }
+}
+
+    return 0;
+}
+
diff --git a/test/test_set.c b/test/test_set.c
new file mode 100644
index 0000000..6a39615
--- /dev/null
+++ b/test/test_set.c
@@ -0,0 +1,85 @@
+/*
+ * test_set: stores key<i> = val<i> for i in 0 .. num-1 on a memcached
+ * server, over and over, until the process is killed.
+ *
+ * NOTE(review): the header names on the #include lines were missing from
+ * the patch text; the list below is what this file needs to compile.
+ */
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libmemcached/memcached.h"
+
+#define KEY_PREFIX "key"
+#define VAL_PREFIX "val"
+
+/* prefix + up to 10 decimal digits of a uint32_t + terminating NUL */
+#define KEY_BUFSIZE (sizeof(KEY_PREFIX) + 10)
+#define VAL_BUFSIZE (sizeof(VAL_PREFIX) + 10)
+
+/* Print the command line synopsis and exit with status 1. */
+void usage(void)
+{
+    printf("usage: ./test <host> <port> <num>\n");
+    exit(1);
+}
+
+/* Print msg followed by the errno description, then exit with status 1. */
+void pexit(const char* msg)
+{
+    perror(msg);
+    exit(1);
+}
+
+int main(int argc, char* argv[])
+{
+    if(argc < 4) { usage(); }
+
+    char* host = argv[1];
+
+    /* parse into a long first so that out-of-range input is rejected
+     * instead of being silently truncated to 16 bits */
+    long portnum = strtol(argv[2], NULL, 10);
+    if(portnum <= 0 || portnum > 65535) { usage(); }
+    unsigned short port = (unsigned short)portnum;
+
+    /* reject negative counts, which used to wrap to a huge uint32_t */
+    long count = strtol(argv[3], NULL, 10);
+    if(count <= 0 || count > INT32_MAX) { usage(); }
+    uint32_t num = (uint32_t)count;
+
+    memcached_st* mc = memcached_create(NULL);
+    if(mc == NULL) { pexit("memcached_create"); }
+
+    memcached_return rc = memcached_server_add(mc, host, port);
+    if(rc != MEMCACHED_SUCCESS) {
+        fprintf(stderr, "memcached_server_add failed: %s\n",
+                memcached_strerror(mc, rc));
+        exit(1);
+    }
+
+    /* uncomment to use the binary protocol */
+//  memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1);
+
+    char kbuf[KEY_BUFSIZE];
+    char vbuf[VAL_BUFSIZE];
+
+while(1) {
+    uint32_t i;
+    for(i=0; i < num; ++i) {
+        int klen = snprintf(kbuf, sizeof(kbuf), KEY_PREFIX "%" PRIu32, i);
+        int vlen = snprintf(vbuf, sizeof(vbuf), VAL_PREFIX "%" PRIu32, i);
+        printf("set '%s' = '%s'\n", kbuf, vbuf);
+        rc = memcached_set(mc, kbuf, klen, vbuf, vlen, 0, 0);
+        if(rc != MEMCACHED_SUCCESS) {
+            fprintf(stderr, "** set '%s' failed: %s **\n",
+                    kbuf, memcached_strerror(mc, rc));
+        }
+    }
+}
+
+    return 0;
+}
+
diff --git a/test/test_set_get_delete.c b/test/test_set_get_delete.c
new file mode 100644
index 0000000..7e6d23b
--- /dev/null
+++ b/test/test_set_get_delete.c
@@ -0,0 +1,122 @@
+/*
+ * test_set_get_delete: for i in 0 .. num-1 stores key<i> = val<i>, reads
+ * every key back and compares it with the stored value, then deletes every
+ * key.  The three passes repeat until the process is killed.
+ *
+ * NOTE(review): the header names on the #include lines were missing from
+ * the patch text; the list below is what this file needs to compile.
+ */
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libmemcached/memcached.h"
+
+#define KEY_PREFIX "key"
+#define VAL_PREFIX "val"
+
+/* prefix + up to 10 decimal digits of a uint32_t + terminating NUL */
+#define KEY_BUFSIZE (sizeof(KEY_PREFIX) + 10)
+#define VAL_BUFSIZE (sizeof(VAL_PREFIX) + 10)
+
+/* Print the command line synopsis and exit with status 1. */
+void usage(void)
+{
+    printf("usage: ./test <host> <port> <num>\n");
+    exit(1);
+}
+
+/* Print msg followed by the errno description, then exit with status 1. */
+void pexit(const char* msg)
+{
+    perror(msg);
+    exit(1);
+}
+
+int main(int argc, char* argv[])
+{
+    if(argc < 4) { usage(); }
+
+    char* host = argv[1];
+
+    /* parse into a long first so that out-of-range input is rejected
+     * instead of being silently truncated to 16 bits */
+    long portnum = strtol(argv[2], NULL, 10);
+    if(portnum <= 0 || portnum > 65535) { usage(); }
+    unsigned short port = (unsigned short)portnum;
+
+    /* reject negative counts, which used to wrap to a huge uint32_t */
+    long count = strtol(argv[3], NULL, 10);
+    if(count <= 0 || count > INT32_MAX) { usage(); }
+    uint32_t num = (uint32_t)count;
+
+    memcached_st* mc = memcached_create(NULL);
+    if(mc == NULL) { pexit("memcached_create"); }
+
+    memcached_return rc = memcached_server_add(mc, host, port);
+    if(rc != MEMCACHED_SUCCESS) {
+        fprintf(stderr, "memcached_server_add failed: %s\n",
+                memcached_strerror(mc, rc));
+        exit(1);
+    }
+
+    /* uncomment to use the binary protocol */
+//  memcached_behavior_set(mc, MEMCACHED_BEHAVIOR_BINARY_PROTOCOL, 1);
+
+    char kbuf[KEY_BUFSIZE];
+    char vbuf[VAL_BUFSIZE];
+
+while(1) {
+    uint32_t i;
+
+    /* pass 1: store every pair */
+    for(i=0; i < num; ++i) {
+        int klen = snprintf(kbuf, sizeof(kbuf), KEY_PREFIX "%" PRIu32, i);
+        int vlen = snprintf(vbuf, sizeof(vbuf), VAL_PREFIX "%" PRIu32, i);
+        printf("set '%s' = '%s'\n", kbuf, vbuf);
+        rc = memcached_set(mc, kbuf, klen, vbuf, vlen, 0, 0);
+        if(rc != MEMCACHED_SUCCESS) {
+            fprintf(stderr, "** set '%s' failed: %s **\n",
+                    kbuf, memcached_strerror(mc, rc));
+        }
+    }
+
+    /* pass 2: read every pair back and verify it */
+    for(i=0; i < num; ++i) {
+        int klen = snprintf(kbuf, sizeof(kbuf), KEY_PREFIX "%" PRIu32, i);
+        int vlen = snprintf(vbuf, sizeof(vbuf), VAL_PREFIX "%" PRIu32, i);
+        size_t vallen;
+        uint32_t flags;
+        char* val = memcached_get(mc, kbuf, klen, &vallen, &flags, &rc);
+        if(!val) {
+            fprintf(stderr, "** key '%s' not found **\n", kbuf);
+        } else if(vallen != (size_t)vlen || memcmp(val, vbuf, vlen) != 0) {
+            fprintf(stderr, "** key '%s' not match ** '", kbuf);
+            fwrite(val, vallen, 1, stderr);
+            fprintf(stderr, "'\n");
+        } else {
+            printf("get '%s' = '", kbuf);
+            fwrite(val, vallen, 1, stdout);
+            printf("'\n");
+        }
+        /* memcached_get hands ownership of the buffer to the caller */
+        free(val);
+    }
+
+    /* pass 3: remove every key again */
+    for(i=0; i < num; ++i) {
+        int klen = snprintf(kbuf, sizeof(kbuf), KEY_PREFIX "%" PRIu32, i);
+        printf("delete '%s'\n", kbuf);
+        rc = memcached_delete(mc, kbuf, klen, 0);
+        if(rc != MEMCACHED_SUCCESS) {
+            fprintf(stderr, "** delete '%s' failed: %s **\n",
+                    kbuf, memcached_strerror(mc, rc));
+        }
+    }
+}
+
+    return 0;
+}
+