diff --git a/CMakeLists.txt b/CMakeLists.txt index f2f497332..6d197e074 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -519,7 +519,7 @@ set(COMMON_LINK_DIRS ${DLIB_LIB_DIR}) if (USE_HDF5) set(COMMON_LINK_LIBS - ddetect ${CUDA_LIB_DEPS} glog gflags ${OpenCV_LIBS} curlpp curl hdf5_cpp ${Boost_LIBRARIES} + ddetect ${CUDA_LIB_DEPS} glog gflags ${OpenCV_LIBS} curlpp curl hdf5_cpp ${Boost_LIBRARIES} archive ${CAFFE_LIB_DEPS} ${CAFFE2_LIB_DEPS} ${TF_LIB_DEPS} diff --git a/README.md b/README.md index e0b7d38fe..1e6dc4446 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,7 @@ Below are instructions for Ubuntu 14.04 LTS and 16.04 LTS. For other Linux and U Beware of dependencies, typically on Debian/Ubuntu Linux, do: ``` -sudo apt-get install build-essential libgoogle-glog-dev libgflags-dev libeigen3-dev libopencv-dev libcppnetlib-dev libboost-dev libboost-iostreams-dev libcurlpp-dev libcurl4-openssl-dev protobuf-compiler libopenblas-dev libhdf5-dev libprotobuf-dev libleveldb-dev libsnappy-dev liblmdb-dev libutfcpp-dev cmake libgoogle-perftools-dev unzip python-setuptools python-dev libspdlog-dev python-six python-enum34 +sudo apt-get install build-essential libgoogle-glog-dev libgflags-dev libeigen3-dev libopencv-dev libcppnetlib-dev libboost-dev libboost-iostreams-dev libcurlpp-dev libcurl4-openssl-dev protobuf-compiler libopenblas-dev libhdf5-dev libprotobuf-dev libleveldb-dev libsnappy-dev liblmdb-dev libutfcpp-dev cmake libgoogle-perftools-dev unzip python-setuptools python-dev libspdlog-dev python-six python-enum34 libarchive-dev ``` #### Default build with Caffe
diff --git a/src/backends/caffe/caffemodel.cc b/src/backends/caffe/caffemodel.cc
index 0d91b0f9c..f171db840 100644
--- a/src/backends/caffe/caffemodel.cc
+++ b/src/backends/caffe/caffemodel.cc
@@ -29,8 +29,34 @@
 
 namespace dd
 {
+  CaffeModel::CaffeModel(const APIData &ad, APIData &adg, // ad: model settings read below; adg and logger are handed to MLModel
+                         const std::shared_ptr &logger)
+    :MLModel(ad,adg,logger)
+  {
+    if (ad.has("templates"))
+      this->_mlmodel_template_repo = ad.get("templates").get();
+    else this->_mlmodel_template_repo += "caffe/"; // default
+
+    if (ad.has("def")) // each optional entry below is copied into its member only when present in ad
+      _def = ad.get("def").get();
+    if (ad.has("trainf"))
+      _trainf = ad.get("trainf").get();
+    if (ad.has("weights"))
+      _weights = ad.get("weights").get();
+    if (ad.has("corresp"))
+      _corresp = ad.get("corresp").get();
+    if (ad.has("solver"))
+      _solver = ad.get("solver").get();
+    if (ad.has("repository"))
+      {
+        if (read_from_repository(ad.get("repository").get(),spdlog::get("api"))) // NOTE(review): uses the "api" logger from the spdlog registry, not the logger argument -- confirm intended
+          throw MLLibBadParamException("error reading or listing Caffe models in repository " + _repo); // a true (non-zero) result is treated as failure
+      }
+    read_corresp_file(); // called whether or not "repository" was given
+  }
+
   CaffeModel::CaffeModel(const APIData &ad)
-    :MLModel(ad)
+    :MLModel(ad)
   {
     if (ad.has("templates"))
       this->_mlmodel_template_repo = ad.get("templates").get();
diff --git a/src/backends/caffe/caffemodel.h b/src/backends/caffe/caffemodel.h index ed1fa4479..34de8b3f7 100644 --- a/src/backends/caffe/caffemodel.h +++ b/src/backends/caffe/caffemodel.h @@ -34,6 +34,8 @@ namespace dd public: CaffeModel(): MLModel() {} CaffeModel(const APIData &ad); + CaffeModel(const APIData &ad, APIData &adg, + const std::shared_ptr &logger); CaffeModel(const APIData &ad, const std::string &repo) :MLModel(ad, repo) {} ~CaffeModel() {}; diff --git a/src/backends/caffe2/caffe2model.cc b/src/backends/caffe2/caffe2model.cc index 2c2af0f63..261edec86 100644 --- a/src/backends/caffe2/caffe2model.cc +++ b/src/backends/caffe2/caffe2model.cc @@ -25,8 +25,9 @@ namespace dd { - Caffe2Model::Caffe2Model(const APIData &ad) - :MLModel() + Caffe2Model::Caffe2Model(const APIData &ad, APIData &adg, + const std::shared_ptr &logger) + :MLModel(ad, adg, logger) { std::map names = { diff --git a/src/backends/caffe2/caffe2model.h b/src/backends/caffe2/caffe2model.h index cffb67383..997bd450e 100644 --- a/src/backends/caffe2/caffe2model.h +++ b/src/backends/caffe2/caffe2model.h @@ -33,7 +33,8 @@ namespace dd { class Caffe2Model : public MLModel { public:
Caffe2Model():MLModel() {} - Caffe2Model(const APIData &ad); + Caffe2Model(const APIData &ad, APIData &adg, + const std::shared_ptr &logger); Caffe2Model(const std::string &repo) :MLModel(repo) {} ~Caffe2Model() {}; diff --git a/src/backends/dlib/dlibmodel.cc b/src/backends/dlib/dlibmodel.cc index f64272298..f0f542da2 100644 --- a/src/backends/dlib/dlibmodel.cc +++ b/src/backends/dlib/dlibmodel.cc @@ -24,7 +24,10 @@ #include namespace dd { - DlibModel::DlibModel(const APIData &ad) { + DlibModel::DlibModel(const APIData &ad, APIData &adg, + const std::shared_ptr &logger) + :MLModel(ad,adg,logger) + { if (ad.has("repository")) { read_from_repository(ad.get("repository").get(), spdlog::get("api")); // XXX: beware, error not caught diff --git a/src/backends/dlib/dlibmodel.h b/src/backends/dlib/dlibmodel.h index 881878a61..2469809ef 100644 --- a/src/backends/dlib/dlibmodel.h +++ b/src/backends/dlib/dlibmodel.h @@ -32,7 +32,8 @@ namespace dd { public: DlibModel() : MLModel() {} - DlibModel(const APIData &ad); + DlibModel(const APIData &ad, APIData &adg, + const std::shared_ptr &logger); DlibModel(const std::string &repo) : MLModel(repo) {} diff --git a/src/backends/tf/tfmodel.cc b/src/backends/tf/tfmodel.cc index d5b2d7e73..3ebc71864 100644 --- a/src/backends/tf/tfmodel.cc +++ b/src/backends/tf/tfmodel.cc @@ -24,7 +24,9 @@ #include namespace dd { - TFModel::TFModel(const APIData &ad) + TFModel::TFModel(const APIData &ad, APIDat &adg, + const std::shared_ptr &logger) + :MLModel(ad,adg,logger) { if (ad.has("repository")) { diff --git a/src/backends/tf/tfmodel.h b/src/backends/tf/tfmodel.h index 16db24292..45079a8a0 100644 --- a/src/backends/tf/tfmodel.h +++ b/src/backends/tf/tfmodel.h @@ -33,7 +33,8 @@ namespace dd { public: TFModel():MLModel() {} - TFModel(const APIData &ad); + TFModel(const APIData &ad, APIData &adg, + const std::shared_ptr &logger); TFModel(const std::string &repo) :MLModel(repo) {} ~TFModel() {} diff --git a/src/backends/tsne/tsnemodel.h 
b/src/backends/tsne/tsnemodel.h index 1943ea2d4..1622aabbb 100644 --- a/src/backends/tsne/tsnemodel.h +++ b/src/backends/tsne/tsnemodel.h @@ -33,8 +33,9 @@ namespace dd { public: TSNEModel():MLModel() {} - TSNEModel(const APIData &ad) - :MLModel() + TSNEModel(const APIData &ad,APIData &adg, + const std::shared_ptr &logger) + :MLModel(ad,adg,logger) { if (ad.has("repository")) this->_repo = ad.get("repository").get(); diff --git a/src/backends/xgb/xgbmodel.cc b/src/backends/xgb/xgbmodel.cc index 0390c8d6a..9edc7da59 100644 --- a/src/backends/xgb/xgbmodel.cc +++ b/src/backends/xgb/xgbmodel.cc @@ -26,8 +26,9 @@ namespace dd { - XGBModel::XGBModel(const APIData &ad) - :MLModel(ad) + XGBModel::XGBModel(const APIData &ad, APIData &adg, + const std::shared_ptr &logger) + :MLModel(ad,adg,logger) { if (ad.has("repository")) this->_repo = ad.get("repository").get(); diff --git a/src/backends/xgb/xgbmodel.h b/src/backends/xgb/xgbmodel.h index b2a3c7a04..a5ec3ee6d 100644 --- a/src/backends/xgb/xgbmodel.h +++ b/src/backends/xgb/xgbmodel.h @@ -34,7 +34,8 @@ namespace dd { public: XGBModel():MLModel() {} - XGBModel(const APIData &ad); + XGBModel(const APIData &ad, APIData &adg, + const std::shared_ptr &logger); XGBModel(const std::string &repo) :MLModel(repo) {} ~XGBModel() {}
diff --git a/src/commandlineapi.cc b/src/commandlineapi.cc
index 9f1e90159..05dba8cd4 100644
--- a/src/commandlineapi.cc
+++ b/src/commandlineapi.cc
@@ -58,6 +58,7 @@ namespace dd
         {
           APIData model_ad;
           model_ad.add("repository",FLAGS_model_repo);
+          APIData adg; // NOTE(review): not passed to the CaffeModel constructor on the next line and unused within this hunk -- confirm intent
           CaffeModel cmodel(model_ad);
           add_service(FLAGS_service,std::move(MLService(FLAGS_service,cmodel)));
         }
@@ -87,6 +88,7 @@ namespace dd
         {
           APIData model_ad;
           model_ad.add("repository",FLAGS_model_repo);
+          APIData adg; // NOTE(review): same as above, declared but not handed to CaffeModel within this hunk
           CaffeModel cmodel(model_ad);
           add_service(FLAGS_service,std::move(MLService(FLAGS_service,cmodel)));
           APIData ad, out;
diff --git a/src/jsonapi.cc b/src/jsonapi.cc index c55d7cad0..6efa73c2b 100644 --- a/src/jsonapi.cc +++ b/src/jsonapi.cc @@ 
 -409,7 +409,7 @@ namespace dd { if (mllib == "caffe") { - CaffeModel cmodel(ad_model); + CaffeModel cmodel(ad_model,ad,_logger); if (type == "supervised") { if (input == "image") @@ -453,7 +453,7 @@ namespace dd #ifdef USE_CAFFE2 else if (mllib == "caffe2") { - Caffe2Model c2model(ad_model); + Caffe2Model c2model(ad_model,ad,_logger); if (type == "supervised") { if (input == "image") @@ -489,7 +489,7 @@ namespace dd #ifdef USE_TF else if (mllib == "tensorflow" || mllib == "tf") { - TFModel tfmodel(ad_model); + TFModel tfmodel(ad_model,ad,_logger); if (type == "supervised") { if (input == "image") @@ -518,7 +518,7 @@ namespace dd #endif #ifdef USE_DLIB else if (mllib == "dlib") { - DlibModel dlibmodel(ad_model); + DlibModel dlibmodel(ad_model,ad,_logger); if (type == "supervised") { if (input == "image") { add_service(sname, std::move(MLService(sname, dlibmodel, description)), ad); @@ -539,7 +539,7 @@ namespace dd #ifdef USE_XGBOOST else if (mllib == "xgboost") { - XGBModel xmodel(ad_model); + XGBModel xmodel(ad_model,ad,_logger); if (input == "csv") add_service(sname,std::move(MLService(sname,xmodel,description)),ad); else if (input == "svm") @@ -557,7 +557,7 @@ namespace dd #ifdef USE_TSNE else if (mllib == "tsne") { - TSNEModel tmodel(ad_model); + TSNEModel tmodel(ad_model,ad,_logger); if (input == "csv") add_service(sname,std::move(MLService(sname,tmodel,description)),ad); else if (input == "txt")
diff --git a/src/mlmodel.h b/src/mlmodel.h
index d2fea787a..4d472e9c7 100644
--- a/src/mlmodel.h
+++ b/src/mlmodel.h
@@ -40,15 +40,22 @@ namespace dd
   public:
     MLModel() {}
 
-    MLModel(const APIData &ad) {
+    MLModel(const APIData &ad, APIData &adg, // ad: model settings used for the repository; adg: receives the config.json "parameters"
+            const std::shared_ptr &logger) {
       init_repo_dir(ad);
+      read_config_json(adg,logger); // returns without touching adg when the repository has no config.json
     }
 
+    MLModel(const APIData &ad) // repository setup only, config.json is not read
+    {
+      init_repo_dir(ad);
+    }
+
     MLModel(const std::string &repo)
       :_repo(repo) {}
 
-    MLModel(const APIData &ad, const std::string &repo)
+    MLModel(const APIData &ad, const std::string &repo)
       :_repo(repo)
     {
init_repo_dir(ad); @@ -146,17 +153,55 @@ namespace dd private: void init_repo_dir(const APIData &ad) { - std::string repo = ad.get("repository").get(); + // auto-creation of model directory + _repo = ad.get("repository").get(); bool create = ad.has("create_repository") && ad.get("create_repository").get(); bool isDir; - bool exists = fileops::file_exists(repo, isDir); + bool exists = fileops::file_exists(_repo, isDir); if (exists && !isDir) throw MLLibBadParamException("file exists with same name as repository"); if (!exists && create) - fileops::create_dir(repo,0775); + fileops::create_dir(_repo,0775); #ifdef USE_SIMSEARCH _index_preload = ad.has("index_preload") && ad.get("index_preload").get(); #endif + // auto-install from model archive + if (ad.has("init")) + { + std::string compressedf = ad.get("init").get(); + if (fileops::uncompress(compressedf,_repo)) + throw MLLibBadParamException("failed installing model from archive, check 'init' argument to model"); + } + } + + void read_config_json(APIData &adg, + const std::shared_ptr &logger) + { + const std::string cf = _repo + "/config.json"; + if (!fileops::file_exists(cf)) + return; + std::ifstream is(cf); + std::stringstream jbuf; + jbuf << is.rdbuf(); + rapidjson::Document d; + d.Parse(jbuf.str().c_str()); + if (d.HasParseError()) + { + logger->error("config.json parsing error on string: {}",jbuf.str()); + throw MLLibBadParamException("Failed parsing config file " + cf); + } + APIData adcj; + try + { + adcj = APIData(d); + } + catch(RapidjsonException &e) + { + logger->error("JSON error {}",e.what()); + throw MLLibBadParamException("Failed converting JSON file to internal data format"); + } + APIData adcj_parameters = adcj.getobj("parameters"); + adg.add("parameters",adcj_parameters); } }; } diff --git a/src/utils/fileops.hpp b/src/utils/fileops.hpp index bb8ad6d19..b15e2aa07 100644 --- a/src/utils/fileops.hpp +++ b/src/utils/fileops.hpp @@ -28,6 +28,8 @@ #include #include #include +#include +#include 
namespace dd { @@ -223,9 +225,90 @@ namespace dd return 0; } + static int copy_uncompressed_data(struct archive *ar, struct archive *aw) + { + int r; + const void *buff; + size_t size; + int64_t offset; + + for (;;) { + r = archive_read_data_block(ar, &buff, &size, &offset); + if (r == ARCHIVE_EOF) + return (ARCHIVE_OK); + if (r < ARCHIVE_OK) + return (r); + r = archive_write_data_block(aw, buff, size, offset); + if (r < ARCHIVE_OK) { + std::cerr << "archive error: " << archive_error_string(aw) << std::endl; + return r; + } + } + } + + static int uncompress(const std::string &fc, + const std::string &repo) + { + struct archive *a; + struct archive *ext; + struct archive_entry *entry; + int flags; + int r; + + flags = ARCHIVE_EXTRACT_TIME; + //flags |= ARCHIVE_EXTRACT_PERM; + //flags |= ARCHIVE_EXTRACT_ACL; + flags |= ARCHIVE_EXTRACT_FFLAGS; + + a = archive_read_new(); + archive_read_support_format_all(a); + archive_read_support_filter_all(a); + ext = archive_write_disk_new(); + archive_write_disk_set_options(ext, flags); + archive_write_disk_set_standard_lookup(ext); + if ((r = archive_read_open_filename(a, fc.c_str(), 10240))) // 10240 is block_size + return r; + for (;;) { + r = archive_read_next_header(a, &entry); + if (r == ARCHIVE_EOF) + break; + if (r < ARCHIVE_OK) + std::cerr << "archive error: " << archive_error_string(a) << std::endl; + if (r < ARCHIVE_WARN) { + std::cerr << "archive error, aborting\n"; + return 1; + } + const char * compressed_head = archive_entry_pathname(entry); + const std::string full_outpath = repo + "/" + compressed_head; + archive_entry_set_pathname(entry,full_outpath.c_str()); + r = archive_write_header(ext, entry); + if (r < ARCHIVE_OK) + std::cerr << "archive error: " << archive_error_string(ext) << std::endl; + else if (archive_entry_size(entry) > 0) { + r = copy_uncompressed_data(a, ext); + if (r < ARCHIVE_OK) + std::cerr << "error writing uncompressed data: " << archive_error_string(ext) << std::endl; + if (r < ARCHIVE_WARN) 
{ + std::cerr << "archive error, aborting\n"; + return 2; + } + } + r = archive_write_finish_entry(ext); + if (r < ARCHIVE_OK) + std::cerr << "error finishing uncompressed data entry: " << archive_error_string(ext) << std::endl; + if (r < ARCHIVE_WARN) { + std::cerr << "archive error, aborting\n"; + return 3; + } + } + archive_read_close(a); + archive_read_free(a); + archive_write_close(ext); + archive_write_free(ext); - }; - + return 0; + } + }; } #endif