diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp
index 376b67ee4..58cad1de7 100644
--- a/include/caffe/util/io.hpp
+++ b/include/caffe/util/io.hpp
@@ -169,6 +169,15 @@ bool ReadImageToDatum(const string& filename, const int label,
     const int height, const int width, const bool is_color,
     const std::string & encoding, Datum* datum);
 
+/*
+** overloaded function ReadImageToDatum
+** the overloaded function is aimed at solving the regression problem
+** it is shown here to be able to handle multi-label data, such as float data
+*/
+bool ReadImageToDatum(const string& filename, const vector<float> labels,
+    const int height, const int width, const bool is_color,
+    const std::string & encoding, Datum* datum);
+
 inline bool ReadImageToDatum(const string& filename, const int label,
     const int height, const int width, const bool is_color, Datum* datum) {
   return ReadImageToDatum(filename, label, height, width, is_color,
diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp
index e889ca51f..cd5abb3d2 100644
--- a/src/caffe/layers/data_layer.cpp
+++ b/src/caffe/layers/data_layer.cpp
@@ -35,11 +35,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+
 #ifdef USE_OPENCV
 #include <opencv2/core/core.hpp>
 #endif  // USE_OPENCV
 #include <stdint.h>
-#include <omp.h>
+
 #include <vector>
 
 #include "caffe/data_transformer.hpp"
@@ -64,15 +65,13 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
   const int batch_size = this->layer_param_.data_param().batch_size();
   // Read a data point, and use it to initialize the top blob.
-  Datum datum;
-  datum.ParseFromString(*(reader_.full().peek()));
+  Datum& datum = *(reader_.full().peek());
   // Use data_transformer to infer the expected blob shape from datum.
   vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
   this->transformed_data_.Reshape(top_shape);
   // Reshape top[0] and prefetch_data according to the batch_size.
   top_shape[0] = batch_size;
-  top[0]->Reshape(top_shape);
   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
     this->prefetch_[i].data_.Reshape(top_shape);
@@ -80,9 +79,14 @@ void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
   LOG(INFO) << "output data size: " << top[0]->num() << ","
       << top[0]->channels() << "," << top[0]->height() << ","
       << top[0]->width();
-  // label
+  int labelNum = 4;
   if (this->output_labels_) {
-    vector<int> label_shape(1, batch_size);
+
+    vector<int> label_shape;
+    label_shape.push_back(batch_size);
+    label_shape.push_back(labelNum);
+    label_shape.push_back(1);
+    label_shape.push_back(1);
     top[1]->Reshape(label_shape);
     for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
       this->prefetch_[i].label_.Reshape(label_shape);
@@ -98,23 +102,16 @@ void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
   double read_time = 0;
   double trans_time = 0;
   CPUTimer timer;
-  CPUTimer trans_timer;
   CHECK(batch->data_.count());
-
-#ifndef _OPENMP
   CHECK(this->transformed_data_.count());
-#endif
   // Reshape according to the first datum of each batch
   // on single input batches allows for inputs of varying dimension.
   const int batch_size = this->layer_param_.data_param().batch_size();
-  Datum datum;
-  datum.ParseFromString(*(reader_.full().peek()));
+  Datum& datum = *(reader_.full().peek());
   // Use data_transformer to infer the expected blob shape from datum.
   vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
-#ifndef _OPENMP
   this->transformed_data_.Reshape(top_shape);
-#endif
   // Reshape batch according to the batch_size.
   top_shape[0] = batch_size;
   batch->data_.Reshape(top_shape);
@@ -125,52 +122,31 @@ void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
   if (this->output_labels_) {
     top_label = batch->label_.mutable_cpu_data();
   }
-
-  trans_timer.Start();
-#ifdef _OPENMP
-  #pragma omp parallel if (batch_size > 1)
-  #pragma omp single nowait
-#endif
   for (int item_id = 0; item_id < batch_size; ++item_id) {
     timer.Start();
     // get a datum
-    string* data = (reader_.full().pop("Waiting for data"));
-    timer.Stop();
+    Datum& datum = *(reader_.full().pop("Waiting for data"));
     read_time += timer.MicroSeconds();
+    timer.Start();
     // Apply data transformations (mirror, scale, crop...)
     int offset = batch->data_.offset(item_id);
+    this->transformed_data_.set_cpu_data(top_data + offset);
+    this->data_transformer_->Transform(datum, &(this->transformed_data_));
-#ifdef _OPENMP
-    PreclcRandomNumbers precalculated_rand_numbers;
-    this->data_transformer_->GenerateRandNumbers(precalculated_rand_numbers);
-    #pragma omp task firstprivate(offset, precalculated_rand_numbers, data, item_id)
-#endif
-    {
-      Datum datum;
-      datum.ParseFromString(*data);
-      (reader_.free()).push(data);
-      // Copy label. We need to copy it before we release datum
-      if (this->output_labels_) {
-        top_label[item_id] = datum.label();
+    int labelNum = 4;
+    if (this->output_labels_) {
+      for (int i = 0; i < labelNum; i++) {
+        top_label[item_id * labelNum + i] = datum.float_data(i);
       }
-      // Transform data
-#ifdef _OPENMP
-      Blob<Dtype> tmp_data;
-      tmp_data.Reshape(top_shape);
-      tmp_data.set_cpu_data(top_data + offset);
-      this->data_transformer_->Transform(datum, &tmp_data,
-          precalculated_rand_numbers);
-#else
-      this->transformed_data_.set_cpu_data(top_data + offset);
-      this->data_transformer_->Transform(datum, &(this->transformed_data_));
-#endif
     }
+
+
+    trans_time += timer.MicroSeconds();
+
+    reader_.free().push(const_cast<Datum*>(&datum));
   }
-  trans_timer.Stop();
+  timer.Stop();
   batch_timer.Stop();
-  // Due to multithreaded nature of transformation,
-  // time it takes to execute them we get from subtracting
-  // read batch of images time from total batch read&transform time
-  trans_time = trans_timer.MicroSeconds() - read_time;
   DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
   DLOG(INFO) << "      Read time: " << read_time / 1000 << " ms.";
   DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index ba70dc744..ea9a8e69d 100644
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
@@ -254,6 +254,25 @@ bool ReadImageToDatum(const string& filename, const int label,
   }
 }
 
+/*
+** here is the implementation of the overloaded function
+*/
+bool ReadImageToDatum(const string& filename, const vector<float> labels,
+    const int height, const int width, const bool is_color,
+    const std::string & encoding, Datum* datum) {
+  cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
+  if (cv_img.data) {
+    CVMatToDatum(cv_img, datum);
+    for (int i = 0; i < labels.size(); ++i)
+    {
+      datum->add_float_data(labels.at(i));
+    }
+    return true;
+  } else {
+    return false;
+  }
+}
+
 void GetImageSize(const string& filename, int* height, int* width) {
   cv::Mat cv_img = cv::imread(filename);
   if (!cv_img.data) {
diff --git a/tools/convert_imageset_regression.cpp b/tools/convert_imageset_regression.cpp
new file mode 100644
index 000000000..e635cc94a
--- /dev/null
+++ b/tools/convert_imageset_regression.cpp
@@ -0,0 +1,224 @@
+/*
+All modification made by Intel Corporation: © 2016 Intel Corporation
+
+All contributions by the University of California:
+Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
+All rights reserved.
+
+All other contributions:
+Copyright (c) 2014, 2015, the respective contributors
+All rights reserved.
+For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md
+
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Intel Corporation nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// This program converts a set of images to a lmdb/leveldb by storing them
+// as Datum proto buffers.
+// Usage:
+//   convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
+//
+// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
+// should be a list of files as well as their labels, in the format as
+//   subfolder1/file1.JPEG 7
+//   ....
+
+#include <algorithm>
+#include <fstream>  // NOLINT(readability/streams)
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "boost/scoped_ptr.hpp"
+#include "gflags/gflags.h"
+#include "glog/logging.h"
+
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/util/db.hpp"
+#include "caffe/util/format.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/util/rng.hpp"
+
+#include <boost/tokenizer.hpp>  //### To use tokenizer
+#include <boost/algorithm/string.hpp>  //###
+
+using namespace caffe;  // NOLINT(build/namespaces)
+using std::pair;
+using boost::scoped_ptr;
+
+using namespace std;  //###
+
+DEFINE_bool(gray, false,
+    "When this option is on, treat images as grayscale ones");
+DEFINE_bool(shuffle, false,
+    "Randomly shuffle the order of images and their labels");
+DEFINE_string(backend, "lmdb",
+    "The backend {lmdb, leveldb} for storing the result");
+DEFINE_int32(resize_width, 0, "Width images are resized to");
+DEFINE_int32(resize_height, 0, "Height images are resized to");
+DEFINE_bool(check_size, false,
+    "When this option is on, check that all the datum have the same size");
+DEFINE_bool(encoded, false,
+    "When this option is on, the encoded image will be save in datum");
+DEFINE_string(encode_type, "",
+    "Optional: What type should we encode the image as ('png','jpg',...).");
+
+int main(int argc, char** argv) {
+#ifdef USE_OPENCV
+  ::google::InitGoogleLogging(argv[0]);
+  // Print output to stderr (while still logging)
+  FLAGS_alsologtostderr = 1;
+
+#ifndef GFLAGS_GFLAGS_H_
+  namespace gflags = google;
+#endif
+
+  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
+        "format used as input for Caffe.\n"
+        "Usage:\n"
+        "    convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
+        "The ImageNet dataset for the training demo is at\n"
+        "    http://www.image-net.org/download-images\n");
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+  if (argc < 4) {
+    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
+    return 1;
+  }
+
+  const bool is_color = !FLAGS_gray;
+  const bool check_size = FLAGS_check_size;
+  const bool encoded = FLAGS_encoded;
+  const string encode_type = FLAGS_encode_type;
+
+  std::ifstream infile(argv[2]);
+  //std::vector<std::pair<std::string, int> > lines;  //###
+  /*
+  ** change the label value from int to vector<float>
+  ** the purpose of this change is to enable the regression problem
+  ** an integer value does not support multiple labels or float values
+  */
+  std::vector<std::pair<std::string, std::vector<float> > > lines;
+  std::string line;
+  //size_t pos;
+  //int label;  //###
+  std::vector<float> labels;
+
+  while (std::getline(infile, line)) {
+    // pos = line.find_last_of(' ');
+    // label = atoi(line.substr(pos + 1).c_str());
+    // lines.push_back(std::make_pair(line.substr(0, pos), label));
+    //###
+    std::vector<std::string> tokens;
+    boost::char_separator<char> sep(" ");
+    boost::tokenizer<boost::char_separator<char> > tok(line, sep);
+    tokens.clear();
+    std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));
+
+    for (int i = 1; i < tokens.size(); ++i)
+    {
+      labels.push_back(atof(tokens.at(i).c_str()));
+    }
+
+    lines.push_back(std::make_pair(tokens.at(0), labels));
+    //### To clear the vector labels
+    labels.clear();
+  }
+  if (FLAGS_shuffle) {
+    // randomly shuffle data
+    LOG(INFO) << "Shuffling data";
+    shuffle(lines.begin(), lines.end());
+  }
+  LOG(INFO) << "A total of " << lines.size() << " images.";
+
+  if (encode_type.size() && !encoded)
+    LOG(INFO) << "encode_type specified, assuming encoded=true.";
+
+  int resize_height = std::max<int>(0, FLAGS_resize_height);
+  int resize_width = std::max<int>(0, FLAGS_resize_width);
+
+  // Create new DB
+  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
+  db->Open(argv[3], db::NEW);
+  scoped_ptr<db::Transaction> txn(db->NewTransaction());
+
+  // Storing to db
+  std::string root_folder(argv[1]);
+  Datum datum;
+  int count = 0;
+  int data_size = 0;
+  bool data_size_initialized = false;
+
+  for (int line_id = 0; line_id < lines.size(); ++line_id) {
+    bool status;
+    std::string enc = encode_type;
+    if (encoded && !enc.size()) {
+      // Guess the encoding type from the file name
+      string fn = lines[line_id].first;
+      size_t p = fn.rfind('.');
+      if ( p == fn.npos )
+        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
+      enc = fn.substr(p);
+      std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
+    }
+    status = ReadImageToDatum(root_folder + lines[line_id].first,  //###
+        lines[line_id].second, resize_height, resize_width, is_color,
+        enc, &datum);
+    if (status == false) continue;
+    if (check_size) {
+      if (!data_size_initialized) {
+        data_size = datum.channels() * datum.height() * datum.width();
+        data_size_initialized = true;
+      } else {
+        const std::string& data = datum.data();
+        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
+            << data.size();
+      }
+    }
+    // sequential
+    string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;
+
+    // Put in db
+    string out;
+    CHECK(datum.SerializeToString(&out));
+    txn->Put(key_str, out);
+
+    if (++count % 1000 == 0) {
+      // Commit db
+      txn->Commit();
+      txn.reset(db->NewTransaction());
+      LOG(INFO) << "Processed " << count << " files.";
+    }
+  }
+  // write the last batch
+  if (count % 1000 != 0) {
+    txn->Commit();
+    LOG(INFO) << "Processed " << count << " files.";
+  }
+#else
+  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
+#endif  // USE_OPENCV
+  return 0;
+}