Skip to content

Commit

Permalink
Merge branch '1.0' of gh:idiap/bob into 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Manuel Guenther committed Jul 6, 2012
2 parents a863014 + 13ebfe1 commit bf04b31
Show file tree
Hide file tree
Showing 5 changed files with 406 additions and 28 deletions.
2 changes: 2 additions & 0 deletions cxx/io/CMakeLists.txt
Expand Up @@ -33,6 +33,8 @@ set(src

"src/ImageFile.cc"

"src/CSVFile.cc"

"src/BinFileHeader.cc"
"src/BinFile.cc"
"src/BinaryArrayFile.cc"
Expand Down
334 changes: 334 additions & 0 deletions cxx/io/src/CSVFile.cc
@@ -0,0 +1,334 @@
/**
* @file cxx/io/src/CSVFile.cc
* @date Thu 10 May 2012 15:19:24 CEST
* @author Andre Anjos <andre.anjos@idiap.ch>
*
* @brief Code to read and write CSV files to/from arrays. CSV files are always
* treated as containing sequences of double precision numbers.
*
* Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <sstream>
#include <fstream>
#include <string>
#include <boost/filesystem.hpp>
#include <boost/format.hpp>
#include <boost/filesystem.hpp>
#include <boost/make_shared.hpp>
#include <boost/tokenizer.hpp>

#include <boost/shared_array.hpp>
#include <boost/algorithm/string.hpp>

#include "io/CodecRegistry.h"
#include "io/Exception.h"

namespace fs = boost::filesystem;
namespace io = bob::io;
namespace ca = bob::core::array;

typedef boost::tokenizer<boost::escaped_list_separator<char> > Tokenizer;

class CSVFile: public io::File {

public: //api

/**
* Peeks the file contents for a type. We assume the element type to be
* always doubles. This method, effectively, only peaks for the total
* number of lines and the number of columns in the file.
*/
void peek() {

std::string line;
size_t line_number = 0;
size_t entries = 0;
std::streampos cur_pos = 0;

m_file.seekg(0); //< returns to the begin of file and start reading...

while (std::getline(m_file,line)) {
++line_number;
m_pos.push_back(cur_pos);
cur_pos = m_file.tellg();
Tokenizer tok(line);
size_t size = std::distance(tok.begin(), tok.end());
if (!entries) entries = size;
else if (entries != size) {
boost::format m("line %d at file '%s' contains %d entries instead of %d (expected)");
m % line_number % m_filename % size % entries;
throw std::runtime_error(m.str().c_str());
}
}

if (!line_number) {
m_newfile = true;
m_pos.clear();
return;
}

m_arrayset_type.dtype = bob::core::array::t_float64;
m_arrayset_type.nd = 1;
m_arrayset_type.shape[0] = entries;
m_arrayset_type.update_strides();

m_array_type = m_arrayset_type;
m_array_type.nd = 2;
m_array_type.shape[0] = m_pos.size();
m_array_type.shape[1] = entries;
m_array_type.update_strides();
}

CSVFile(const std::string& path, char mode):
m_filename(path),
m_newfile(false) {

if (mode == 'r' || (mode == 'a' && fs::exists(path))) { //try peeking

if (mode == 'r')
m_file.open(m_filename.c_str(), std::ios::in);
else if (mode == 'a')
m_file.open(m_filename.c_str(), std::ios::app|std::ios::in|std::ios::out);
if (!m_file.is_open()) {
boost::format m("cannot open file '%s' for reading or appending");
m % path;
throw std::runtime_error(m.str().c_str());
}

peek(); ///< peek file properties
}
else {
m_file.open(m_filename.c_str(), std::ios::trunc|std::ios::in|std::ios::out);

if (!m_file.is_open()) {
boost::format m("cannot open file '%s' for writing");
m % path;
throw std::runtime_error(m.str().c_str());
}

m_newfile = true;
}

//general precision settings, in case output is needed...
m_file.precision(10);
m_file.setf(std::ios_base::scientific, std::ios_base::floatfield);

}

virtual ~CSVFile() { }

virtual const std::string& filename() const {
return m_filename;
}

virtual const ca::typeinfo& array_type() const {
return m_array_type;
}

virtual const ca::typeinfo& arrayset_type() const {
return m_arrayset_type;
}

virtual size_t arrayset_size() const {
return m_pos.size();
}

virtual const std::string& name() const {
return s_codecname;
}

virtual void array_read(ca::interface& buffer) {
if (m_newfile)
throw std::runtime_error("uninitialized CSV file cannot be read");

if (!buffer.type().is_compatible(m_array_type)) buffer.set(m_array_type);

//read contents
std::string line;
if (m_file.eof()) m_file.clear(); ///< clear current "end" state.
m_file.seekg(0);
double* p = static_cast<double*>(buffer.ptr());
while (std::getline(m_file, line)) {
Tokenizer tok(line);
for(Tokenizer::iterator k=tok.begin(); k!=tok.end(); ++k) {
std::istringstream(*k) >> *(p++);
}
}
}

virtual void arrayset_read(ca::interface& buffer, size_t index) {

if (m_newfile)
throw std::runtime_error("uninitialized CSV file cannot be read");

if (!buffer.type().is_compatible(m_arrayset_type))
buffer.set(m_arrayset_type);

if (index >= m_pos.size()) {
boost::format m("cannot array at position %d -- there is only %d entries at file '%s'");
m % index % m_pos.size() % m_filename;
throw std::runtime_error(m.str().c_str());
}

//reads a specific line from the file.
std::string line;
if (m_file.eof()) m_file.clear(); ///< clear current "end" state.
m_file.seekg(m_pos[index]);
if (!std::getline(m_file, line)) {
boost::format m("could not seek to line %u (offset %u) while reading file '%s'");
m % index % m_pos[index] % m_filename;
throw std::runtime_error(m.str().c_str());
}
Tokenizer tok(line);
double* p = static_cast<double*>(buffer.ptr());
for(Tokenizer::iterator k=tok.begin(); k!=tok.end(); ++k) {
std::istringstream(*k) >> *(p++);
}

}

virtual size_t arrayset_append (const ca::interface& buffer) {

const ca::typeinfo& type = buffer.type();

if (m_newfile) {
if (type.nd != 1 || type.dtype != bob::core::array::t_float64) {
boost::format m("cannot append %s to file '%s' - CSV files only accept 1D double precision float arrays");
m % type.str() % m_filename;
throw std::runtime_error(m.str().c_str());
}
m_pos.clear();
m_arrayset_type = m_array_type = type;
m_array_type.shape[1] = m_arrayset_type.shape[0];
m_newfile = false;
}

else {

//check compatibility
if (!m_arrayset_type.is_compatible(buffer.type())) {
boost::format m("CSV file '%s' only accepts arrays of type %s");
m % m_filename % m_arrayset_type.str();
throw std::runtime_error(m.str().c_str());
}

}

const double* p = static_cast<const double*>(buffer.ptr());
if (m_pos.size()) m_file << std::endl; ///< adds a new line
m_pos.push_back(m_file.tellp()); ///< register start of line
for (size_t k=1; k<type.shape[0]; ++k) m_file << *(p++) << ",";
m_file << *(p++);
m_array_type.shape[0] = m_pos.size();
m_array_type.update_strides();
return (m_pos.size()-1);

}

virtual void array_write (const ca::interface& buffer) {

const ca::typeinfo& type = buffer.type();

if (m_newfile) {
if (type.nd != 2 || type.dtype != bob::core::array::t_float64) {
boost::format m("cannot write %s to file '%s' - CSV files only accept a single 2D double precision float array as input");
m % type.str() % m_filename;
throw std::runtime_error(m.str().c_str());
}
const double* p = static_cast<const double*>(buffer.ptr());
for (size_t l=1; l<type.shape[0]; ++l) {
m_pos.push_back(m_file.tellp());
for (size_t k=1; k<type.shape[1]; ++k) m_file << *(p++) << ",";
m_file << *(p++) << std::endl;
}
for (size_t k=1; k<type.shape[1]; ++k) m_file << *(p++) << ",";
m_file << *(p++);
m_arrayset_type = type;
m_arrayset_type.nd = 1;
m_arrayset_type.shape[0] = type.shape[1];
m_arrayset_type.update_strides();
m_array_type = type;
m_newfile = false;
return;
}

//TODO
throw std::runtime_error("Writing a 2D array to a CSV file that already contains entries is not implemented at the moment");

}

private: //representation
std::fstream m_file;
std::string m_filename;
bool m_newfile;
ca::typeinfo m_array_type;
ca::typeinfo m_arrayset_type;
std::vector<std::streampos> m_pos; ///< dictionary of line starts

static std::string s_codecname;

};

std::string CSVFile::s_codecname = "bob.csv";

/**
* From this point onwards we have the registration procedure. If you are
* looking at this file for a coding example, just follow the procedure bellow,
* minus local modifications you may need to apply.
*/

/**
* This defines the factory method F that can create codecs of this type.
*
* Here are the meanings of the mode flag that should be respected by your
* factory implementation:
*
* 'r': opens for reading only - no modifications can occur; it is an
* error to open a file that does not exist for read-only operations.
* 'w': opens for reading and writing, but truncates the file if it
* exists; it is not an error to open files that do not exist with
* this flag.
* 'a': opens for reading and writing - any type of modification can
* occur. If the file does not exist, this flag is effectively like
* 'w'.
*
* Returns a newly allocated File object that can read and write data to the
* file using a specific backend.
*
* @note: This method can be static.
*/
static boost::shared_ptr<io::File>
make_file (const std::string& path, char mode) {
return boost::make_shared<CSVFile>(path, mode);

}

/**
* Takes care of codec registration per se.
*/
static bool register_codec() {

boost::shared_ptr<io::CodecRegistry> instance =
io::CodecRegistry::instance();

instance->registerExtension(".csv", "Comma-Separated Values", &make_file);
instance->registerExtension(".txt", "Comma-Separated Values", &make_file);

return true;

}

static bool codec_registered = register_codec();
2 changes: 1 addition & 1 deletion doc/Dependencies.rst
Expand Up @@ -298,7 +298,7 @@ shell prompt:

.. code-block:: sh
$ sudo port install cmake blitz ffmpeg python26 python_select gcc44 gcc_select py26-numpy -atlas matio imagemagick py26-ipython py26-matplotlib google-perftools doxygen py26-sphinx texlive-latex-extra texlive-fonts-recommended hdf5-18 py26-argparse qt4-mac boost +python26 python26-scipy +no_atlas fftw-3 vlfeat opencv +python26 +qt4 libsvm +python26 +tools dvipng
$ sudo port install cmake blitz ffmpeg python26 python_select gcc44 gcc_select py26-numpy -atlas matio imagemagick py26-ipython py26-matplotlib google-perftools doxygen py26-sphinx texlive-latex-extra texlive-fonts-recommended hdf5-18 py26-argparse qt4-mac boost +python26 py26-scipy +no_atlas fftw-3 vlfeat opencv +python26 +qt4 libsvm +python26 +tools dvipng py26-sqlalchemy
$ # go for a long coffee
After the installation has finished, make sure you select python 2.6 (macports)
Expand Down

0 comments on commit bf04b31

Please sign in to comment.