Skip to content

Commit

Permalink
ASCII PLY IO routines: support arbitrary metadata.
Browse files Browse the repository at this point in the history
Metadata elements are encoded and decoded for ASCII PLY IO routines. Data is written as a PLY comment and is
automatically base64 encoded if the content is expected to interfere with the PLY structure (unprintable and special
printable characters). Base64 decoding is automatic. Note that, whenever possible, metadata is left raw to maximize
interoperability and editability/injection.
  • Loading branch information
Haley Clark committed Dec 9, 2020
1 parent d675ef0 commit 0e81c4c
Show file tree
Hide file tree
Showing 7 changed files with 207 additions and 83 deletions.
24 changes: 16 additions & 8 deletions src/YgorMath.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5989,7 +5989,6 @@ fv_surface_mesh<T,I>::remove_disconnected_vertices(){
new_faces.reserve(this->faces.size());

std::map<I,I> old_to_new_vert;
const auto end = old_to_new_vert.end();

for(const auto& f : this->faces){
std::vector<I> new_face;
Expand Down Expand Up @@ -10857,18 +10856,26 @@ samples_1D<T>::Write_To_Stream(std::ostream &SO) const {
const auto defaultprecision = SO.precision();
SO.precision(std::numeric_limits<T>::max_digits10 );

// Used to determine when text must be base64 encoded.
//
// Returns true if 'in' contains at least one character that is not printable (according to std::isprint)
// or is an '=' character. Returns false otherwise, including for an empty string.
const auto needs_to_be_escaped = [](const std::string &in) -> bool {
    for(const auto &x : in){
        // std::isprint has undefined behaviour when given a negative value other than EOF, and plain
        // char may be signed. Convert to unsigned char before classifying.
        const auto c = static_cast<unsigned char>(x);

        // Permit words/sentences but not characters that could potentially affect file interpretation.
        // Note that whitespace is significant and will not be altered.
        if( !std::isprint(c)
        ||  (c == static_cast<unsigned char>('=')) ) return true;
    }
    return false;
};

for(const auto &mp : this->metadata){
// Note: Syntax should be:
// | # metadata: key = value
// | # base64 metadata: encoded_key = encoded_value

bool is_printable = true;
for(const auto &x : mp.first) if(!std::isprint(x)) is_printable = false;
for(const auto &x : mp.second) if(!std::isprint(x)) is_printable = false;

const auto key = mp.first;
const auto value = mp.second;
if(is_printable){
const auto should_escape = (needs_to_be_escaped(key) || needs_to_be_escaped(value));
if(should_escape){
SO << "# metadata: " << key << " = " << value << std::endl;
}else{
const auto encoded_key = Base64::EncodeFromString(key);
Expand Down Expand Up @@ -10946,7 +10953,7 @@ samples_1D<T>::Read_From_Stream(std::istream &SI){
// | # base64 metadata: encoded_key = encoded_value
const auto p_assign = line.find(" = ");
const auto p_metadata = line.find("metadata: ");
const auto p_base64 = line.find("base64 ");
const auto p_base64 = line.find("base64 metadata:");
if(p_assign == std::string::npos) continue; // A comment.
if(p_metadata == std::string::npos) continue; // A comment.

Expand All @@ -10956,7 +10963,8 @@ samples_1D<T>::Read_From_Stream(std::istream &SI){
const auto key = line.substr(key_offset, (p_assign - key_offset));

// Decode using base64, if necessary.
if(p_base64 == std::string::npos){
if( (p_base64 == std::string::npos)
|| (p_metadata < p_base64) ){ // If the base64 term appears in the metadata itself.
indata.metadata[key] = value;
}else{
const auto decoded_key = Base64::DecodeToString(key);
Expand Down
119 changes: 70 additions & 49 deletions src/YgorMathIOPLY.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,45 +11,11 @@
#include "YgorMisc.h"
#include "YgorMath.h"
#include "YgorString.h"
#include "YgorBase64.h" //Used for metadata serialization.

#include "YgorMathIOPLY.h"


// Byte-order selector. 'Little' and 'Big' name the two recognized byte orders; 'Default' indicates that the
// user did not specify one, in which case a default is used or detection is attempted.
enum YgorMathIOPLYEndianness {
    Little,   // Least significant byte is stored at the lowest memory address.
    Big,      // Most significant byte is stored at the lowest memory address.
    Default   // Unspecified by the user.
};

// Determine the byte order of the machine executing this code.
//
// The value 1 is stored in a 64-bit unsigned integer and the first and last bytes of that integer are
// inspected to see which one holds the set bit.
//
// Returns YgorMathIOPLYEndianness::Little or YgorMathIOPLYEndianness::Big.
// Throws std::runtime_error if neither the first nor the last byte holds the bit.
static inline
YgorMathIOPLYEndianness
Detect_Machine_Endianness(void){
    // Any integer wider than one byte works as a probe.
    volatile uint64_t probe = static_cast<uint64_t>(1);
    volatile uint8_t *probe_bytes = reinterpret_cast<volatile uint8_t *>(&probe);

    // Read both end bytes up-front, lowest address first.
    const uint8_t lowest_byte  = probe_bytes[0];
    const uint8_t highest_byte = probe_bytes[sizeof(uint64_t)-1];
    const uint8_t one = static_cast<uint8_t>(1);

    if(lowest_byte == one){
        return YgorMathIOPLYEndianness::Little; // "LSB".
    }
    if(highest_byte == one){
        return YgorMathIOPLYEndianness::Big; // "MSB".
    }

    // Neither little- nor big-endian. Additional byte orders can be added here if ever needed.
    throw std::runtime_error("Cannot determine machine's endianness!");
}


// This routine reads an fv_surface_mesh from an ASCII PLY format stream.
//
// Note that this routine does not validate or enforce manifoldness.
Expand Down Expand Up @@ -116,9 +82,9 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,
++lineN;
if(line.empty()) continue;

auto split = SplitStringToVector(line, "comment", 'd'); // Remove any comments on any lines.
if(split.size() > 1) split.resize(1);
split = SplitVector(split, ' ', 'd');
//auto split = SplitStringToVector(line, "comment", 'd'); // Remove any comments on any lines.
//if(split.size() > 1) split.resize(1);
auto split = SplitStringToVector(line, ' ', 'd');
//split = SplitVector(split, '\t', 'd');
//split = SplitVector(split, ',', 'd');
split.erase( std::remove_if(std::begin(split),
Expand All @@ -130,8 +96,34 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,

if(split.empty()) continue; // Skip all empty lines.

// Read the magic number.
// Handle metadata comments anywhere in the header.
if(false){
}else if( (1 <= split.size()) && (split.at(0) == "comment"_s)){
// Note: Syntax should be:
// | # metadata: key = value
// | # base64 metadata: encoded_key = encoded_value
const auto p_assign = line.find(" = ");
const auto p_metadata = line.find("metadata: ");
const auto p_base64 = line.find("base64 metadata: ");
if( (p_assign == std::string::npos)
|| (p_metadata == std::string::npos) ) continue; // Is a non-metadata comment.

// Determine the boundaries of the key and value.
const auto value = line.substr(p_assign + 3);
const auto key_offset = p_metadata + 10;
const auto key = line.substr(key_offset, (p_assign - key_offset));

// Decode using base64, if necessary.
if( (p_base64 == std::string::npos)
|| (p_metadata < p_base64) ){ // If the base64 keyword appears in the metadata itself.
fvsm.metadata[key] = value;
}else{
const auto decoded_key = Base64::DecodeToString(key);
const auto decoded_value = Base64::DecodeToString(value);
fvsm.metadata[decoded_key] = decoded_value;
}

// Read the magic number.
}else if( (parse_stage == 0) && (split.size() == 1) && (split.at(0) == "ply"_s)){
++parse_stage;

Expand Down Expand Up @@ -227,8 +219,9 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,
long int index_x = -1;
long int index_y = -1;
long int index_z = -1;
const auto N_props = static_cast<long int>(element.properties.size());

for(long int i = 0; i < element.properties.size(); ++i){
for(long int i = 0; i < N_props; ++i){
if(false){
}else if( (index_x == -1)
&& !element.properties[i].is_list
Expand Down Expand Up @@ -257,7 +250,7 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,
fvsm.vertices.reserve(element.count);
vec3<T> shtl( static_cast<T>(0), static_cast<T>(0), static_cast<T>(0) );;
for(long int n = 0; n < element.count; ++n){
for(long int i = 0; i < element.properties.size(); ++i){
for(long int i = 0; i < N_props; ++i){
if(false){
}else if(i == index_x){
shtl.x = get_another_T();
Expand All @@ -280,7 +273,8 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,
|| (element.name == "facet")
|| (element.name == "facets") ){
long int index_vs = -1;
for(long int i = 0; i < element.properties.size(); ++i){
const auto N_props = static_cast<long int>(element.properties.size());
for(long int i = 0; i < N_props; ++i){

if(false){
}else if( (index_vs == -1)
Expand All @@ -300,7 +294,7 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,
// Read in all properties, disregarding those other than the face's connected vertices list.
fvsm.vertices.reserve(element.count);
for(long int n = 0; n < element.count; ++n){
for(long int i = 0; i < element.properties.size(); ++i){
for(long int i = 0; i < N_props; ++i){
if(false){
}else if(i == index_vs){
const auto l = get_list_T(); // Should already be zero-indexed.
Expand All @@ -323,7 +317,8 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,
// improve interoperability, even if it makes it harder to verify file contents were parsed correctly.
}else{
for(long int n = 0; n < element.count; ++n){
for(long int i = 0; i < element.properties.size(); ++i){
const auto N_props = static_cast<long int>(element.properties.size());
for(long int i = 0; i < N_props; ++i){
if(false){
}else if(element.properties[i].is_list){
get_list_T();
Expand Down Expand Up @@ -391,12 +386,38 @@ WriteFVSMeshToASCIIPLY(const fv_surface_mesh<T,I> &fvsm,
return false;
}

// Used to determine when text must be base64 encoded.
//
// Returns true if 'in' contains at least one character that is not printable (according to std::isprint)
// or is an '=' character. Returns false otherwise, including for an empty string.
const auto needs_to_be_escaped = [](const std::string &in) -> bool {
    for(const auto &x : in){
        // std::isprint has undefined behaviour when given a negative value other than EOF, and plain
        // char may be signed. Convert to unsigned char before classifying.
        const auto c = static_cast<unsigned char>(x);

        // Permit words/sentences but not characters that could potentially affect file interpretation.
        // Note that whitespace is significant and will not be altered.
        if( !std::isprint(c)
        ||  (c == static_cast<unsigned char>('=')) ) return true;
    }
    return false;
};

os << "ply" << std::endl
<< "format ascii 1.0" << std::endl
//<< "comment ... encode metadata here ..." << std::endl
//<< "comment ... encode metadata here ..." << std::endl
//<< "comment ... encode metadata here ..." << std::endl
<< "element vertex " << fvsm.vertices.size() << std::endl
<< "format ascii 1.0" << std::endl;

// Emit metadata.
//
// Each key-value pair is written as a single 'comment' line. If either the key or the value is flagged by
// needs_to_be_escaped, both are base64 encoded and the line is tagged 'base64 metadata:'; otherwise both
// are written raw and the line is tagged 'metadata:'.
{
    for(const auto &mp : fvsm.metadata){
        // Bind by reference: the key and value are only read, so copying each string is unnecessary.
        const auto &key = mp.first;
        const auto &value = mp.second;
        const bool must_encode = needs_to_be_escaped(key) || needs_to_be_escaped(value);
        if(must_encode){
            const auto encoded_key = Base64::EncodeFromString(key);
            const auto encoded_value = Base64::EncodeFromString(value);
            os << "comment base64 metadata: " << encoded_key << " = " << encoded_value << std::endl;
        }else{
            // If encoding is not needed then don't. It will make the data more accessible.
            os << "comment metadata: " << key << " = " << value << std::endl;
        }
    }
}

os << "element vertex " << fvsm.vertices.size() << std::endl
<< "property float x" << std::endl
<< "property float y" << std::endl
<< "property float z" << std::endl
Expand Down
10 changes: 8 additions & 2 deletions src/YgorMathIOPLY.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

// This routine reads an fv_surface_mesh from an ASCII PLY format stream.
//
// Note that this routine does not validate or enforce manifoldness.
// Note that arbitrary metadata is read from PLY comments and will be base64-decoded if necessary.
//
// Note that this routine does not validate or enforce manifoldness. Point clouds are valid.
//
// Note that a subset of PLY features are supported. In particular, custom/referenced materials are not supported.
template <class T, class I>
Expand All @@ -26,7 +28,11 @@ ReadFVSMeshFromASCIIPLY(fv_surface_mesh<T,I> &fvsm,

// This routine writes an fv_surface_mesh to an ASCII PLY format stream.
//
// Note that metadata can not be written.
// Note that arbitrary metadata is written as a comment and will be base64-encoded if necessary.
//
// Note that this routine does not validate or enforce manifoldness. Point clouds are valid.
//
// Note that only basic PLY features are used (vertex and vertex_index elements).
template <class T, class I>
bool
WriteFVSMeshToASCIIPLY(const fv_surface_mesh<T,I> &fvsm,
Expand Down
1 change: 1 addition & 0 deletions src/YgorMathIOSVG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ WriteCCToSVG(const contour_collection<T> &cc,
for(const auto &x : in){
// Permit words/sentences but not characters that could potentially affect file interpretation.
if( !std::isprint(x)
|| (x == static_cast<unsigned char>('\''))
|| (x == static_cast<unsigned char>('<'))
|| (x == static_cast<unsigned char>('>'))
|| (x == static_cast<unsigned char>('&')) ) return true;
Expand Down
18 changes: 18 additions & 0 deletions tests2/YgorMath/fv_surface_mesh.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,24 @@ TEST_CASE( "fv_surface_mesh member functions" ){
mesh1.vertices = {{ p1, p3, p4 }};
mesh1.faces = {{ static_cast<uint32_t>(0), static_cast<uint32_t>(1), static_cast<uint32_t>(2) }};

// Exercises operator== and operator!= on fv_surface_mesh using a second mesh built to match mesh1.
SUBCASE("operator=="){
// Construct mesh2 with the same vertices and the same single face as mesh1; the two must compare equal.
fv_surface_mesh<double, uint32_t> mesh2;
mesh2.vertices = {{ p1, p3, p4 }};
mesh2.faces = {{ static_cast<uint32_t>(0), static_cast<uint32_t>(1), static_cast<uint32_t>(2) }};
REQUIRE( mesh1 == mesh2 );

// Member 'involved_faces' should not impact equality since it is a derived structure that may be in an
// indeterminate state (i.e., it can be generated on-demand whenever needed, and may not have been generated
// recently).
// Here one mesh has the index cleared while the other has it regenerated; equality must still hold.
mesh1.involved_faces.clear();
mesh2.recreate_involved_face_index();
REQUIRE( mesh1 == mesh2 );
mesh2.involved_faces.clear();

// Metadata *is* significant for equality.
// Adding a key-value pair to only one of the meshes must make them compare unequal.
mesh2.metadata["new key"] = "new value";
REQUIRE( mesh1 != mesh2 );
}

SUBCASE("surface_area"){
REQUIRE( mesh1.surface_area() == 0.5 );
Expand Down
Loading

0 comments on commit 0e81c4c

Please sign in to comment.