Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support stream wrappers in XML parser extensions, add external entity loader #3249

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions hphp/doc/inconsistencies
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,8 @@ conversion inside the condition of an if statement or similar.

(7) All fatals prevent further PHP code from executing, including __destruct
methods. N.B.: exit() is a fatal.

(8) Loading of external entities in the libxml extension is disabled by default
for security reasons. It can be re-enabled on a per-protocol basis (file, http,
compress.zlib, etc.) with a comma-separated list in the ini setting
hhvm.libxml.ext_entity_whitelist.
23 changes: 12 additions & 11 deletions hphp/runtime/base/stream-wrapper-registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,25 +182,20 @@ Wrapper* getWrapper(const String& scheme, bool warn /*= false */) {
return nullptr;
}

Wrapper* getWrapperFromURI(const String& uri,
int* pathIndex /* = NULL */,
bool warn /*= true */) {
const char *uri_string = uri.data();

String getWrapperProtocol(const char* uri_string, int* pathIndex) {
/* Special case for PHP4 Backward Compatibility */
if (!strncasecmp(uri_string, "zlib:", sizeof("zlib:") - 1)) {
return getWrapper(s_compress_zlib, warn);
return s_compress_zlib;
}

// data wrapper can come with or without a double forward slash
if (!strncasecmp(uri_string, "data:", sizeof("data:") - 1)) {
return getWrapper(s_data, warn);
return s_data;
}

int n = 0;
const char* p = uri_string;
while (*p && (isalnum((unsigned char)*p) ||
*p == '+' || *p == '-' || *p == '.')) {
while (*p && (isalnum(*p) || *p == '+' || *p == '-' || *p == '.')) {
n++;
p++;
}
Expand All @@ -210,12 +205,18 @@ Wrapper* getWrapperFromURI(const String& uri,
}

if (!colon) {
return getWrapper(s_file, warn);
return s_file;
}

int len = colon - uri_string;
if (pathIndex != nullptr) *pathIndex = len + sizeof("://") - 1;
return getWrapper(String(uri_string, len, CopyString), warn);
return String(uri_string, len, CopyString);
}

/*
 * Look up the stream wrapper for a URI.
 *
 * The scheme is extracted from the URI's character data by
 * getWrapperProtocol(), which also receives pathIndex so it can report the
 * offset of the path portion when the caller supplies a non-null pointer.
 * The resulting scheme name is then resolved through getWrapper(), with
 * `warn` passed along unchanged.
 */
Wrapper* getWrapperFromURI(const String& uri,
                           int* pathIndex /* = NULL */,
                           bool warn /*= true */) {
  const String scheme = getWrapperProtocol(uri.data(), pathIndex);
  return getWrapper(scheme, warn);
}

static FileStreamWrapper s_file_stream_wrapper;
Expand Down
2 changes: 2 additions & 0 deletions hphp/runtime/base/stream-wrapper-registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ bool disableWrapper(const String& scheme);
bool restoreWrapper(const String& scheme);
bool registerRequestWrapper(const String& scheme, std::unique_ptr<Wrapper> wrapper);
Array enumWrappers();

String getWrapperProtocol(const char* url, int* pathIndex = nullptr);
Wrapper* getWrapper(const String& scheme, bool warn = true);
Wrapper* getWrapperFromURI(const String& uri,
int* pathIndex = nullptr, bool warn = true);
Expand Down
82 changes: 38 additions & 44 deletions hphp/runtime/ext/ext_domdocument.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ static xmlNsPtr dom_get_ns(xmlNodePtr nodep, const char *uri, int *errorcode,
}

static xmlDocPtr dom_document_parser(c_DOMDocument * domdoc, int mode,
char *source, int source_len,
const String& source,
int options) {
xmlDocPtr ret = NULL;
xmlParserCtxtPtr ctxt = NULL;
Expand All @@ -651,27 +651,37 @@ static xmlDocPtr dom_document_parser(c_DOMDocument * domdoc, int mode,
if (mode == DOM_LOAD_FILE) {
String file_dest = libxml_get_valid_file_path(source);
if (!file_dest.empty()) {
ctxt = xmlCreateFileParserCtxt(file_dest.data());
// This is considerably more verbose than just using
// xmlCreateFileParserCtxt, but it allows us to bypass the external
// entity loading path, which is locked down by default for security
// reasons.
auto stream = File::Open(file_dest, "rb");
if (!stream.isInvalid()) {
ctxt = xmlCreateIOParserCtxt(nullptr, nullptr,
libxml_streams_IO_read,
libxml_streams_IO_close,
stream.get(),
XML_CHAR_ENCODING_NONE);

// We're storing a reference in the xmlParserCtxt
if (ctxt) stream.get()->incRefCount();
}
}
} else {
ctxt = xmlCreateMemoryParserCtxt(source, source_len);
ctxt = xmlCreateMemoryParserCtxt(source.data(), source.size());
}

if (ctxt == NULL) {
return NULL;
}
if (ctxt == NULL) return NULL;

/* If loading from memory, we need to set the base directory
for the document */
/* If loading from memory, we need to set the base directory for the
* document */
if (mode != DOM_LOAD_FILE) {
String directory = g_context->getCwd();
if (!directory.empty()) {
if (ctxt->directory != NULL) {
xmlFree((char *) ctxt->directory);
}
if (directory[directory.size() - 1] != '/') {
directory += "/";
}
if (ctxt->directory != NULL) xmlFree(ctxt->directory);

if (directory[directory.size() - 1] != '/') directory += "/";

ctxt->directory =
(char*)xmlCanonicPath((const xmlChar*)directory.c_str());
}
Expand Down Expand Up @@ -712,9 +722,15 @@ static xmlDocPtr dom_document_parser(c_DOMDocument * domdoc, int mode,
if (ctxt->recovery) {
HHVM_FN(error_reporting)(old_error_reporting);
}
/* If loading from memory, set the base reference uri for the document */
if (ret && ret->URL == NULL && ctxt->directory != NULL) {
ret->URL = xmlStrdup((xmlChar*)ctxt->directory);
if (ret && ret->URL == NULL) {
if (mode == DOM_LOAD_FILE) {
ret->URL = xmlStrdup((xmlChar*)source.c_str());
} else {
/* If loading from memory, set the base reference uri for the document */
if (ctxt->directory != NULL) {
ret->URL = xmlStrdup((xmlChar*)ctxt->directory);
}
}
}
} else {
ret = NULL;
Expand All @@ -734,8 +750,7 @@ static Variant dom_parse_document(c_DOMDocument *domdoc, const String& source,
return false;
}
xmlDoc *newdoc =
dom_document_parser(domdoc, mode, (char*)source.data(), source.length(),
options);
dom_document_parser(domdoc, mode, source, options);
if (!newdoc) {
return false;
}
Expand Down Expand Up @@ -3353,12 +3368,7 @@ Variant c_DOMDocument::t_importnode(const Object& importednode,
Variant c_DOMDocument::t_load(const String& filename,
int64_t options /* = 0 */) {
SYNC_VM_REGS_SCOPED();
String translated = File::TranslatePath(filename);
if (translated.empty()) {
raise_warning("Unable to read file: %s", filename.data());
return false;
}
return dom_parse_document(this, translated, options, DOM_LOAD_FILE);
return dom_parse_document(this, filename, options, DOM_LOAD_FILE);
}

Variant c_DOMDocument::t_loadhtml(const String& source) {
Expand All @@ -3368,12 +3378,7 @@ Variant c_DOMDocument::t_loadhtml(const String& source) {

Variant c_DOMDocument::t_loadhtmlfile(const String& filename) {
SYNC_VM_REGS_SCOPED();
String translated = File::TranslatePath(filename);
if (translated.empty()) {
raise_warning("Unable to read file: %s", filename.data());
return false;
}
return dom_load_html(this, translated, DOM_LOAD_FILE);
return dom_load_html(this, filename, DOM_LOAD_FILE);
}

Variant c_DOMDocument::t_loadxml(const String& source,
Expand Down Expand Up @@ -3424,19 +3429,13 @@ Variant c_DOMDocument::t_save(const String& file, int64_t options /* = 0 */) {
xmlDocPtr docp = (xmlDocPtr)m_node;
int bytes, format = 0, saveempty = 0;

String translated = File::TranslatePath(file);
if (translated.empty()) {
raise_warning("Invalid Filename");
return false;
}

/* encoding handled by property on doc */
format = m_formatoutput;
if (options & LIBXML_SAVE_NOEMPTYTAG) {
saveempty = xmlSaveNoEmptyTags;
xmlSaveNoEmptyTags = 1;
}
bytes = xmlSaveFormatFileEnc(translated.data(), docp, NULL, format);
bytes = xmlSaveFormatFileEnc(file.data(), docp, NULL, format);
if (options & LIBXML_SAVE_NOEMPTYTAG) {
xmlSaveNoEmptyTags = saveempty;
}
Expand All @@ -3450,14 +3449,9 @@ Variant c_DOMDocument::t_savehtmlfile(const String& file) {
xmlDocPtr docp = (xmlDocPtr)m_node;
int bytes, format = 0;

String translated = File::TranslatePath(file);
if (translated.empty()) {
raise_warning("Invalid Filename");
return false;
}
/* encoding handled by property on doc */
format = m_formatoutput;
bytes = htmlSaveFileFormat(translated.data(), docp, NULL, format);
bytes = htmlSaveFileFormat(file.data(), docp, NULL, format);
if (bytes == -1) {
return false;
}
Expand Down
43 changes: 41 additions & 2 deletions hphp/runtime/ext/ext_simplexml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "hphp/runtime/ext/ext_domdocument.h"
#include "hphp/runtime/ext/libxml/ext_libxml.h"
#include "hphp/system/systemlib.h"
#include "hphp/runtime/vm/vm-regs.h"

namespace HPHP {

Expand Down Expand Up @@ -1128,6 +1129,7 @@ Variant f_simplexml_load_string(
int64_t options /* = 0 */,
const String& ns /* = "" */,
bool is_prefix /* = false */) {
SYNC_VM_REGS_SCOPED();
Class* cls = class_from_name(class_name, "simplexml_load_string");
if (!cls) {
return init_null();
Expand All @@ -1152,8 +1154,44 @@ Variant f_simplexml_load_file(const String& filename,
const String& class_name /* = "SimpleXMLElement" */,
int64_t options /* = 0 */, const String& ns /* = "" */,
bool is_prefix /* = false */) {
String str = f_file_get_contents(filename);
return f_simplexml_load_string(str, class_name, options, ns, is_prefix);
SYNC_VM_REGS_SCOPED();
Class* cls = class_from_name(class_name, "simplexml_load_file");
if (!cls) {
return init_null();
}

auto stream = File::Open(filename, "rb");
if (stream.isInvalid()) return false;

xmlDocPtr doc = nullptr;
xmlParserCtxtPtr ctxt = xmlCreateIOParserCtxt(nullptr, nullptr,
libxml_streams_IO_read,
libxml_streams_IO_close,
stream.get(),
XML_CHAR_ENCODING_NONE);
if (ctxt == nullptr) return false;
stream.get()->incRefCount();
SCOPE_EXIT { xmlFreeParserCtxt(ctxt); };

if (ctxt->directory == nullptr) {
ctxt->directory = xmlParserGetDirectory(filename.c_str());
}
xmlParseDocument(ctxt);
if (ctxt->wellFormed) {
doc = ctxt->myDoc;
} else {
xmlFreeDoc(ctxt->myDoc);
ctxt->myDoc = nullptr;
return false;
}

Object obj = create_object(cls->nameStr(), Array(), false);
c_SimpleXMLElement* sxe = obj.getTyped<c_SimpleXMLElement>();
sxe->document = Resource(NEWOBJ(XmlDocWrapper)(doc));
sxe->node = xmlDocGetRootElement(doc);
sxe->iter.nsprefix = ns.size() ? xmlStrdup((xmlChar*)ns.data()) : nullptr;
sxe->iter.isprefix = is_prefix;
return obj;
}

///////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -1198,6 +1236,7 @@ void c_SimpleXMLElement::t___construct(const String& data,
bool data_is_url /* = false */,
const String& ns /* = "" */,
bool is_prefix /* = false */) {
SYNC_VM_REGS_SCOPED();
xmlDocPtr docp = data_is_url ?
xmlReadFile(data.data(), nullptr, options) :
xmlReadMemory(data.data(), data.size(), nullptr, nullptr, options);
Expand Down
2 changes: 1 addition & 1 deletion hphp/runtime/ext/ext_xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,8 +717,8 @@ int64_t f_xml_parse(const Resource& parser, const String& data, bool is_final /*

int64_t f_xml_parse_into_struct(const Resource& parser, const String& data, VRefParam values,
VRefParam index /* = null */) {
SYNC_VM_REGS_SCOPED();
int ret;
VMRegAnchor _;
XmlParser * p = parser.getTyped<XmlParser>();
values = Array::Create();
p->data.assignRef(values);
Expand Down