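// NOTE: the lines below are web-page navigation residue captured when this file was copied;
// they are not part of the module and are kept only as a comment.
// Skip to content / Permalink / master / Switch branches/tags / Go to file
// Cannot retrieve contributors at this time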
// FIXME: add classList. it is a live list and removes whitespace and duplicates when you use it.
// FIXME: xml namespace support???
// FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML
// FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility...
// FIXME: the scriptable list is quite arbitrary
// xml entity references?!
/++
This is an html DOM implementation, started with cloning
what the browser offers in Javascript, but going well beyond
it in convenience.
If you can do it in Javascript, you can probably do it with
this module, and much more.
---
import arsd.dom;
import std.stdio; // for writeln
void main() {
auto document = new Document("<html><p>paragraph</p></html>");
writeln(document.querySelector("p"));
document.root.innerHTML = "<p>hey</p>";
writeln(document);
}
---
BTW: this file optionally depends on `arsd.characterencodings`, to
help it correctly read files from the internet. You should be able to
get characterencodings.d from the same place you got this file.
If you want it to stand alone, just always use the `Document.parseUtf8`
function or the constructor that takes a string.
Symbol_groups:
core_functionality =
These members provide core functionality. The members on these classes
will provide most of your direct interaction.
bonus_functionality =
These provide additional functionality for special use cases.
implementations =
These provide implementations of other functionality.
+/
module arsd.dom;
// FIXME: support the css standard namespace thing in the selectors too
// When compiled with -version=with_arsd_jsvar, arsd.jsvar is imported.
// Otherwise a plain string constant named `scriptable` is declared here so that
// references to that name elsewhere in the module still resolve.
// NOTE(review): presumably arsd.jsvar provides its own `scriptable` symbol that this
// stands in for — confirm against arsd.jsvar.
version(with_arsd_jsvar)
import arsd.jsvar;
else {
enum scriptable = "arsd_jsvar_compatible";
}
// Compile-time filter for opDispatch: answers whether `name` is one of the
// attributes that may be accessed directly, without going through .attr.
// The comparison is exact (case-sensitive).
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
		     "checked", "selected", "type",
		     "src", "content", "pattern",
		     "placeholder", "required", "alt",
		     "rel",
		     "method", "action", "enctype":
			return true;
		default:
			return false;
	}
}
// FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too
// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
we could do
meh this sux
auto xpath = XPath(element);
// get the first p
xpath.p[0].a["href"]
*/
/// The main document interface, including a html parser.
/// Group: core_functionality
class Document : FileResource, DomParent {
/// Returns this object viewed as a Document (always `this`).
inout(Document) asDocument() inout {
	return this;
}
/// A Document is not an Element, so this always returns null.
inout(Element) asElement() inout {
	return null;
}
/// Convenience method for web scraping: performs an HTTP GET on `url` and parses
/// the response body into a new Document.
///
/// With `strictMode` the body is parsed case-sensitively and strictly, using the
/// charset reported by the response; otherwise it goes through [parseGarbage].
///
/// Requires [arsd.http2] to be included in the build as well as
/// [arsd.characterencodings].
static Document fromUrl()(string url, bool strictMode = false) {
	import arsd.http2;

	auto client = new HttpClient();
	auto response = client.navigateTo(Uri(url), HttpVerb.GET).waitForCompletion();

	auto result = new Document();
	if(!strictMode)
		result.parseGarbage(cast(string) response.content);
	else
		result.parse(cast(string) response.content, true, true, response.contentTypeCharset);

	return result;
}
/// Constructs a document by parsing `data` with [parseUtf8].
/// Params:
///     data = the markup to parse
///     caseSensitive = forwarded to parseUtf8
///     strict = forwarded to parseUtf8
this(string data, bool caseSensitive = false, bool strict = false) {
	this.parseUtf8(data, caseSensitive, strict);
}
/**
Creates an empty document. It has *nothing* in it at all:
this constructor performs no parsing and sets nothing up.
*/
this() {
}
/// Experimental: index the document with a CSS selector string.
/// The result is an ElementCollection rooted at the document root and then
/// indexed by `selector`; it forwards calls to all elements it holds and
/// returns itself so calls can be chained.
///
/// Example: document["p"].innerText("hello").addClass("modified");
///
/// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClass("modified"); }
///
/// Note: always use function calls (not property syntax) and don't use toString in there for best results.
///
/// Collections can be indexed again, e.g. document["p"]["b"], although the
/// selector string itself can already express that.
ElementCollection opIndex(string selector) {
	return ElementCollection(this.root)[selector];
}
string _contentType = "text/html; charset=utf-8";
/// Sets the content type reported for this document, e.g. when it holds
/// some other kind of XML, and returns the stored value.
///
/// Note: this has no impact on the function of this class.
/// It is only used if the document is sent via a protocol like HTTP.
///
/// This may be called by parse() if it recognizes the data. If it is never
/// set, the value stays at text/html; charset=utf-8.
@property string contentType(string mimeType) {
	return _contentType = mimeType;
}
/// Part of the FileResource interface (useful for sending via http
/// automatically). A Document has no file name, so this is always null.
@property string filename() const {
	return null;
}
/// Part of the FileResource interface (useful for sending via http
/// automatically): returns the currently stored content type string.
override @property string contentType() const {
	return this._contentType;
}
/// Part of the FileResource interface: the document rendered by toString,
/// reinterpreted as immutable bytes.
override immutable(ubyte)[] getData() const {
	auto rendered = this.toString();
	return cast(immutable(ubyte)[]) rendered;
}
/// Concatenates any consecutive text nodes
/*
void normalize() {
}
*/
/// Installs an "always keep" callback on every parseSaw* hook, so comments,
/// asp/php code and <? / <! instructions are added to the dom tree when seen.
/// This overwrites anything previously assigned to those hooks, and assigning
/// them afterwards overwrites this in turn.
/// Call this before calling parse().
/// Note this will also preserve the prolog and doctype from the original file, if there was one.
void enableAddingSpecialTagsToDom() {
	parseSawComment = delegate bool(string) { return true; };
	parseSawAspCode = delegate bool(string) { return true; };
	parseSawPhpCode = delegate bool(string) { return true; };
	parseSawQuestionInstruction = delegate bool(string) { return true; };
	parseSawBangInstruction = delegate bool(string) { return true; };
}
/// If the parser sees a html comment, it will call this callback
/// <!-- comment --> will call parseSawComment(" comment ")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawComment;
/// If the parser sees <% asp code... %>, it will call this callback.
/// It will be passed "% asp code... %" or "%= asp code .. %"
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawAspCode;
/// If the parser sees <?php php code... ?>, it will call this callback.
/// It will be passed "?php php code... ?" or "?= php code .. ?"
/// Note: dom.d cannot identify the other php <? code ?> short format.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawPhpCode;
/// if it sees a <?xxx> that is not php or asp
/// it calls this function with the contents.
/// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo")
/// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>.
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawQuestionInstruction;
/// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment),
/// it calls this function with the contents.
/// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo")
/// Return true if you want the node appended to the document.
bool delegate(string) parseSawBangInstruction;
/// Best-effort parse for the kind of markup found on the Internet.
/// Equivalent to document.parse(data, false, false, null);
/// (Case-insensitive, non-strict, determine character encoding from the data.)
/// NOTE: this makes no attempt at added security.
///
/// It is a template so it lazily imports characterencodings.
void parseGarbage()(string data) {
	enum bool caseSensitive = false;
	enum bool strict = false;
	// a null encoding asks parse() to work the encoding out from the data
	parse(data, caseSensitive, strict, null);
}
/// Parses well-formed UTF-8 XML or XHTML, case-sensitively and in strict mode.
/// Will throw exceptions on things like unclosed tags.
void parseStrict(string data) {
	auto stream = toUtf8Stream(data);
	parseStream(stream, true, true);
}
/// Parses UTF-8 data, in loose mode unless told otherwise. Tries to correct
/// tag soup, but does NOT try to correct bad character encodings.
///
/// They will still throw an exception.
void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
	auto stream = toUtf8Stream(data);
	parseStream(stream, caseSensitive, strict);
}
// this is a template so we get lazy import behavior
/++
	Works out the character encoding of `rawdata`, converts it to UTF-8 when
	needed, and wraps the result in a Utf8Stream.

	Params:
		rawdata = the document bytes, typed as a string
		dataEncoding = the encoding name if known; pass null to have it
			detected from byte order marks / validity (via
			tryToDetermineEncoding) and then from an XML prolog or a
			`charset=` declaration near the text
		strict = if true, an undeterminable encoding throws and conversion
			errors propagate; if false, the function falls back to
			"Windows 1252" both when nothing could be determined and when
			data labeled UTF-8 fails validation or conversion fails

	Throws:
		MarkupException in strict mode when no encoding could be determined.
		Whatever convertToUtf8 throws, in strict mode.
+/
Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
	import arsd.characterencodings;

	// True for any byte that cannot be part of an encoding label and therefore
	// ends the value of an encoding="..." / charset=... declaration.
	static bool endsEncodingName(ubyte b) {
		return b == '"' || b == '\'' || b == '>' || b == ';' || b == '/'
			|| b == ' ' || b == '\t' || b == '\r' || b == '\n';
	}

	// gotta determine the data encoding. If you know it, pass it in above to skip all this.
	if(dataEncoding is null) {
		dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
		// it can't tell... probably a random 8 bit encoding. Let's check the document itself.
		// Now, XML and HTML can both list encoding in the document, but we can't really parse
		// it here without changing a lot of code until we know the encoding. So this does
		// some hackish string checking instead.
		if(dataEncoding is null) {
			auto dataAsBytes = cast(immutable(ubyte)[]) rawdata;
			// first, look for an XML prolog
			auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\"");
			if(idx != -1) {
				idx += "encoding=\"".length;
				// we're probably past the prolog if it's this far in; we might be looking at
				// content. Forget about it.
				if(idx > 100)
					idx = -1;
			}
			// if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5)..
			if(idx == -1) {
				idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset=");
				if(idx != -1) {
					idx += "charset=".length;
					// skip an opening quote of either kind; the bounds check matters
					// when "charset=" is the very last thing in the data
					if(idx < dataAsBytes.length && (dataAsBytes[idx] == '"' || dataAsBytes[idx] == '\''))
						idx++;
				}
			}
			// found something in either branch...
			if(idx != -1) {
				// read till the value ends (quote, >, ;, / or whitespace) or about 12 chars,
				// whichever comes first...
				auto end = idx;
				while(end < dataAsBytes.length && end - idx < 12 && !endsEncodingName(dataAsBytes[end]))
					end++;
				// an empty value tells us nothing; leave dataEncoding null in that case
				if(end > idx)
					dataEncoding = cast(string) dataAsBytes[idx .. end];
			}
			// otherwise, we just don't know.
		}
	}
	if(dataEncoding is null) {
		if(strict)
			throw new MarkupException("I couldn't figure out the encoding of this document.");
		else
		// if we really don't know by here, it means we already tried UTF-8,
		// looked for utf 16 and 32 byte order marks, and looked for xml or meta
		// tags... let's assume it's Windows-1252, since that's probably the most
		// common aside from utf that wouldn't be labeled.
		dataEncoding = "Windows 1252";
	}
	// and now, go ahead and convert it.
	string data;
	if(!strict) {
		// if we're in non-strict mode, we need to check
		// the document for mislabeling too; sometimes
		// web documents will say they are utf-8, but aren't
		// actually properly encoded. If it fails to validate,
		// we'll assume it's actually Windows encoding - the most
		// likely candidate for mislabeled garbage.
		dataEncoding = dataEncoding.toLower();
		dataEncoding = dataEncoding.replace(" ", "");
		dataEncoding = dataEncoding.replace("-", "");
		dataEncoding = dataEncoding.replace("_", "");
		if(dataEncoding == "utf8") {
			try {
				validate(rawdata);
			} catch(UTFException e) {
				dataEncoding = "Windows 1252";
			}
		}
	}
	// NOTE(review): after the normalization above, a non-strict call never has the
	// literal name "UTF-8" here, so it always goes through convertToUtf8 — confirm
	// that convertToUtf8 treats "utf8" as a pass-through.
	if(dataEncoding != "UTF-8") {
		if(strict)
			data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
		else {
			try {
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding);
			} catch(Exception e) {
				// unknown or failing encoding: last resort in loose mode
				data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252");
			}
		}
	} else
		data = rawdata;
	return toUtf8Stream(data);
}
private
// Wraps a string as the parser's input type: when Utf8Stream is just an alias
// of string the text is returned as-is, otherwise a new Utf8Stream is built around it.
Utf8Stream toUtf8Stream(in string rawdata) {
	string text = rawdata;
	static if(!is(Utf8Stream == string)) {
		return new Utf8Stream(text);
	} else {
		return text;
	}
}
/++
List of elements that can be assumed to be self-closed
in this document. The default for a Document is a hard-coded
list of ones appropriate for HTML. For [XmlDocument], it defaults
to empty. You can modify this after construction but before parsing.
History:
Added February 8, 2021 (included in dub release 9.2)
+/
string[] selfClosedElements = htmlSelfClosedElements;
/++
List of elements that are considered inline for pretty printing.
The default for a Document is hard-coded to something appropriate
for HTML. For [XmlDocument], it defaults to empty. You can modify
this after construction but before parsing.
History:
Added June 21, 2021 (included in dub release 10.1)
+/
string[] inlineElements = htmlInlineElements;
/**
Take XMLish data and try to make the DOM tree out of it.

The goal isn't to be perfect, but to just be good enough to
approximate Javascript's behavior.

If strict, it throws on something that doesn't make sense.
(Examples: mismatched tags. It doesn't validate!)
If not strict, it tries to recover anyway, and only throws
when something is REALLY unworkable.

If strict is false, it uses a magic list of tags that needn't
be closed. If you are writing a document specifically for this,
try to avoid such - use self closed tags at least. Easier to parse.

The dataEncoding argument can be used to pass a specific
charset encoding for automatic conversion. If null (which is NOT
the default!), the encoding is determined from the data itself,
using the xml prolog or meta tags. If it still cannot be determined,
strict mode throws and non-strict mode falls back to Windows-1252.

The argument defaults to "UTF-8" because this function previously
assumed the data was encoded as UTF-8, so the default keeps backward
compatibility. If that assumption is wrong, it can throw on
non-ascii characters! For the best behavior on wild data - figuring
it out from the document instead of assuming - pass null.

This is a template so it lazily imports arsd.characterencodings, which is required
to fix up data encodings.

If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
dependency. If it is data from the Internet though, a random website, the encoding
is often a lie. This function, if dataEncoding == null, can correct for that, or
you can try parseGarbage. In those cases, arsd.characterencodings is required to
compile.
*/
void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
	parseStream(handleDataEncoding(rawdata, dataEncoding, strict), caseSensitive, strict);
}
// note: this work best in strict mode, unless data is just a simple string wrapper
void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) {
// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
// of my big app.
assert(data !is null);
// go through character by character.
// if you see a <, consider it a tag.
// name goes until the first non tagname character
// then see if it self closes or has an attribute
// if not in a tag, anything not a tag is a big text
// node child. It ends as soon as it sees a <
// Whitespace in text or attributes is preserved, but not between attributes
// &amp; and friends are converted when I know them, left the same otherwise
// this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really)
//validate(data); // it *must* be UTF-8 for this to work correctly
sizediff_t pos = 0;
clear();
loose = !caseSensitive;
bool sawImproperNesting = false;
bool paragraphHackfixRequired = false;
// Returns the 1-based line number of offset p, found by counting the
// '\n' characters in data[0 .. p].
int getLineNumber(sizediff_t p) {
	int newlinesSeen = 0;
	foreach(ch; data[0 .. p]) {
		if(ch == '\n')
			++newlinesSeen;
	}
	return newlinesSeen + 1;
}
// Always throws: raises a MarkupException whose text is prefixed with the
// current character offset and line number.
void parseError(string message) {
	auto located = format("char %d (line %d): %s", pos, getLineNumber(pos), message);
	throw new MarkupException(located);
}
// Advances pos past any run of simple whitespace.
// Returns true if at least one character was skipped.
bool eatWhitespace() {
	auto before = pos;
	while(pos < data.length && data[pos].isSimpleWhite)
		pos++;
	return pos != before;
}
// Reads a tag name starting at pos, leaving pos on the character that ended it.
// The name runs until '>', '/' or whitespace, so ':' (namespaces) is included.
// It is lowercased unless the parse is case sensitive.
// Throws Exception in strict mode if the data ends inside the name.
string readTagName() {
	auto begin = pos;
	for(;;) {
		auto ch = data[pos];
		if(ch == '>' || ch == '/' || ch.isSimpleWhite)
			break;
		pos++;
		if(pos == data.length) {
			if(strict)
				throw new Exception("tag name incomplete when file ended");
			break;
		}
	}
	auto name = data[begin .. pos];
	if(caseSensitive)
		return name;
	return toLower(name);
}
// Reads an attribute name starting at pos, leaving pos on the character that ended it.
// The name runs until '>', '/', '=' or whitespace, so ':' (namespaces) is included.
// It is lowercased unless the parse is case sensitive.
// Throws MarkupException in strict mode on a '<' inside the name, and Exception
// in strict mode if the data ends inside the name.
string readAttributeName() {
	auto begin = pos;
	for(;;) {
		auto ch = data[pos];
		if(ch == '>' || ch == '/' || ch == '=' || ch.isSimpleWhite)
			break;
		if(ch == '<') {
			if(strict)
				throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos)));
			// Loose mode: some files forget the '>' before the next tag opens,
			// e.g. <a href="something" <img src="x" /></a>. Browsers cope with
			// that, so stop here and act as if the '>' had been there.
			break;
		}
		pos++;
		if(pos == data.length) {
			if(strict)
				throw new Exception("unterminated attribute name");
			break;
		}
	}
	auto name = data[begin .. pos];
	if(caseSensitive)
		return name;
	return toLower(name);
}
// Reads an attribute value starting at pos and returns it with html entities decoded.
// Quoted values (either quote character) run to the matching quote, which is skipped.
// Unquoted values are rejected in strict mode; in loose mode they run until
// whitespace, '>' or a "/>" pair, and that terminator is left in place.
// At end of data: throws Exception in strict mode, returns null otherwise.
string readAttributeValue() {
	if(pos >= data.length) {
		if(strict)
			throw new Exception("no attribute value before end of file");
		return null;
	}

	auto opener = data[pos];
	if(opener == '\'' || opener == '"') {
		auto quoteAt = pos;
		pos++;
		auto valueBegin = pos;
		while(pos < data.length && data[pos] != opener)
			pos++;
		if(strict && pos == data.length)
			throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(quoteAt));
		string quoted = htmlEntitiesDecode(data[valueBegin .. pos], strict);
		pos++; // skip over the closing quote
		return quoted;
	}

	if(strict)
		parseError("Attributes must be quoted");

	auto bareBegin = pos;
	while(pos < data.length) {
		auto ch = data[pos];
		if(ch == '>' || ch.isSimpleWhite)
			break;
		// unquoted attributes might be urls, so a '/' only ends the value
		// when it is the start of a self-closing "/>"
		if(ch == '/' && pos + 1 < data.length && data[pos + 1] == '>')
			break;
		pos++;
	}
	// the terminator is not consumed - the caller still needs to see it
	return htmlEntitiesDecode(data[bareBegin .. pos], strict);
}
// Consumes everything up to (not including) the next '<' or the end of data
// and returns it as a TextNode built from the still-undecoded text.
TextNode readTextNode() {
	auto begin = pos;
	for(; pos < data.length; pos++) {
		if(data[pos] == '<')
			break;
	}
	return TextNode.fromUndecodedString(this, data[begin .. pos]);
}
// this is obsolete!
// Consumes everything up to (not including) the next '<' or the end of data
// and returns it wrapped in a RawSource node.
RawSource readCDataNode() {
	auto begin = pos;
	for(; pos < data.length; pos++) {
		if(data[pos] == '<')
			break;
	}
	return new RawSource(this, data[begin .. pos]);
}
// Result record produced by readElement. The `type` field tells the caller
// how to interpret `element` and `payload`. Instances are built positionally
// (e.g. Ele(1, null, name)), so the field order below matters.
struct Ele {
int type; // element or closing tag or nothing
/*
type == 0 means regular node, self-closed (element is valid)
type == 1 means closing tag (payload is the tag name, element may be valid)
type == 2 means you should ignore it completely
type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not
type == 4 means the document was totally empty
*/
Element element; // for type == 0 or type == 3
string payload; // for type == 1
}
// recursively read a tag
/*
	Reads one item from data at pos: a text node, a comment / CDATA / <! / <? / <%
	construct, a closing tag, or an opening tag together with all of its children.

	Params:
		parentChain = names of the tags currently open above this one. Only
			maintained in loose mode, where it is used to decide whether a
			mismatched closing tag closes some ancestor.

	Returns: an Ele whose `type` says what was read (see struct Ele).

	Throws: MarkupException / Exception on malformed input in strict mode.
*/
Ele readElement(string[] parentChain = null) {
	// FIXME: this is the slowest function in this module, by far, even in strict mode.
	// Loose mode should perform decently, but strict mode is the important one.
	if(!strict && parentChain is null)
		parentChain = [];

	// NOTE(review): being `static`, this list outlives a single call and a single
	// parse, so entries from an earlier document can still be present — confirm intended.
	static string[] recentAutoClosedTags;

	if(pos >= data.length)
	{
		if(strict) {
			throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain));
		} else {
			if(parentChain.length)
				return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended
			else
				return Ele(4); // signal emptiness upstream
		}
	}

	if(data[pos] != '<') {
		return Ele(0, readTextNode(), null);
	}

	enforce(data[pos] == '<');
	pos++;
	if(pos == data.length) {
		if(strict)
			throw new MarkupException("Found trailing < at end of file");
		// if not strict, we'll just skip the switch
	} else
	switch(data[pos]) {
		// I don't care about these, so I just want to skip them
		case '!': // might be a comment, a doctype, or a special instruction
			pos++;
			// FIXME: we should store these in the tree too
			// though I like having it stripped out tbh.
			if(pos == data.length) {
				if(strict)
					throw new MarkupException("<! opened at end of file");
			} else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') {
				// comment
				pos += 2;
				// FIXME: technically, a comment is anything
				// between -- and -- inside a <!> block.
				// so in <!-- test -- lol> , the " lol" is NOT a comment
				// and should probably be handled differently in here, but for now
				// I'll just keep running until --> since that's the common way
				auto commentStart = pos;
				// `<=` so that a "-->" sitting on the very last three characters of
				// the data is still examined instead of being treated as unclosed
				while(pos + 3 <= data.length && data[pos..pos+3] != "-->")
					pos++;
				auto end = commentStart;
				if(pos + 3 > data.length) {
					// ran out of data without finding the terminator
					if(strict)
						throw new MarkupException("unclosed comment");
					end = data.length;
					pos = data.length;
				} else {
					end = pos;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '-');
					pos++;
					assert(data[pos] == '>');
					pos++;
				}
				if(parseSawComment !is null)
					if(parseSawComment(data[commentStart .. end])) {
						return Ele(3, new HtmlComment(this, data[commentStart .. end]), null);
					}
			} else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") {
				pos += 7;
				auto cdataStart = pos;
				ptrdiff_t end = -1;
				typeof(end) cdataEnd;
				if(pos < data.length) {
					// cdata isn't allowed to nest, so this should be generally ok, as long as it is found
					end = data[pos .. $].indexOf("]]>");
				}
				if(end == -1) {
					if(strict)
						throw new MarkupException("Unclosed CDATA section");
					end = pos;
					cdataEnd = pos;
				} else {
					cdataEnd = pos + end;
					pos = cdataEnd + 3;
				}
				return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null);
			} else {
				auto start = pos;
				while(pos < data.length && data[pos] != '>')
					pos++;
				auto bangEnds = pos;
				if(pos == data.length) {
					if(strict)
						throw new MarkupException("unclosed processing instruction (<!xxx>)");
				} else pos++; // skipping the >
				if(parseSawBangInstruction !is null)
					if(parseSawBangInstruction(data[start .. bangEnds])) {
						// FIXME: these should be able to modify the parser state,
						// doing things like adding entities, somehow.
						return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null);
					}
			}
			/*
			if(pos < data.length && data[pos] == '>')
				pos++; // skip the >
			else
				assert(!strict);
			*/
		break;
		case '%':
		case '?':
			/*
				Here's what we want to support:
				<% asp code %>
				<%= asp code %>
				<?php php code ?>
				<?= php code ?>
				The contents don't really matter, just if it opens with
				one of the above for, it ends on the two char terminator.
				<?something>
				this is NOT php code
				because I've seen this in the wild: <?EM-dummyText>
				This could be php with shorttags which would be cut off
				prematurely because if(a >) - that > counts as the close
				of the tag, but since dom.d can't tell the difference
				between that and the <?EM> real world example, it will
				not try to look for the ?> ending.
				The difference between this and the asp/php stuff is that it
				ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end
				on >.
			*/
			char end = data[pos];
			auto started = pos;
			bool isAsp = end == '%';
			int currentIndex = 0;
			bool isPhp = false;
			bool isEqualTag = false;
			int phpCount = 0;
			more:
			pos++; // skip the start
			if(pos == data.length) {
				if(strict)
					throw new MarkupException("Unclosed <"~end~" by end of file");
			} else {
				currentIndex++;
				if(currentIndex == 1 && data[pos] == '=') {
					if(!isAsp)
						isPhp = true;
					isEqualTag = true;
					goto more;
				}
				if(currentIndex == 1 && data[pos] == 'p')
					phpCount++;
				if(currentIndex == 2 && data[pos] == 'h')
					phpCount++;
				if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2)
					isPhp = true;
				if(data[pos] == '>') {
					// asp/php blocks only end on the two character terminator (%> or ?>)
					if((isAsp || isPhp) && data[pos - 1] != end)
						goto more;
					// otherwise we're done
				} else
					goto more;
			}
			//writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]);
			auto code = data[started .. pos];
			assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length));
			if(pos < data.length)
				pos++; // get past the >
			if(isAsp && parseSawAspCode !is null) {
				if(parseSawAspCode(code)) {
					return Ele(3, new AspCode(this, code), null);
				}
			} else if(isPhp && parseSawPhpCode !is null) {
				if(parseSawPhpCode(code)) {
					return Ele(3, new PhpCode(this, code), null);
				}
			} else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) {
				if(parseSawQuestionInstruction(code)) {
					return Ele(3, new QuestionInstruction(this, code), null);
				}
			}
		break;
		case '/': // closing an element
			pos++; // skip the start
			auto p = pos;
			while(pos < data.length && data[pos] != '>')
				pos++;
			//writefln("</%s>", data[p..pos]);
			if(pos == data.length && data[pos-1] != '>') {
				if(strict)
					throw new MarkupException("File ended before closing tag had a required >");
				else
					data ~= ">"; // just hack it in
			}
			pos++; // skip the '>'
			string tname = data[p..pos-1];
			if(!caseSensitive)
				tname = tname.toLower();
			return Ele(1, null, tname); // closing tag reports itself here
		case ' ': // assume it isn't a real element...
			if(strict) {
				parseError("bad markup - improperly placed <");
				assert(0); // parseError always throws
			} else
				return Ele(0, TextNode.fromUndecodedString(this, "<"), null);
		default:
			if(!strict) {
				// what about something that kinda looks like a tag, but isn't?
				auto nextTag = data[pos .. $].indexOf("<");
				auto closeTag = data[pos .. $].indexOf(">");
				if(closeTag != -1 && nextTag != -1)
					if(nextTag < closeTag) {
						// since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically
						auto equal = data[pos .. $].indexOf("=\"");
						if(equal != -1 && equal < closeTag) {
							// this MIGHT be ok, soldier on
						} else {
							// definitely no good, this must be a (horribly distorted) text node
							pos++; // skip the < we're on - don't want text node to end prematurely
							auto node = readTextNode();
							node.contents = "<" ~ node.contents; // put this back
							return Ele(0, node, null);
						}
					}
			}
			string tagName = readTagName();
			string[string] attributes;

			// Finishes the opening tag at pos, creates the element, and (unless it
			// is self-closed) reads children until the matching closing tag.
			Ele addTag(bool selfClosed) {
				if(selfClosed)
					pos++;
				else {
					if(!strict)
						if(tagName.isInArray(selfClosedElements))
							// these are de-facto self closed
							selfClosed = true;
				}
				import std.algorithm.comparison;
				if(strict) {
					// e.g. "<a/" at the very end of the data: report it as a markup
					// error rather than indexing past the end
					if(pos >= data.length)
						throw new MarkupException("tag open, didn't find > before end of file");
					enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - 100) .. min(data.length, pos + 100)]));
				} else {
					// if we got here, it's probably because a slash was in an
					// unquoted attribute - don't trust the selfClosed value
					if(!selfClosed)
						selfClosed = tagName.isInArray(selfClosedElements);
					while(pos < data.length && data[pos] != '>')
						pos++;
					if(pos >= data.length) {
						// the tag never closed
						assert(data.length != 0);
						pos = data.length - 1; // rewinding so it hits the end at the bottom..
					}
				}
				auto whereThisTagStarted = pos; // for better error messages
				pos++;
				auto e = createElement(tagName);
				e.attributes = attributes;
				version(dom_node_indexes) {
					if(e.dataset.nodeIndex.length == 0)
						e.dataset.nodeIndex = to!string(&(e.attributes));
				}
				e.selfClosed = selfClosed;
				e.parseAttributes();
				// HACK to handle script and style as a raw data section as it is in HTML browsers
				if(tagName == "script" || tagName == "style") {
					if(!selfClosed) {
						string closer = "</" ~ tagName ~ ">";
						ptrdiff_t ending;
						// a single search, case-insensitive in loose mode; nothing to
						// search when the data is already exhausted
						if(pos >= data.length)
							ending = -1;
						else
							ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes));
						/*
						if(loose && ending == -1 && pos < data.length)
							ending = indexOf(data[pos..$], closer.toUpper());
						*/
						if(ending == -1) {
							if(strict)
								throw new Exception("tag " ~ tagName ~ " never closed");
							else {
								// let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit.
								if(pos < data.length) {
									e = new TextNode(this, data[pos .. $]);
									pos = data.length;
								}
							}
						} else {
							ending += pos;
							e.innerRawSource = data[pos..ending];
							pos = ending + closer.length;
						}
					}
					return Ele(0, e, null);
				}
				bool closed = selfClosed;

				// Flags the document for the post-parse fixup when a <p> ends up
				// directly inside another <p>.
				void considerHtmlParagraphHack(Element n) {
					assert(!strict);
					if(e.tagName == "p" && e.tagName == n.tagName) {
						// html lets you write <p> para 1 <p> para 1
						// but in the dom tree, they should be siblings, not children.
						paragraphHackfixRequired = true;
					}
				}
				//writef("<%s>", tagName);
				while(!closed) {
					Ele n;
					if(strict)
						n = readElement();
					else
						n = readElement(parentChain ~ tagName);
					if(n.type == 4) return n; // the document is empty
					if(n.type == 3 && n.element !is null) {
						// special node, append if possible
						if(e !is null)
							e.appendChild(n.element);
						else
							piecesBeforeRoot ~= n.element;
					} else if(n.type == 0) {
						if(!strict)
							considerHtmlParagraphHack(n.element);
						e.appendChild(n.element);
					} else if(n.type == 1) {
						bool found = false;
						if(n.payload != tagName) {
							if(strict)
								parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted)));
							else {
								sawImproperNesting = true;
								// this is so we don't drop several levels of awful markup
								if(n.element) {
									if(!strict)
										considerHtmlParagraphHack(n.element);
									e.appendChild(n.element);
									n.element = null;
								}
								// is the element open somewhere up the chain?
								foreach(i, parent; parentChain)
									if(parent == n.payload) {
										recentAutoClosedTags ~= tagName;
										// just rotating it so we don't inadvertently break stuff with bad markup
										if(recentAutoClosedTags.length > 4)
											recentAutoClosedTags = recentAutoClosedTags[1 .. $];
										n.element = e;
										return n;
									}
								// if not, this is a text node; we can't fix it up...
								// If it's already in the tree somewhere, assume it is closed by algorithm
								// and we shouldn't output it - odds are the user just flipped a couple tags
								foreach(ele; e.tree) {
									if(ele.tagName == n.payload) {
										found = true;
										break;
									}
								}
								foreach(ele; recentAutoClosedTags) {
									if(ele == n.payload) {
										found = true;
										break;
									}
								}
								if(!found) // if not found in the tree though, it's probably just text
									e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">"));
							}
						} else {
							if(n.element) {
								if(!strict)
									considerHtmlParagraphHack(n.element);
								e.appendChild(n.element);
							}
						}
						if(n.payload == tagName) // in strict mode, this is always true
							closed = true;
					} else { /*throw new Exception("wtf " ~ tagName);*/ }
				}
				//writef("</%s>\n", tagName);
				return Ele(0, e, null);
			}
			// if a tag was opened but not closed by end of file, we can arrive here
			if(!strict && pos >= data.length)
				return addTag(false);
			//else if(strict) assert(0); // should be caught before
			switch(data[pos]) {
				default: assert(0);
				case '/': // self closing tag
					return addTag(true);
				case '>':
					return addTag(false);
				case ' ':
				case '\t':
				case '\n':
				case '\r':
					// there might be attributes...
					moreAttributes:
					eatWhitespace();
					// same deal as above the switch....
					if(!strict && pos >= data.length)
						return addTag(false);
					if(strict && pos >= data.length)
						throw new MarkupException("tag open, didn't find > before end of file");
					switch(data[pos]) {
						case '/': // self closing tag
							return addTag(true);
						case '>': // closed tag; open -- we now read the contents
							return addTag(false);
						default: // it is an attribute
							string attrName = readAttributeName();
							// an attribute without a value gets its own name as the value
							string attrValue = attrName;
							bool ateAny = eatWhitespace();
							if(strict && ateAny)
								throw new MarkupException("inappropriate whitespace after attribute name");
							if(pos >= data.length) {
								if(strict)
									assert(0, "this should have thrown in readAttributeName");
								else {
									data ~= ">";
									goto blankValue;
								}
							}
							if(data[pos] == '=') {
								pos++;
								ateAny = eatWhitespace();
								// the spec actually allows this!
								//if(strict && ateAny)
									//throw new MarkupException("inappropriate whitespace after attribute equals");
								attrValue = readAttributeValue();
								eatWhitespace();
							}
							blankValue:
							if(strict && attrName in attributes)
								throw new MarkupException("Repeated attribute: " ~ attrName);
							if(attrName.strip().length)
								attributes[attrName] = attrValue;
							else if(strict) throw new MarkupException("wtf, zero length attribute name");
							if(!strict && pos < data.length && data[pos] == '<') {
								// this is the broken tag that doesn't have a > at the end
								data = data[0 .. pos] ~ ">" ~ data[pos.. $];
								// let's insert one as a hack
								goto case '>';
							}
							goto moreAttributes;
					}
			}
	}
	return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly.
	//assert(0);
}
eatWhitespace();
Ele r;
do {
r = readElement(); // there SHOULD only be one element...
if(r.type == 3 && r.element !is null)
piecesBeforeRoot ~= r.element;
if(r.type == 4)
break; // the document is completely empty...
} while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node
root = r.element;
root.parent_ = this;
if(!strict) // in strict mode, we'll just ignore stuff after the xml
while(r.type != 4) {
r = readElement();
if(r.type != 4 && r.type != 2) { // if not empty and not ignored
if(r.element !is null)
piecesAfterRoot ~= r.element;
}
}
if(root is null)
{
if(strict)
assert(0, "empty document should be impossible in strict mode");
else
parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do
}
if(paragraphHackfixRequired) {
assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag...
// in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml).
// It's hard to handle above though because my code sucks. So, we'll fix it here.
// Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120
// Kind of inefficient because we can't detect when we recurse back out of a node.
Element[Element] insertLocations;
auto iterator = root.tree;
foreach(ele; iterator) {
if(ele.parentNode is null)
continue;
if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) {
auto shouldBePreviousSibling = ele.parentNode;
auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder...
if (auto p = holder in insertLocations) {
shouldBePreviousSibling = *p;
assert(shouldBePreviousSibling.parentNode is holder);
}
ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree());
insertLocations[holder] = ele;
iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up.
}
}
}
}
/* end massive parse function */
/// Returns the inner text of the first `title` element found by `findFirst`,
/// or an empty string when the document has none.
@property string title() {
	auto titleElement = findFirst(delegate bool(Element candidate) {
		return candidate.tagName == "title";
	});
	return titleElement is null ? "" : titleElement.innerText();
}
/// Sets the page title. When no `title` element exists one is created and,
/// if the document has a `head`, appended to the first one.
@property void title(string t) {
	auto titleElement = findFirst(delegate bool(Element candidate) {
		return candidate.tagName == "title";
	});
	if(titleElement is null) {
		titleElement = createElement("title");
		auto heads = getElementsByTagName("head");
		// without a <head> the new element stays detached from the tree
		if(heads.length > 0)
			heads[0].appendChild(titleElement);
	}
	if(titleElement !is null)
		titleElement.innerText = t;
}
// FIXME: would it work to alias root this; ???? might be a good idea
/// These functions all forward to the root element. See the documentation in the Element class.
Element getElementById(string id) {
// NOTE(review): `root` is dereferenced without a null check; it is null after `clear()`.
return root.getElementById(id);
}
/// ditto
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
if( is(SomeElementType : Element))
out(ret) { assert(ret !is null); }
do {
// `file` and `line` default to the caller's location and are passed straight through to the root element.
return root.requireElementById!(SomeElementType)(id, file, line);
}
/// ditto
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if( is(SomeElementType : Element))
out(ret) { assert(ret !is null); }
do {
// uses the Document-level querySelector; a failed dynamic cast is treated the same as "not found"
auto e = cast(SomeElementType) querySelector(selector);
if(e is null)
throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line);
return e;
}
// Runs the Document-level querySelector, casts the result to SomeElementType and wraps it
// (possibly null) in a MaybeNullElement. The `file` and `line` parameters are not used in this body.
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
{
auto e = cast(SomeElementType) querySelector(selector);
return MaybeNullElement!SomeElementType(e);
}
/// ditto
@scriptable
Element querySelector(string selector) {
// see comment below on Document.querySelectorAll
auto s = Selector(selector);//, !loose);
// every component whose first part has separation 0 gets it rewritten to -1
// (presumably so the root element itself can match -- see querySelectorAll; confirm in Selector)
foreach(ref comp; s.components)
if(comp.parts.length && comp.parts[0].separation == 0)
comp.parts[0].separation = -1;
// returns the first element the lazy match yields, or null when it yields nothing
foreach(e; s.getMatchingElementsLazy(this.root))
return e;
return null;
}
/// ditto
@scriptable
Element[] querySelectorAll(string selector) {
// In standards-compliant code, the document is slightly magical
// in that it is a pseudoelement at top level. It should actually
// match the root as one of its children.
//
// In versions of dom.d before Dec 29 2019, this worked because
// querySelectorAll was willing to return itself. With that bug fix
// (search "arbitrary id asduiwh" in this file for associated unittest)
// this would have failed. Hence adding back the root if it matches the
// selector itself.
//
// I'd love to do this better later.
auto s = Selector(selector);//, !loose);
// same rewrite as in Document.querySelector: first-part separation 0 becomes -1
foreach(ref comp; s.components)
if(comp.parts.length && comp.parts[0].separation == 0)
comp.parts[0].separation = -1;
return s.getMatchingElements(this.root);
}
/// ditto
deprecated("use querySelectorAll instead")
Element[] getElementsBySelector(string selector) {
// forwards unchanged to the root element
return root.getElementsBySelector(selector);
}
/// ditto
@scriptable
Element[] getElementsByTagName(string tag) {
// forwards unchanged to the root element
return root.getElementsByTagName(tag);
}
/// ditto
@scriptable
Element[] getElementsByClassName(string tag) {
// despite its name, `tag` is the class name handed to the root element's getElementsByClassName
return root.getElementsByClassName(tag);
}
/**
	Returns the first element `findFirst` reaches whose tag name equals `tag`,
	or null when there is none. In loose mode `tag` is lower-cased first.

	FIXME: btw, this could just be a lazy range......
*/
Element getFirstElementByTagName(string tag) {
	string wanted = loose ? tag.toLower() : tag;
	return findFirst(delegate bool(Element candidate) {
		return candidate.tagName == wanted;
	});
}
/// This returns the <body> element, if there is one. (It is named differently than in Javascript, where it is called 'body', because body is a keyword in D.)
Element mainBody() {
return getFirstElementByTagName("body");
}
/// Looks up a `meta` tag under `head` and returns its `content` attribute.
/// The tag is matched on `[name=...]` when `name` has no colon and on
/// `[property=...]` when it has one. Returns null when no tag matches.
string getMeta(string name) {
	immutable bool hasColon = name.indexOf(":") != -1;
	string attributeName = hasColon ? "property" : "name";
	auto metaTag = querySelector("head meta[" ~ attributeName ~ "=" ~ name ~ "]");
	return metaTag is null ? null : metaTag.content;
}
/// Sets a meta tag in the document header, creating it under `head` when missing.
/// Names containing a colon (Facebook open graph style) are stored in a `property`
/// attribute; all other names use the traditional `name` attribute.
void setMeta(string name, string value) {
	immutable bool hasColon = name.indexOf(":") != -1;
	string attributeName = hasColon ? "property" : "name";
	auto metaTag = querySelector("head meta[" ~ attributeName ~ "=" ~ name ~ "]");
	if(metaTag is null) {
		// requireSelector throws when the document has no <head>
		metaTag = requireSelector("head").addChild("meta");
		metaTag.setAttribute(attributeName, name);
	}
	metaTag.content = value;
}
/// Returns every element with tag name "form", with the resulting array cast to `Form[]`.
/// NOTE(review): this is an array cast, not a per-element dynamic cast; it assumes every "form" element really is a Form object -- confirm.
Form[] forms() {
return cast(Form[]) getElementsByTagName("form");
}
/// Creates a new "form" element through `createElement` and returns it as a `Form`.
/// The out contract asserts the dynamic cast succeeded.
Form createForm()
out(ret) {
assert(ret !is null);
}
do {
return cast(Form) createElement("form");
}
/// Creates an element via `Element.make`, using this document's `selfClosedElements` list.
/// In loose mode the tag name is lower-cased first.
Element createElement(string name) {
	string effectiveName = loose ? name.toLower() : name;
	return Element.make(effectiveName, null, null, selfClosedElements);
}
/// Creates a new `DocumentFragment` constructed with this document.
Element createFragment() {
return new DocumentFragment(this);
}
/// Creates a new `TextNode` holding `content`, constructed with this document.
Element createTextNode(string content) {
return new TextNode(this, content);
}
/// Depth-first, parent-before-children search starting at `root`.
/// Returns the first element for which `doesItMatch` returns true,
/// or null when nothing matches or the document has no root.
Element findFirst(bool delegate(Element) doesItMatch) {
	Element search(Element node) {
		if(doesItMatch(node))
			return node;
		foreach(child; node.children) {
			auto hit = search(child);
			if(hit !is null)
				return hit;
		}
		return null;
	}

	return root is null ? null : search(root);
}
/// Drops the root element (sets it to null) and turns loose mode off.
/// Other members, such as the prolog and the pieces before/after the root, are not touched here.
void clear() {
root = null;
loose = false;
}
/// Overrides the prolog text and records that the user set it explicitly,
/// so `prolog` returns it even when the source document supplied its own pieces.
void setProlog(string d) {
_prolog = d;
prologWasSet = true;
}
/// Default prolog text, also the storage for whatever `setProlog` was given.
private string _prolog = "<!DOCTYPE html>\n";
private bool prologWasSet = false; // set to true if the user changed it
/// The text emitted before the root element by `toString`.
@property string prolog() const {
	// Pieces kept from the source document are only used when the user has not
	// overridden the prolog and there actually are such pieces.
	if(!prologWasSet && piecesBeforeRoot.length != 0) {
		string assembled;
		foreach(piece; piecesBeforeRoot)
			assembled ~= piece.toString() ~ "\n";
		return assembled;
	}
	// explicit user choice, or the builtin default
	return _prolog;
}
/// Serializes the document as the prolog followed by the root element's `toString`.
/// NOTE(review): `root` is dereferenced without a null check; it is null after `clear()`.
override string toString() const {
return prolog ~ root.toString();
}
/++
Writes it out with whitespace for easier eyeball debugging
Do NOT use for anything other than eyeball debugging,
because whitespace may be significant content in XML.

The output is the stripped prolog, then the root's pretty string, then the
pretty string of each entry in `piecesAfterRoot`. All three parameters are
forwarded unchanged to the elements' `toPrettyString`.
+/
string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const {
import std.string;
string s = prolog.strip;
/*
if(insertComments) s ~= "<!--";
s ~= "\n";
if(insertComments) s ~= "-->";
*/
s ~= root.toPrettyString(insertComments, indentationLevel, indentWith);
// unlike toString, the pieces found after the root are included here
foreach(a; piecesAfterRoot)
s ~= a.toPrettyString(insertComments, indentationLevel, indentWith);
return s;
}
/// The root element of the document. It is null after `clear()`.
Element root;
/// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s
Element[] piecesBeforeRoot;
/// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it
Element[] piecesAfterRoot;
/// Loose-mode flag. When set, `createElement` and `getFirstElementByTagName` lower-case the tag name they are given.
bool loose;
// what follows are for mutation events that you can observe
// Observers are called in array order; append a delegate to this array to subscribe.
void delegate(DomMutationEvent)[] eventObservers;
// Calls every registered observer with the event, one after another.
void dispatchMutationEvent(DomMutationEvent e) {
foreach(o; eventObservers)
o(e);
}
}
// Something that can be stored as the parent of an Element.
// Each accessor returns the object viewed as that type, or null when it is not one
// (Element, for example, returns null from asDocument and itself from asElement).
interface DomParent {
inout(Document) asDocument() inout;
inout(Element) asElement() inout;
}
/// This represents almost everything in the DOM.
/// Group: core_functionality
class Element : DomParent {
// DomParent implementation: an Element is never a Document...
inout(Document) asDocument() inout { return null; }
// ...and is always itself as an Element.
inout(Element) asElement() inout { return this; }
/// Returns a collection of elements by selector.
/// See: [Document.opIndex]
ElementCollection opIndex(string selector) {
// wraps this element in a collection, then indexes that collection with the selector
auto e = ElementCollection(this);
return e[selector];
}
/++
	Returns the child node at position `index`, or null when `index`
	is past the end of the child list.

	Be aware that child nodes include text nodes, including
	whitespace-only nodes.
+/
Element opIndex(size_t index) {
	return index < children.length ? this.children[index] : null;
}
/// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done.
/// Throws: ElementNotFoundException, carrying the caller's `file` and `line`.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
if(
is(SomeElementType : Element)
)
out(ret) {
assert(ret !is null);
}
do {
// a failed dynamic cast yields null and is reported the same way as a missing element
auto e = cast(SomeElementType) getElementById(id);
if(e is null)
throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line);
return e;
}
/// ditto but with selectors instead of ids
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(
is(SomeElementType : Element)
)
out(ret) {
assert(ret !is null);
}
do {
// a failed dynamic cast yields null and is reported the same way as a missing element
auto e = cast(SomeElementType) querySelector(selector);
if(e is null)
throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line);
return e;
}
/++
If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods.

The `file` and `line` parameters are not used in this body.
+/
final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
if(is(SomeElementType : Element))
{
auto e = cast(SomeElementType) querySelector(selector);
return MaybeNullElement!SomeElementType(e);
}
/// get all the classes on this element
/// The class attribute is split on single spaces only.
@property string[] classes() {
return split(className, " ");
}
/// Appends `c` to the class attribute unless the element already has that class.
/// Returns this element so calls can be chained.
@scriptable
Element addClass(string c) {
	if(!hasClass(c)) {
		string existing = getAttribute("class");
		// no separator needed when the attribute is missing or empty
		setAttribute("class", existing.length == 0 ? c : existing ~ " " ~ c);
	}
	return this;
}
/// Removes every occurrence of class `c` from the class attribute.
/// Does nothing when the element does not have that class. Returns this element.
@scriptable
Element removeClass(string c) {
	if(hasClass(c)) {
		string rebuilt;
		foreach(existing; classes) {
			if(existing != c) {
				if(rebuilt.length)
					rebuilt ~= " ";
				rebuilt ~= existing;
			}
		}
		className = rebuilt.strip();
	}
	return this;
}
/// Returns whether `c` is one of the space-separated entries of the class attribute.
bool hasClass(string c) {
	string current = className;
	// cheap substring test first; only split when the text occurs at all
	if(current.indexOf(c) != -1) {
		foreach(candidate; current.split(" "))
			if(candidate == c)
				return true;
	}
	return false;
}
/* *******************************
DOM Mutation
*********************************/
/// convenience function to quickly add a tag with some text or
/// other relevant info (for example, it's a src for an <img> element
/// instead of inner text)
///
/// The element is built by `Element.make(tagName, childInfo, childInfo2)` and
/// appended to this element; the return value is whatever `appendChild` returns.
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
in {
assert(tagName !is null);
}
out(e) {
//assert(e.parentNode is this);
//assert(e.parentDocument is this.parentDocument);
}
do {
auto e = Element.make(tagName, childInfo, childInfo2);
// FIXME (maybe): if the thing is self closed, we might want to go ahead and
// return the parent. That will break existing code though.
return appendChild(e);
}
/// Another convenience function. Adds a child directly after the current one, returning
/// the new child.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
///
/// This element must already have a parent (checked by the in contract).
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
in {
assert(tagName !is null);
assert(parentNode !is null);
}
out(e) {
assert(e.parentNode is this.parentNode);
assert(e.parentDocument is this.parentDocument);
}
do {
auto e = Element.make(tagName, childInfo, childInfo2);
return parentNode.insertAfter(this, e);
}
/// Inserts the existing element `e` directly after this one in the parent and returns what `insertAfter` returns.
/// NOTE(review): unlike the string overload there is no contract checking that `parentNode` is non-null.
Element addSibling(Element e) {
return parentNode.insertAfter(this, e);
}
/// Alias for `appendChild(e)`, provided so it sits alongside the other `addChild` overloads.
Element addChild(Element e) {
return this.appendChild(e);
}
/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
///
/// Each argument must be either an `Element` (or subclass), which is appended
/// with `appendChild`, or a string type, which is appended with `appendText`.
/// Any other argument type is rejected at compile time.
void addChildren(T...)(T t) {
	import std.conv : to;
	import std.traits : isSomeString;

	foreach(item; t) {
		// `item` is a value, so the checks must be made against its type;
		// testing the value itself never matches and every call hit the static assert.
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(isSomeString!(typeof(item)))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}
/// Creates a new `tagName` element, puts `firstChild` inside it, appends the new
/// element to this one and returns the new element. `info2` is passed to
/// `Element.make` as its third argument.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
assert(firstChild !is null);
}
out(ret) {
assert(ret !is null);
assert(ret.parentNode is this);
assert(firstChild.parentNode is ret);
assert(ret.parentDocument is this.parentDocument);
//assert(firstChild.parentDocument is this.parentDocument);
}
do {
// the empty (non-null) string keeps Element.make on its childInfo path, so info2 is processed
auto e = Element.make(tagName, "", info2);
e.appendChild(firstChild);
this.appendChild(e);
return e;
}
/// Creates a new `tagName` element, appends it to this one, then sets its
/// innerHTML from `innerHtml.source`. Returns the new element.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out(ret) {
assert(ret !is null);
// when `this` is a DocumentFragment the new element's parentNode is allowed to differ
assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null");
assert(ret.parentDocument is this.parentDocument);
}
do {
auto e = Element.make(tagName, "", info2);
// attached before the HTML is assigned
this.appendChild(e);
e.innerHTML = innerHtml.source;
return e;
}
/// Appends each element of `children`, in order, with `appendChild`.
void appendChildren(Element[] children) {
// note: the parameter shadows the `children` member inside this function
foreach(ele; children)
appendChild(ele);
}
/// Moves this element from its current parent to the end of `newParent`'s children.
/// Both the current parent and `newParent` must be non-null (in contract).
void reparent(Element newParent)
in {
assert(newParent !is null);
assert(parentNode !is null);
}
out {
assert(this.parentNode is newParent);
//assert(isInArray(this, newParent.children));
}
do {
parentNode.removeChild(this);
newParent.appendChild(this);
}
/**
Strips this tag out of the document, putting its inner html
as children of the parent.
For example, given: `<p>hello <b>there</b></p>`, if you
call `stripOut` on the `b` element, you'll be left with
`<p>hello there</p>`.
The idea here is to make it easy to get rid of garbage
markup you aren't interested in.

The element must have a parent (in contract). Afterwards it has
neither a parent nor children (out contract).
*/
void stripOut()
in {
assert(parentNode !is null);
}
out {
assert(parentNode is null);
assert(children.length == 0);
}
do {
// detach the children first so they can be handed to the parent
foreach(c; children)
c.parentNode = null; // remove the parent
if(children.length)
parentNode.replaceChild(this, this.children);
else
parentNode.removeChild(this);
this.children.length = 0; // we reparented them all above
}
/// Detaches this element from its parent, if it has one, and returns this element.
/// Equivalent to `this.parentNode.removeChild(this)` guarded by a null check on `parentNode`.
Element removeFromTree()
in {
}
out(var) {
	assert(this.parentNode is null);
	assert(var is this);
}
do {
	// already detached elements are left alone
	if(this.parentNode !is null)
		this.parentNode.removeChild(this);
	return this;
}
/++
Wraps this element inside the given element.
It's like `this.replaceWith(what); what.appendChild(this);`
Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));`
you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`.

Returns `what`. Because it goes through `replaceWith`, this element must currently have a parent.
+/
Element wrapIn(Element what)
in {
assert(what !is null);
}
out(ret) {
assert(this.parentNode is what);
assert(ret is what);
}
do {
this.replaceWith(what);
what.appendChild(this);
return what;
}
/// Replaces this element with something else in the tree.
/// `e` is first removed from wherever it currently is, then put in this element's place. Returns `e`.
Element replaceWith(Element e)
in {
assert(this.parentNode !is null);
}
do {
e.removeFromTree();
this.parentNode.replaceChild(this, e);
return e;
}
/**
Splits the className into an array of each class given.
The split is on single spaces only; this is the const counterpart of [classes].
*/
string[] classNames() const {
return className().split(" ");
}
/**
	Concatenates the values of the leading run of text-node children.

	`firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`.
	Collection stops at the first child that is not a text node.

	See_also: [directText], [innerText]
*/
string firstInnerText() const {
	string collected;
	foreach(child; children) {
		if(child.nodeType == NodeType.Text)
			collected ~= child.nodeValue();
		else
			break;
	}
	return collected;
}
/**
	Returns the concatenated values of all text nodes that are direct children of this element.

	Unlike [innerText], it does not recurse, and unlike [firstInnerText], it keeps going
	past child tags. So, `<example>some <b>bold</b> text</example>`
	yields `some  text`: only the text directly under `example`, skipping the `b` child.

	See_also: [firstInnerText], [innerText]
*/
@property string directText() {
	string gathered;
	foreach(child; children) {
		if(child.nodeType != NodeType.Text)
			continue;
		gathered ~= child.nodeValue();
	}
	return gathered;
}
/**
	Sets the direct text, without modifying other child nodes.

	Unlike [innerText], this does *not* remove existing elements in the element.
	Only the first text-node child is changed; when there is none, [appendText] is called.

	So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`.
*/
@property void directText(string text) {
	foreach(child; children) {
		if(child.nodeType != NodeType.Text)
			continue;
		auto textNode = cast(TextNode) child;
		textNode.contents = text;
		return;
	}
	// no text node among the children
	appendText(text);
}
// do nothing, this is primarily a virtual hook
// for links and forms
// (the base implementation ignores both arguments)
void setValue(string field, string value) { }
// this is a thing so i can remove observer support if it gets slow
// I have not implemented all these yet
//
// Builds a DomMutationEvent with this element as the target and hands it to the
// owning document's dispatchMutationEvent. Elements with no parent document send nothing.
private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
if(parentDocument is null) return;
DomMutationEvent me;
me.operation = operation;
me.target = this;
me.relatedString = s1;
me.relatedString2 = s2;
me.related = r;
me.related2 = r2;
parentDocument.dispatchMutationEvent(me);
}
// putting all the members up front
// this ought to be private. don't use it directly.
Element[] children;
/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;
/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;
/// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;
// The owner of this element: either another Element or the Document itself (see DomParent). Null when detached.
private DomParent parent_;
/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
@property inout(Document) parentDocument() inout {
if(this.parent_ is null)
return null;
// the casts strip the inout qualifier so the loop variables can be reassigned
auto p = cast() this.parent_.asElement;
auto prev = cast() this;
// climb through Element parents; `prev` ends up as the top-most element reached
while(p) {
prev = p;
if(p.parent_ is null)
return null;
p = cast() p.parent_.asElement;
}
// the top-most element's parent is not an Element; asDocument yields the Document (or null if it is not one)
return cast(inout) prev.parent_.asDocument;
}
// Deprecated setter: stores the document directly as this element's parent,
// replacing whatever parent (element or document) was stored before.
deprecated @property void parentDocument(Document doc) {
parent_ = doc;
}
/// Returns the parent element, or null when there is no parent or the parent is not an Element.
/// A DocumentFragment parent is skipped: the fragment's own parent element is returned instead.
inout(Element) parentNode() inout {
if(parent_ is null)
return null;
auto p = parent_.asElement;
// NOTE(review): if the fragment itself has no parent, `p.parent_` is null and this dereferences it -- confirm whether that can happen.
if(cast(DocumentFragment) p)
return p.parent_.asElement;
return p;
}
//protected
// Stores `e` as this element's parent and returns it. Only the back-reference is
// written here; neither parent's `children` array is modified by this function.
Element parentNode(Element e) {
parent_ = e;
return e;
}
// these are here for event handlers. Don't forget that this library never fires events.
// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)
version(dom_with_events) {
// handler lists keyed by event name (without any "on" prefix)
EventHandler[][string] bubblingEventHandlers;
EventHandler[][string] capturingEventHandlers;
EventHandler[string] defaultEventHandlers;
// Registers `handler` for `event`; `useCapture` selects the capturing list instead of the bubbling one.
void addEventListener(string event, EventHandler handler, bool useCapture = false) {
// accept both "click" and "onclick" style names
if(event.length > 2 && event[0..2] == "on")
event = event[2 .. $];
if(useCapture)
capturingEventHandlers[event] ~= handler;
else
bubblingEventHandlers[event] ~= handler;
}
}
// and now methods
/++
Convenience function to try to do the right thing for HTML. This is the main way I create elements.

The dynamic type of the result depends on `tagName` (`TextNode`, `Table`, `Link`, `Form`,
`TableRow`, `TableCell`, otherwise plain `Element`). When `childInfo` is not null, it and
`childInfo2` are applied in a tag-specific way (for example `src`/`alt` for `img`, text/`href`
for `a`); for tags without a special case they become the inner text and the class name.
When `childInfo` is null, `childInfo2` is ignored.

History:
On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private
immutable global list for HTML. It still defaults to the same list, but you can change it now via
the parameter.
+/
static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) {
bool selfClosed = tagName.isInArray(selfClosedElements);
Element e;
// want to create the right kind of object for the given tag...
switch(tagName) {
case "#text":
// text nodes return immediately; none of the processing below applies to them
e = new TextNode(null, childInfo);
return e;
// break;
case "table":
e = new Table(null);
break;
case "a":
e = new Link(null);
break;
case "form":
e = new Form(null);
break;
case "tr":
e = new TableRow(null);
break;
case "td", "th":
e = new TableCell(null, tagName);
break;
default:
e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
}
// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
e.tagName = tagName;
e.selfClosed = selfClosed;
// everything below only runs when a first info string was supplied (an empty, non-null string counts)
if(childInfo !is null)
switch(tagName) {
/* html5 convenience tags */
case "audio":
// childInfo becomes a nested <source>, childInfo2 becomes appended text
if(childInfo.length)
e.addChild("source", childInfo);
if(childInfo2 !is null)
e.appendText(childInfo2);
break;
case "source":
e.src = childInfo;
if(childInfo2 !is null)
e.type = childInfo2;
break;
/* regular html 4 stuff */
case "img":
e.src = childInfo;
if(childInfo2 !is null)
e.alt = childInfo2;
break;
case "link":
e.href = childInfo;
if(childInfo2 !is null)
e.rel = childInfo2;
break;
case "option":
e.innerText = childInfo;
if(childInfo2 !is null)
e.value = childInfo2;
break;
case "input":
// inputs made this way are always hidden name/value pairs
e.type = "hidden";
e.name = childInfo;
if(childInfo2 !is null)
e.value = childInfo2;
break;
case "button":
e.innerText = childInfo;
if(childInfo2 !is null)
e.type = childInfo2;
break;
case "a":
// note the order: text first, href second
e.innerText = childInfo;
if(childInfo2 !is null)
e.href = childInfo2;
break;
case "script":
case "style":
// childInfo2 is not used for these two tags
e.innerRawSource = childInfo;
break;
case "meta":
e.name = childInfo;
if(childInfo2 !is null)
e.content = childInfo2;
break;
/* generically, assume we were passed text and perhaps class */
default:
e.innerText = childInfo;
// unlike the cases above this tests length, so an empty childInfo2 sets no class
if(childInfo2.length)
e.className = childInfo2;
}
return e;
}
// Creates the element via the string overload, then replaces its content with the given HTML source.
static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
// FIXME: childInfo2 is ignored when info1 is null
// the zero-length slice is an empty but non-null string, so the string overload still processes childInfo2
auto m = Element.make(tagName, "not null"[0..0], childInfo2);
m.innerHTML = innerHtml.source;
return m;
}
// Creates the element via the string overload and appends `child` to it.
// NOTE(review): a null childInfo is passed here, and the string overload only looks at
// childInfo2 when childInfo is non-null, so `childInfo2` appears to have no effect -- confirm intent.
static Element make(string tagName, Element child, string childInfo2 = null) {
auto m = Element.make(tagName, cast(string) null, childInfo2);
m.appendChild(child);
return m;
}
/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
/// The `_parentDocument` argument is not stored by this constructor. The tag name must not contain a space (asserted).
this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
tagName = _tagName;
if(_attributes !is null)
attributes = _attributes;
selfClosed = _selfClosed;
version(dom_node_indexes)
this.dataset.nodeIndex = to!string(&(this.attributes));
assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
}
/++
Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
Note also that without a parent document, elements are always in strict, case-sensitive mode.
History:
On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as
before: using the hard-coded list of HTML elements, but it can now be overridden. If you use
[Document.createElement], it will use the list set for the current document. Otherwise, you can pass
something here if you like.
+/
this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) {
tagName = _tagName;
if(_attributes !is null)
attributes = _attributes;
// self-closed status is derived from the tag name rather than passed in
selfClosed = tagName.isInArray(selfClosedElements);
// this is meant to reserve some memory. It makes a small, but consistent improvement.
//children.length = 8;
//children.length = 0;
version(dom_node_indexes)
this.dataset.nodeIndex = to!string(&(this.attributes));
}
// Private constructor that leaves tagName and attributes at their defaults.
// The document argument is not stored here.
private this(Document _parentDocument) {
version(dom_node_indexes)
this.dataset.nodeIndex = to!string(&(this.attributes));
}
/* *******************************
Navigating the DOM
*********************************/
/// Returns the first child node of this element, or null when it has no children.
/// Remember, text nodes are children too.
@property Element firstChild() {
	if(children.length == 0)
		return null;
	return children[0];
}
/// Returns the last child node of this element, or null when it has no children.
@property Element lastChild() {
	if(children.length == 0)
		return null;
	return children[$ - 1];
}
/// UNTESTED
/// the next element you would encounter if you were reading it in the source
///
/// That is: the first child if there is one, otherwise the next sibling,
/// otherwise the next sibling of the nearest ancestor that has one.
/// Returns null when this is the last node in its tree.
Element nextInSource() {
	auto n = firstChild;
	if(n is null)
		n = nextSibling();
	if(n is null) {
		auto p = this.parentNode;
		while(p !is null && n is null) {
			n = p.nextSibling;
			// move up one level each round; without this the loop never
			// terminates when the parent has no next sibling
			p = p.parentNode;
		}
	}
	return n;
}
/// UNTESTED
/// ditto
Element previousInSource() {
auto p = previousSibling;
if(p is null) {
auto par = parentNode;
// NOTE(review): with no previous sibling this yields the parent's *last* child
// (possibly this element or a later sibling) and only falls back to the parent when it has
// no children; the node preceding a first child in source order would be the parent itself -- confirm intent.
if(par)
p = par.lastChild;
if(p is null)
p = par;
}
return p;
}
/// The closest preceding sibling that is not a text node, or null. Shorthand for `previousSibling("*")`.
@property Element previousElementSibling() {
return previousSibling("*");
}
/// Returns the closest sibling before this element, or null when there is none
/// (or when this element has no parent).
///
/// With `tagName` null any node qualifies; with `"*"` any non-text node qualifies;
/// otherwise only siblings whose tag name equals `tagName`.
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	Element found = null;
	foreach(sibling; this.parentNode.childNodes) {
		if(sibling is this)
			break;
		// keep overwriting so the last qualifying sibling before `this` wins
		immutable bool wildcardHit = tagName == "*" && sibling.nodeType != NodeType.Text;
		if(wildcardHit || tagName is null || sibling.tagName == tagName)
			found = sibling;
	}
	return found;
}
/// The closest following sibling that is not a text node, or null. Shorthand for `nextSibling("*")`.
@property Element nextElementSibling() {
return nextSibling("*");
}
/// Returns the closest sibling after this element, or null when there is none
/// (or when this element has no parent).
///
/// With `tagName` null any node qualifies; with `"*"` any non-text node qualifies;
/// otherwise only siblings whose tag name equals `tagName`.
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	bool passedSelf = false;
	foreach(sibling; this.parentNode.childNodes) {
		if(sibling is this) {
			passedSelf = true;
			continue;
		}
		if(!passedSelf)
			continue;
		immutable bool wildcardHit = tagName == "*" && sibling.nodeType != NodeType.Text;
		if(wildcardHit || tagName is null || sibling.tagName == tagName)
			return sibling;
	}
	return null;
}
/// Gets the nearest node, going up the chain, with the given tagName
/// May return null or throw.
///
/// When `tagName` is null and `T` is `Form`, `Table` or `Link`, the tag name
/// defaults to "form", "table" or "a" respectively; for other types a null
/// `tagName` matches the immediate parent.
///
/// For `T == Element` the result may be null. For any other `T`, an
/// ElementNotFoundException is thrown when no ancestor matches or the match
/// is not of type `T`.
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was a `==` comparison, which never set the default
	}
	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}
	static if(!is(T == Element)) {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found", this);
	} else
		auto t = par;
	return t;
}
/// Returns the first element yielded by `tree` whose `id` attribute equals `id`, or null when none does.
Element getElementById(string id) {
// FIXME: I use this function a lot, and it's kinda slow
// not terribly slow, but not great.
foreach(e; tree)
if(e.id == id)
return e;
return null;
}
/++
Returns a child element that matches the given `selector`.
Note: you can give multiple selectors, separated by commas.
It will return the first match it finds.
Returns null when nothing matches.
+/
@scriptable
Element querySelector(string selector) {
Selector s = Selector(selector);
// walks `tree` in its iteration order and stops at the first element the selector accepts
foreach(ele; tree)
if(s.matchesElement(ele))
return ele;
return null;
}
/// a more standards-compliant alias for getElementsBySelector
@scriptable
Element[] querySelectorAll(string selector) {
return getElementsBySelector(selector);
}
/// If the element matches the given selector. Previously known as `matchesSelector`.
@scriptable
bool matches(string selector) {
// the disabled block below would have derived case sensitivity from the parent document's loose flag
/+
bool caseSensitiveTags = true;
if(parentDocument && parentDocument.loose)
caseSensitiveTags = false;
+/
Selector s = Selector(selector);
return s.matchesElement(this);
}
/// Returns this element or its nearest ancestor that matches `selector`, or null when none does.
/// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
@scriptable
Element closest(string selector) {
	for(Element current = this; current !is null; current = current.parentNode) {
		if(current.matches(selector))
			return current;
	}
	return null;
}
/**
Returns elements that match the given CSS selector
* -- all, default if nothing else is there
tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector
It is all additive
OP
space = descendant
> = direct descendant
+ = sibling (E+F Matches any F element immediately preceded by a sibling element E)
[foo] Foo is present as an attribute
[foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning".
E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".
[item$=sdas] ends with
[item^=sdsad] begins with
Quotes are optional here.
Pseudos:
:first-child
:last-child
:link (same as a[href] for our purposes here)
There can be commas separating the selector. A comma separated list result is OR'd onto the main.
This ONLY cares about elements. text, etc, are ignored
There should be two functions: given element, does it match the selector? and given a selector, give me all the elements
*/
Element[] getElementsBySelector(string selector) {
// FIXME: this function could probably use some performance attention
// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.
// tag matching is case sensitive unless the owning document is in loose mode
bool caseSensitiveTags = true;
if(parentDocument && parentDocument.loose)
caseSensitiveTags = false;
Element[] ret;
// results of each parsed selector are concatenated in order
foreach(sel; parseSelectorString(selector, caseSensitiveTags))
ret ~= sel.getElements(this);
return ret;
}
/// Returns the elements matching the selector `.cn`, i.e. delegates to `getElementsBySelector`.
Element[] getElementsByClassName(string cn) {
// is this correct?
return getElementsBySelector("." ~ cn);
}
/// Returns every element yielded by `tree` whose tag name equals `tag`.
/// When the owning document is in loose mode, `tag` is lower-cased before comparing.
Element[] getElementsByTagName(string tag) {
if(parentDocument && parentDocument.loose)
tag = tag.toLower();
Element[] ret;
foreach(e; tree)
if(e.tagName == tag)
ret ~= e;
return ret;
}
/* *******************************
Attributes
*********************************/
/**
	Looks up an attribute by name and returns its value, or null
	when the attribute is not set.

	When the owning document is in loose mode the name is lower-cased first.

	Note that the returned string is decoded, so it no longer contains any xml entities.
*/
@scriptable
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	if(auto found = name in attributes)
		return *found;
	return null;
}
/**
	Sets an attribute. Returns this for easy chaining.

	When the owning document is in loose mode the name is lower-cased first.

	For `href` and `src`, leading `javascript:` and `vbscript:` schemes
	(matched case-insensitively, after surrounding whitespace) are removed from
	the value; removal repeats until neither remains at the front, so stacking
	them does not get one through. This is a convenience filter only, not a
	complete URL sanitizer.

	A setAttribute mutation event is sent to the parent document's observers.
*/
@scriptable
Element setAttribute(string name, string value) {
	// ASCII-only, case-insensitive prefix test; `scheme` must be lower case
	static bool hasScheme(string s, string scheme) {
		if(s.length < scheme.length)
			return false;
		foreach(i, char expected; scheme) {
			char actual = s[i];
			if(actual >= 'A' && actual <= 'Z')
				actual += 'a' - 'A';
			if(actual != expected)
				return false;
		}
		return true;
	}

	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		static immutable string[] blockedSchemes = ["vbscript:", "javascript:"];
		bool removedOne;
		do {
			removedOne = false;
			// test and cut the same (stripped) string so the offsets always line up
			auto trimmed = value.strip();
			foreach(scheme; blockedSchemes) {
				if(hasScheme(trimmed, scheme)) {
					value = trimmed[scheme.length .. $];
					removedOne = true;
					break;
				}
			}
		} while(removedOne);
	}
	attributes[name] = value;
	sendObserverEvent(DomMutationOperations.setAttribute, name, value);
	return this;
}
/**
	Tells whether the named attribute is present on this element.

	In a loose document the name is lowercased before the lookup.
*/
@scriptable
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	return (name in attributes) !is null;
}
/**
	Removes the given attribute from the element, if it is set.

	In a loose document the name is lowercased first. The removeAttribute
	observer event is sent whether or not the attribute was present.

	Returns:
		`this`
*/
@scriptable
Element removeAttribute(string name)
out(ret) {
	assert(ret is this);
}
do {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	immutable bool present = (name in attributes) !is null;
	if(present)
		attributes.remove(name);

	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}
/**
	Reads the `class` attribute.

	Returns:
		The attribute's contents, or an empty string if the element has no class attribute.
*/
@property string className() const {
	auto value = getAttribute("class");
	return value is null ? "" : value;
}
/// Sets the `class` attribute to `c` via [setAttribute]. Returns `this`.
@property Element className(string c) {
setAttribute("class", c);
return this;
}
/**
	Provides easy access to common HTML attributes, object style.

	Only names accepted by `isConvenientAttribute` are handled here.
	Passing a non-null value sets the attribute first; the current value
	of the attribute is returned either way.

	---
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	---
*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	immutable bool isSetter = v !is null;
	if(isSetter)
		setAttribute(name, v);
	return getAttribute(name);
}
/**
Old access to attributes. Use [attrs] instead.
DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
so I want to remove it. A small whitelist of attributes is still allowed, but others are not.
Instead, use element.attrs.attribute, element.attrs["attribute"],
or element.getAttribute("attribute")/element.setAttribute("attribute").
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
// Any name rejected by isConvenientAttribute instantiates this overload, which fails compilation with the message below.
static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}
/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
if(boolean)
setAttribute(name, name);
else
removeAttribute(name);
}
*/
/**
Returns the element's children (the `children` array itself, as a const view).
*/
@property const(Element[]) childNodes() const {
return children;
}
/// Mutable version of the same: returns the `children` array without const.
@property Element[] childNodes() { // FIXME: the above should be inout
return children;
}
/++
HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");`

Returns a new `DataSet` constructed from this element on every call.
+/
@property DataSet dataset() {
return DataSet(this);
}
/++
Gives dot/opIndex access to attributes
---
ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
---

Returns a new `AttributeSet` constructed from this element on every call.
+/
@property AttributeSet attrs() {
return AttributeSet(this);
}
/++
Provides both string and object style (like in Javascript) access to the style attribute.
---
element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute
---

Returns a new `ElementStyle` constructed from this element on every call.
+/
@property ElementStyle style() {
return ElementStyle(this);
}
/++
This sets the style attribute with a string.

The string is stored as-is through [setAttribute]; the return value is the
same `ElementStyle` view the getter overload produces.
+/
@property ElementStyle style(string s) {
this.setAttribute("style", s);
return this.style;
}
// Currently a no-op: the entire body is commented out, so `whichOnes` is ignored.
// The disabled sketch below would have walked the given attribute names
// (defaulting to all attribute keys) and special-cased id, class and style.
private void parseAttributes(string[] whichOnes = null) {
/+
if(whichOnes is null)
whichOnes = attributes.keys;
foreach(attr; whichOnes) {
switch(attr) {
case "id":
break;
case "class":
break;
case "style":
break;
default:
// we don't care about it
}
}
+/
}
// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;
/// Don't use this.
///
/// Builds a `CssStyle` from the `style` attribute plus a few legacy presentational
/// attributes on first access and caches it in `_computedStyle`; later calls return
/// the cached object, so attribute changes made afterwards are not reflected.
@property CssStyle computedStyle() {
	if(_computedStyle !is null)
		return _computedStyle;

	auto css = this.getAttribute("style");

	/* old presentational html attributes are treated as css here */
	if(this.hasAttribute("width"))
		css ~= "; width: " ~ this.attrs.width;
	if(this.hasAttribute("height"))
		css ~= "; height: " ~ this.attrs.height;
	if(this.hasAttribute("bgcolor"))
		css ~= "; background-color: " ~ this.attrs.bgcolor;
	// `text` only means a foreground color on the body tag
	if(this.tagName == "body" && this.hasAttribute("text"))
		css ~= "; color: " ~ this.attrs.text;
	if(this.hasAttribute("color"))
		css ~= "; color: " ~ this.attrs.color;
	/* done */

	_computedStyle = new CssStyle(null, css); // gives at least something to work with
	return _computedStyle;
}
/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// own offset plus the offsetLeft of every element along the offsetParent chain
		int total = offsetLeft;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// own offset plus the offsetTop of every element along the offsetParent chain
		int total = offsetTop;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetTop;
		return total;
	}
}
// Back to the regular dom functions
public:
/* *******************************
DOM Mutation
*********************************/
/// Empties this element: every current child (text and elements alike) has its
/// `parentNode` cleared, and the child list is then reset to null.
void removeAllChildren()
out {
	assert(this.children.length == 0);
}
do {
	foreach(orphan; children) {
		orphan.parentNode = null;
	}

	children = null;
}
/// Inserts `e` right after this element by calling `parentNode.insertAfter(this, e)`. Returns `e`.
/// This element must have a parent, since `parentNode` is dereferenced unconditionally.
/// History: added June 13, 2020
Element appendSibling(Element e) {
parentNode.insertAfter(this, e);
return e;
}
/// Inserts `e` right before this element by calling `parentNode.insertBefore(this, e)`. Returns `e`.
/// This element must have a parent, since `parentNode` is dereferenced unconditionally.
/// History: added June 13, 2020
Element prependSibling(Element e) {
parentNode.insertBefore(this, e);
return e;
}
/++
Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one.

If `e` is a [DocumentFragment], the fragment's children are appended to this element's child list instead of the fragment itself.

Params:
e = the element to append; must not be null.

Returns:
`e`, the element that was passed in.

See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild
History:
Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here.
+/
Element appendChild(Element e)
in {
assert(e !is null);
}
out (ret) {
assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null");
assert(e.parentDocument is this.parentDocument);
assert(e is ret);
}
do {
// detach from any previous parent first so the node is not listed in two child arrays
if(e.parentNode !is null)
e.parentNode.removeChild(e);
// content is being added, so clear the self-closed flag
selfClosed = false;
if(auto frag = cast(DocumentFragment) e)
children ~= frag.children;
else
children ~= e;
// NOTE(review): for a fragment only the fragment object gets its parentNode set here; its children are not
// reparented in this method, whereas insertBefore/insertAfter do reparent them - confirm this is intended.
e.parentNode = this;
/+
foreach(item; e.tree)
item.parentDocument = this.parentDocument;
+/
sendObserverEvent(DomMutationOperations.appendChild, null, null, e);
return e;
}
/++
	Inserts `what` into this node's child list, immediately before the existing child `where`.

	If `what` is a [DocumentFragment], its children are spliced in (and reparented
	to this node) in place of the fragment object itself.

	Params:
		where = an existing child of this node.
		what = the node to insert; it must not currently have a parent.

	Returns:
		`what`. If `where` is not found among the children nothing is inserted
		(and the out contract fails in builds that check contracts).
+/
Element insertBefore(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);
	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
do {
	// locate `where`; children.length doubles as the "not found" marker
	size_t at = children.length;
	foreach(i, existing; children) {
		if(existing is where) {
			at = i;
			break;
		}
	}
	if(at == children.length)
		return what;

	if(auto frag = cast(DocumentFragment) what) {
		children = children[0 .. at] ~ frag.children ~ children[at .. $];
		foreach(moved; frag.children)
			moved.parentNode = this;
	} else {
		children = children[0 .. at] ~ what ~ children[at .. $];
	}

	what.parentNode = this;
	return what;
}
/++
	Inserts `what` into this node's child list, immediately after the existing child `where`.

	If `what` is a [DocumentFragment], its children are spliced in (and reparented
	to this node) in place of the fragment object itself.

	Params:
		where = an existing child of this node.
		what = the node to insert; it must not currently have a parent.

	Returns:
		`what`. If `where` is not found among the children nothing is inserted
		(and the out contract fails in builds that check contracts).
+/
Element insertAfter(in Element where, Element what)
in {
	assert(where !is null);
	assert(where.parentNode is this);
	assert(what !is null);
	assert(what.parentNode is null);
}
out (ret) {
	assert(where.parentNode is this);
	assert(what.parentNode is this);
	assert(what.parentDocument is this.parentDocument);
	assert(ret is what);
}
do {
	// locate `where`; children.length doubles as the "not found" marker
	size_t found = children.length;
	foreach(i, existing; children) {
		if(existing is where) {
			found = i;
			break;
		}
	}
	if(found == children.length)
		return what;

	// the new content goes in the slot right behind `where`
	immutable size_t at = found + 1;
	if(auto frag = cast(DocumentFragment) what) {
		children = children[0 .. at] ~ frag.children ~ children[at .. $];
		foreach(moved; frag.children)
			moved.parentNode = this;
	} else {
		children = children[0 .. at] ~ what ~ children[at .. $];
	}

	what.parentNode = this;
	return what;
}
/++
	Swaps one child for a new node, in the same position of the child list.

	Params:
		child = the existing child of this node to take out.
		replacement = the node that takes its slot; its `parentNode` is set to this node.

	Returns:
		The old `child`, which is now parentless.
+/
Element swapNode(Element child, Element replacement)
in {
	assert(child !is null);
	assert(replacement !is null);
	assert(child.parentNode is this);
}
out(ret) {
	assert(ret is child);
	assert(ret.parentNode is null);
	assert(replacement.parentNode is this);
	assert(replacement.parentDocument is this.parentDocument);
}
do {
	// work on a slice of the child list so the slot can be overwritten in place
	auto slots = this.children;
	foreach(idx; 0 .. slots.length) {
		if(slots[idx] !is child)
			continue;

		child.parentNode = null;
		slots[idx] = replacement;
		replacement.parentNode = this;
		return child;
	}

	// the in contract guarantees `child` is one of ours, so not finding it is a logic error
	assert(0);
}
/++
Appends the given text to the node, as a new [TextNode] child added through [appendChild].
Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>`
yields `<example>text <b>bold</b> hi</example>`.

Returns:
`this` (not the new text node), so calls can be chained.

See_Also:
[firstInnerText], [directText], [innerText], [appendChild]
+/
@scriptable
Element appendText(string text) {
// the text node is created with this element's parentDocument
Element e = new TextNode(parentDocument, text);
appendChild(e);
return this;
}
/++
	Returns the direct children which are of a tag type (excludes text, comments, etc.).
	childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag.

	Params:
		tagName = filter results to only the child elements with the given tag name.
			When null (the default), no tag name filtering is done.
+/
@property Element[] childElements(string tagName = null) {
	Element[] result;
	foreach(kid; children) {
		// nodeType 1 is what this code treats as a tag element
		if(kid.nodeType != 1)
			continue;
		if(tagName !is null && kid.tagName != tagName)
			continue;
		result ~= kid;
	}
	return result;
}
/++
Appends the given html to the element, returning the elements appended
This is similar to `element.innerHTML += "html string";` in Javascript.

The html is parsed in a temporary [Document], wrapped in a `<root>` tag, and the
parsed root's children are then moved here with [stealChildren].
+/
@scriptable
Element[] appendHtml(string html) {
Document d = new Document("<root>" ~ html ~ "</root>");
return stealChildren(d.root);
}
/++
	Inserts `child` into this node's child list, right after the existing child `where`.

	If `child` is a [DocumentFragment], its children are spliced into the list in its
	place; only the fragment object itself gets `parentNode` set by this method.

	Params:
		child = the node to insert.
		where = an existing child of this node, after which `child` is placed.
+/
void insertChildAfter(Element child, Element where)
in {
	assert(child !is null);
	assert(where !is null);
	assert(where.parentNode is this);
	assert(!selfClosed);
	//assert(isInArray(where, children));
}
out {
	assert(child.parentNode is this);
	assert(where.parentNode is this);
	//assert(isInArray(where, children));
	//assert(isInArray(child, children));
}
do {
	foreach(idx, c; children) {
		if(c !is where)
			continue;

		// the insertion point is the slot right behind `where`
		immutable size_t at = idx + 1;
		if(cast(DocumentFragment) child !is null) {
			children = children[0 .. at] ~ child.children ~ children[at .. $];
			// reparenting of the fragment's children is disabled here:
			//foreach(child; frag.children)
				//child.parentNode = this;
		} else {
			children = children[0 .. at] ~ child ~ children[at .. $];
		}

		child.parentNode = this;
		break;
	}
}
/++
	Reparents all the child elements of `e` to `this`, leaving `e` childless.

	Params:
		e = the element whose children you want to steal
		position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children.

	Returns:
		The stolen children, in their original order.
+/
Element[] stealChildren(Element e, Element position = null)
in {
	assert(!selfClosed);
	assert(e !is null);
	//if(position !is null)
		//assert(isInArray(position, children));
}
out (ret) {
	assert(e.children.length == 0);
	// all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible...
	version(none)
	debug foreach(child; ret) {
		assert(child.parentNode is this);
		assert(child.parentDocument is this.parentDocument);
	}
}
do {
	// claim ownership first...
	foreach(stolen; e.children)
		stolen.parentNode = this;

	// ...then place them in our child list
	if(position is null) {
		children ~= e.children;
	} else {
		foreach(idx, existing; children) {
			if(existing !is position)
				continue;
			children = children[0 .. idx] ~ e.children ~ children[idx .. $];
			break;
		}
	}

	// hand back the stolen nodes and leave the donor empty
	auto ret = e.children[];
	e.children.length = 0;
	return ret;
}
/++
	Puts the given element first in our children list. The given element must not have a parent already.

	If `e` is a [DocumentFragment], its children are placed at the front (and
	reparented to this node) instead of the fragment object itself.

	Returns:
		`e`
+/
Element prependChild(Element e)
in {
	assert(e.parentNode is null);
	assert(!selfClosed);
}
out {
	assert(e.parentNode is this);
	assert(e.parentDocument is this.parentDocument);
	assert(children[0] is e);
}
do {
	auto frag = cast(DocumentFragment) e;
	if(frag is null) {
		children = e ~ children;
	} else {
		children = e.children ~ children;
		foreach(moved; frag.children)
			moved.parentNode = this;
	}

	e.parentNode = this;
	return e;
}
/**
	Returns a string containing all child elements, formatted such that it could be pasted into
	an XML file.

	Params:
		where = the appender the children are written into. Only the part written
			by this call is returned, so an appender that already holds data can be passed.

	Returns:
		The serialized children, or an empty string if the child list is null.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// remember where our output begins inside the (possibly pre-filled) appender
	immutable size_t startOffset = where.data.length;

	foreach(node; children) {
		assert(node !is null);
		node.writeToAppender(where);
	}

	return where.data[startOffset .. $];
}
/**
	Takes some html and replaces the element's children with the tree made from the string.

	The previous children are detached (their `parentNode` is cleared) in both the
	empty and the non-empty case, so nodes that are no longer in the tree do not
	keep pointing at this element.

	Params:
		html = the markup to parse. An empty string just removes all children.
		strict = passed on to the parser for both of `parseUtf8`'s flag arguments.

	Returns:
		`this`
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	selfClosed = false;

	// wrap in a throwaway root so markup with several top-level nodes parses as one tree
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	// Detach the old children only after parsing succeeded, so a parse error leaves
	// the existing content untouched, as it did before.
	removeAllChildren();

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
	}

	// the temporary document must not keep referencing nodes we now own
	doc.root.children = null;

	return this;
}
/// ditto
/// This overload takes an `Html` value and forwards its `source` string to the string setter.
@property Element innerHTML(Html html) {
return this.innerHTML = html.source;
}
/**
Replaces this node with the given html string, which is parsed
Note: this invalidates the this reference, since it is removed
from the tree.
Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
auto doc = new Document();
// wrap in a throwaway root so markup with several top-level nodes parses as one tree
doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness
// adopt the parsed nodes as this element's own children first...
children = doc.root.children;
foreach(c; children) {
c.parentNode = this;
}
// ...then take this element out of the tree (presumably stripOut leaves the children in its place; it is defined elsewhere - confirm)
stripOut();
// NOTE(review): unlike the innerHTML setter, doc.root.children is not cleared, so the temporary document still references the returned nodes
return doc.root.children;
}
/++
Returns all the html for this element, including the tag itself.
This is equivalent to calling toString(); the body simply forwards to it.
+/
@property string outerHTML() {
return this.toString();
}
/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
///
/// All previous children are detached (their `parentNode` is cleared) and replaced
/// by a single `RawSource` node holding `rawSource`.
@property void innerRawSource(string rawSource) {
	// detach the old children properly instead of just dropping them from the list,
	// so they do not keep a stale parentNode pointing at this element
	removeAllChildren();

	auto rs = new RawSource(parentDocument, rawSource);
	children ~= rs;
	rs.parentNode = this;
}
///.
Element replaceChild(Element find, Element replace)
in {
assert(find !is null);
assert(find.parentNode is this);
assert(replace !is null);
assert(replace.parentNode is null);
}
out(ret) {
assert(ret is replace);
assert(replace.parentNode is this);
assert(replace.parentDocument is this.parentDocument);
assert(find.parentNode is null);
}
do {
// FIXME
//if(auto frag = cast(DocumentFragment) replace)
//return this.replaceChild(frag, replace.children);
for(int i = 0; i < children.length; i++) {
if(children[i] is find) {
replace.parentNode = this;
children[i].parentNode = null;
children[i] = replace;