diff --git a/lib/site/HTML/Element.pm b/lib/site/HTML/Element.pm
index 0da27b208..044c0a8ff 100644
--- a/lib/site/HTML/Element.pm
+++ b/lib/site/HTML/Element.pm
@@ -1,109 +1,148 @@
 package HTML::Element;
 
-# $Id$
+# ABSTRACT: Class for objects that represent HTML elements
 
-=head1 NAME
+use strict;
+use warnings;
 
-HTML::Element - Class for objects that represent HTML elements
+our $VERSION = '5.03'; # VERSION from OurPkgVersion
 
-=head1 SYNOPSIS
+use Carp           ();
+use HTML::Entities ();
+use HTML::Tagset   ();
+use integer;    # vroom vroom!
 
- require HTML::Element;
- $a = new HTML::Element 'a', href => 'http://www.oslonett.no/';
- $a->push_content("Oslonett AS");
+# This controls encoding entities on output.
+# When set, entities won't be re-encoded.
+# Defaults to off because the parser decodes entities by default.
+our $encoded_content = 0;
 
- $tag = $a->tag;
- $tag = $a->starttag;
- $tag = $a->endtag;
- $ref = $a->attr('href');
+use vars qw($html_uc $Debug $ID_COUNTER $VERSION %list_type_to_sub);
 
- $links = $a->extract_links();
+# Set up support for weak references, if possible:
+my $using_weaken;
 
- print $a->as_HTML;
+#=head1 CLASS METHODS
 
-=head1 DESCRIPTION
 
-Objects of the HTML::Element class can be used to represent elements
-of HTML.  These objects have attributes and content.  The content is an
-array of text segments and other HTML::Element objects.  Thus a
-tree of HTML::Element objects as nodes can represent the syntax tree
-for a HTML document.
+sub Use_Weak_Refs {
+    my $self_or_class = shift;
 
-The following methods are available:
+    if (@_) {    # set
+        $using_weaken = !! shift; # Normalize boolean value
+        Carp::croak("The installed Scalar::Util lacks support for weak references")
+              if $using_weaken and not defined &Scalar::Util::weaken;
 
-=over 4
+        no warnings 'redefine';
+        *_weaken = $using_weaken ? \&Scalar::Util::weaken : sub ($) {};
+    } # end if setting value
 
-=cut
+    return $using_weaken;
+} # end Use_Weak_Refs
 
+BEGIN {
+    # Attempt to import weaken from Scalar::Util, but don't complain
+    # if we can't.  Also, rename it to _weaken.
+    require Scalar::Util;
 
-use strict;
-use Carp ();
-use HTML::Entities ();
+    __PACKAGE__->Use_Weak_Refs(defined &Scalar::Util::weaken);
+}
 
-use vars qw($VERSION
-	    %emptyElement %optionalEndTag %linkElements %boolean_attr
-           );
+sub import {
+    my $class = shift;
 
-($VERSION) = q$Revision$ =~ /: (\d+)/;
-sub Version { $VERSION; }
+    for (@_) {
+        if (/^-(no_?)?weak$/) {
+            $class->Use_Weak_Refs(not $1);
+        } else {
+            Carp::croak("$_ is not exported by the $class module");
+        }
+    }
+} # end import
 
-# Elements that does not have corresponding end tags (i.e. are empty)
-%emptyElement   = map { $_ => 1 } qw(base link meta isindex
-			             img br hr wbr
-			             input area param
-			            );
-%optionalEndTag = map { $_ => 1 } qw(p li dt dd option); # th tr td);
-
-# Elements that might contain links and the name of the link attribute
-%linkElements =
-(
- body   => 'background',
- base   => 'href',
- a      => 'href',
- img    => [qw(src lowsrc usemap)],   # lowsrc is a Netscape invention
- form   => 'action',
- input  => 'src',
-'link'  => 'href',          # need quoting since link is a perl builtin
- frame  => 'src',
- applet => 'codebase',
- area   => 'href',
-);
 
-# These attributes are normally printed without showing the "='value'".
-# This representation works as long as no element has more than one
-# attribute like this.
-%boolean_attr = (
- area   => 'nohref',
- dir    => 'compact',
- dl     => 'compact',
- hr     => 'noshade',
- img    => 'ismap',
- input  => { checked => 1, readonly => 1, disabled => 1 },
- menu   => 'compact',
- ol     => 'compact',
- option => 'selected',
-'select'=> 'multiple',
- td     => 'nowrap',
- th     => 'nowrap',
- ul     => 'compact',
+$Debug = 0 unless defined $Debug;
+
+#=head1 SUBROUTINES
+
+
+sub Version {
+    Carp::carp("Deprecated subroutine HTML::Element::Version called");
+    $VERSION;
+}
+
+my $nillio = [];
+
+*HTML::Element::emptyElement   = \%HTML::Tagset::emptyElement;      # legacy
+*HTML::Element::optionalEndTag = \%HTML::Tagset::optionalEndTag;    # legacy
+*HTML::Element::linkElements   = \%HTML::Tagset::linkElements;      # legacy
+*HTML::Element::boolean_attr   = \%HTML::Tagset::boolean_attr;      # legacy
+*HTML::Element::canTighten     = \%HTML::Tagset::canTighten;        # legacy
+
+# Constants for signalling back to the traverser:
+my $travsignal_package = __PACKAGE__ . '::_travsignal';
+my ( $ABORT, $PRUNE, $PRUNE_SOFTLY, $OK, $PRUNE_UP )
+    = map { my $x = $_; bless \$x, $travsignal_package; }
+    qw(
+    ABORT  PRUNE   PRUNE_SOFTLY   OK   PRUNE_UP
 );
 
-=item $h = HTML::Element->new('tag', 'attrname' => 'value',...)
 
-The object constructor.  Takes a tag name as argument. Optionally,
-allows you to specify initial attributes at object creation time.
+## Comments from Father Chrysostomos, RT #58880:
+## The sole purpose of empty parentheses after a sub name is to make it
+## parse as a 0-ary (nullary) function.  I.e., ABORT+1 should parse as
+## ABORT()+1, not ABORT(+1).  The parentheses also tell perl that it can
+## be inlined.
+## Deparse is really useful for demonstrating this:
+##   $ perl -MO=Deparse,-p -e 'sub ABORT {7} print ABORT+8'
+## versus
+##   $ perl -MO=Deparse,-p -e 'sub ABORT() {7} print ABORT+8'
+##
+## With the parentheses, perl not only parses the name as a term;
+## it even resolves the constant at compile-time, making the code run faster.
+
+## no critic
+sub ABORT ()        {$ABORT}
+sub PRUNE ()        {$PRUNE}
+sub PRUNE_SOFTLY () {$PRUNE_SOFTLY}
+sub OK ()           {$OK}
+sub PRUNE_UP ()     {$PRUNE_UP}
+## use critic
+
+$html_uc = 0;
+
+# set to 1 if you want tag and attribute names from starttag and endtag
+#  to be uc'd
+
+# regexs for XML names
+# http://www.w3.org/TR/2006/REC-xml11-20060816/NT-NameStartChar
+my $START_CHAR
+    = qr/(?:\:|[A-Z]|_|[a-z]|[\x{C0}-\x{D6}]|[\x{D8}-\x{F6}]|[\x{F8}-\x{2FF}]|[\x{370}-\x{37D}]|[\x{37F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])/;
+
+# http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameChar
+my $NAME_CHAR
+    = qr/(?:$START_CHAR|-|\.|[0-9]|\x{B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])/;
+
+# Elements that do not have corresponding end tags (i.e. are empty): see %HTML::Tagset::emptyElement
+
+#==========================================================================
+
+#=head1 BASIC METHODS
 
-=cut
 
 #
-# An HTML::Element is represented by blessed hash reference.  Key-names
-# not starting with '_' are reserved for the SGML attributes of the element.
+# An HTML::Element is represented by a blessed hash reference, much like
+# Tree::DAG_Node objects.  Key-names not starting with '_' are reserved
+# for the SGML attributes of the element.
 # The following special keys are used:
 #
-#    '_tag':    The tag name
+#    '_tag':    The tag name (i.e., the generic identifier)
 #    '_parent': A reference to the HTML::Element above (when forming a tree)
 #    '_pos':    The current position (a reference to a HTML::Element) is
-#               where inserts will be placed (look at the insert_element method)
+#               where inserts will be placed (look at the insert_element
+#               method).  If not set, the implicit value is the object itself.
+#    '_content': A ref to an array of nodes under this.
+#                It might not be set.
 #
 # Example: <img src="gisle.jpg" alt="Gisle's photo"> is represented like this:
 #
@@ -111,495 +150,4335 @@ allows you to specify initial attributes at object creation time.
 #     _tag => 'img',
 #     src  => 'gisle.jpg',
 #     alt  => "Gisle's photo",
-#  }, HTML::Element;
+#  }, 'HTML::Element';
 #
 
-sub new
-{
+sub new {
     my $class = shift;
-    my $tag   = shift;
-    Carp::croak("No tag") unless defined $tag or length $tag;
-    my $self  = bless { _tag => lc $tag }, $class;
-    my($attr, $val);
-    while (($attr, $val) = splice(@_, 0, 2)) {
-	$val = $attr unless defined $val;
-	$self->{lc $attr} = $val;
+    $class = ref($class) || $class;
+
+    my $tag = shift;
+    Carp::croak("No tagname") unless defined $tag and length $tag;
+    Carp::croak "\"$tag\" isn't a good tag name!"
+        if $tag =~ m/[<>\/\x00-\x20]/;    # minimal sanity, certainly!
+    my $self = bless { _tag => scalar( $class->_fold_case($tag) ) }, $class;
+    my ( $attr, $val );
+    while ( ( $attr, $val ) = splice( @_, 0, 2 ) ) {
+## RT #42209 why does this default to the attribute name and not remain unset or the empty string?
+        $val = $attr unless defined $val;
+        $self->{ $class->_fold_case($attr) } = $val;
     }
-    if ($tag eq 'html') {
-	$self->{'_pos'} = undef;
+    if ( $tag eq 'html' ) {
+        $self->{'_pos'} = undef;
     }
-    $self;
+    _weaken($self->{'_parent'}) if $self->{'_parent'};
+    return $self;
 }
 
 
+sub attr {
+    my $self = shift;
+    my $attr = scalar( $self->_fold_case(shift) );
+    if (@_) {    # set
+        if ( defined $_[0] ) {
+            my $old = $self->{$attr};
+            $self->{$attr} = $_[0];
+            return $old;
+        }
+        else {    # delete, actually
+            return delete $self->{$attr};
+        }
+    }
+    else {        # get
+        return $self->{$attr};
+    }
+}
 
-=item $h->tag()
-
-Returns (optionally sets) the tag name for the element.  The tag is
-always converted to lower case.
-
-=cut
 
-sub tag
-{
+sub tag {
     my $self = shift;
-    if (@_) {
-	$self->{'_tag'} = lc $_[0];
-    } else {
-	$self->{'_tag'};
+    if (@_) {    # set
+        $self->{'_tag'} = $self->_fold_case( $_[0] );
+    }
+    else {       # get
+        $self->{'_tag'};
     }
 }
 
 
+sub parent {
+    my $self = shift;
+    if (@_) {    # set
+        Carp::croak "an element can't be made its own parent"
+            if defined $_[0] and ref $_[0] and $self eq $_[0];    # sanity
+        _weaken($self->{'_parent'} = $_[0]);
+    }
+    else {
+        $self->{'_parent'};                                       # get
+    }
+}
 
-=item $h->starttag()
 
-Returns the complete start tag for the element.  Including leading
-"<", trailing ">" and attributes.
+sub content_list {
+    return wantarray
+        ? @{ shift->{'_content'} || return () }
+        : scalar @{ shift->{'_content'} || return 0 };
+}
 
-=cut
 
-sub starttag
-{
-    my $self = shift;
-    my $name = $self->{'_tag'};
-    my $tag = "<\U$name";
-    for (sort keys %$self) {
-	next if /^_/;
-	my $val = $self->{$_};
-	if ($_ eq $val &&
-	    exists($boolean_attr{$name}) &&
-	    (ref($boolean_attr{$name}) ? $boolean_attr{$name}{$_} : 
- 					 $boolean_attr{$name} eq $_)) {
-	    $tag .= " \U$_";
-	} else {
-	    if ($val !~ /^\d+$/) {
-		# count number of " compared to number of '
-		if (($val =~ tr/\"/\"/) > ($val =~ tr/\'/\'/)) {
-		    # use single quotes around the attribute value
-		    HTML::Entities::encode_entities($val, "&'>");
-		    $val = qq('$val');
-		} else {
-		    HTML::Entities::encode_entities($val, '&">');
-		    $val = qq{"$val"};
-		}
-	    }
-	    $tag .= qq{ \U$_\E=$val};
-	}
-    }
-    "$tag>";
-}
-
-
-
-=item $h->endtag()
-
-Returns the complete end tag.  Includes leading "</" and the trailing
-">".
+# a read-only method!  can't say $h->content( [] )!
+sub content {
+    return shift->{'_content'};
+}
 
-=cut
 
-sub endtag
-{
-    "</\U$_[0]->{'_tag'}>";
+sub content_array_ref {
+    return shift->{'_content'} ||= [];
 }
 
 
+sub content_refs_list {
+    return \( @{ shift->{'_content'} || return () } );
+}
 
-=item $h->parent([$newparent])
 
-Returns (optionally sets) the parent for this element.
+sub implicit {
+    return shift->attr( '_implicit', @_ );
+}
 
-=cut
 
-sub parent
-{
+sub pos {
     my $self = shift;
-    if (@_) {
-	$self->{'_parent'} = $_[0];
-    } else {
-	$self->{'_parent'};
+    my $pos  = $self->{'_pos'};
+    if (@_) {    # set
+        my $parm = shift;
+        if ( defined $parm and $parm ne $self ) {
+            $self->{'_pos'} = $parm;    # means that element
+        }
+        else {
+            $self->{'_pos'} = undef;    # means $self
+        }
     }
+    return $pos if defined($pos);
+    return $self;
 }
 
 
+sub all_attr {
+    return %{ $_[0] };
 
-=item $h->implicit([$bool])
+    # Yes, trivial.  But no other way for the user to do the same
+    #  without breaking encapsulation.
+    # And if our object representation changes, this method's behavior
+    #  should stay the same.
+}
 
-Returns (optionally sets) the implicit attribute.  This attribute is
-used to indicate that the element was not originally present in the
-source, but was inserted in order to conform to HTML strucure.
+sub all_attr_names {
+    return keys %{ $_[0] };
+}
 
-=cut
 
-sub implicit
-{
-    shift->attr('_implicit', @_);
+sub all_external_attr {
+    my $self = $_[0];
+    return map( ( length($_) && substr( $_, 0, 1 ) eq '_' )
+        ? ()
+        : ( $_, $self->{$_} ),
+        keys %$self );
 }
 
+sub all_external_attr_names {
+    return grep !( length($_) && substr( $_, 0, 1 ) eq '_' ), keys %{ $_[0] };
+}
 
 
-=item $h->is_inside('tag',...)
+sub id {
+    if ( @_ == 1 ) {
+        return $_[0]{'id'};
+    }
+    elsif ( @_ == 2 ) {
+        if ( defined $_[1] ) {
+            return $_[0]{'id'} = $_[1];
+        }
+        else {
+            return delete $_[0]{'id'};
+        }
+    }
+    else {
+        Carp::croak '$node->id can\'t take ' . scalar(@_) . ' parameters!';
+    }
+}
 
-Returns true if this tag is contained inside one of the specified tags.
 
-=cut
+sub _gensym {
+    unless ( defined $ID_COUNTER ) {
 
-sub is_inside
-{
-    my $self = shift;
-    my $p = $self;
-    while (defined $p) {
-	my $ptag = $p->{'_tag'};
-	for (@_) {
-	    return 1 if $ptag eq $_;
-	}
-	$p = $p->{'_parent'};
+        # start it out...
+        $ID_COUNTER = sprintf( '%04x', rand(0x1000) );
+        $ID_COUNTER =~ tr<0-9a-f><J-NP-Z>;    # yes, skip letter "oh"
+        $ID_COUNTER .= '00000';
     }
-    0;
+    ++$ID_COUNTER;
 }
 
+sub idf {
+    my $nparms = scalar @_;
 
+    if ( $nparms == 1 ) {
+        my $x;
+        if ( defined( $x = $_[0]{'id'} ) and length $x ) {
+            return $x;
+        }
+        else {
+            return $_[0]{'id'} = _gensym();
+        }
+    }
+    if ( $nparms == 2 ) {
+        if ( defined $_[1] ) {
+            return $_[0]{'id'} = $_[1];
+        }
+        else {
+            return delete $_[0]{'id'};
+        }
+    }
+    Carp::croak '$node->idf can\'t take ' . scalar(@_) . ' parameters!';
+}
 
-=item $h->pos()
 
-Returns (and optionally sets) the current position.  The position is a
-reference to a HTML::Element object that is part of the tree that has
-the current object as root.  This restriction is not enforced when
-setting pos(), but unpredictable things will happen if this is not
-true.
+sub push_content {
+    my $self = shift;
+    return $self unless @_;
 
+    my $content = ( $self->{'_content'} ||= [] );
+    for (@_) {
+        if ( ref($_) eq 'ARRAY' ) {
 
-=cut
+            # magically call new_from_lol
+            push @$content, $self->new_from_lol($_);
+            _weaken($content->[-1]->{'_parent'} = $self);
+        }
+        elsif ( ref($_) ) {    # insert an element
+            $_->detach if $_->{'_parent'};
+            _weaken($_->{'_parent'} = $self);
+            push( @$content, $_ );
+        }
+        else {                 # insert text segment
+            if ( @$content && !ref $content->[-1] ) {
 
-sub pos
-{
+                # last content element is also text segment -- append
+                $content->[-1] .= $_;
+            }
+            else {
+                push( @$content, $_ );
+            }
+        }
+    }
+    return $self;
+}
+
+
+sub unshift_content {
     my $self = shift;
-    my $pos = $self->{'_pos'};
-    if (@_) {
-	$self->{'_pos'} = $_[0];
+    return $self unless @_;
+
+    my $content = ( $self->{'_content'} ||= [] );
+    for ( reverse @_ ) {    # so they get added in the order specified
+        if ( ref($_) eq 'ARRAY' ) {
+
+            # magically call new_from_lol
+            unshift @$content, $self->new_from_lol($_);
+            _weaken($content->[0]->{'_parent'} = $self);
+        }
+        elsif ( ref $_ ) {    # insert an element
+            $_->detach if $_->{'_parent'};
+            _weaken($_->{'_parent'} = $self);
+            unshift( @$content, $_ );
+        }
+        else {                # insert text segment
+            if ( @$content && !ref $content->[0] ) {
+
+                # first content element is also a text segment -- prepend
+                $content->[0] = $_ . $content->[0];
+            }
+            else {
+                unshift( @$content, $_ );
+            }
+        }
     }
-    return $pos if defined($pos);
-    $self;
+    return $self;
 }
 
+# Cf.  splice ARRAY,OFFSET,LENGTH,LIST
 
 
-=item $h->attr('attr', [$value])
+sub splice_content {
+    my ( $self, $offset, $length, @to_add ) = @_;
+    Carp::croak "splice_content requires at least one argument"
+        if @_ < 2;    # at least $h->splice_content($offset);
 
-Returns (and optionally sets) the value of some attribute.
+    my $content = ( $self->{'_content'} ||= [] );
 
-=cut
+    # prep the list
 
-sub attr
-{
-    my $self = shift;
-    my $attr = lc shift;
-    my $old = $self->{$attr};
-    if (@_) {
-	$self->{$attr} = $_[0];
+    my @out;
+    if ( @_ > 2 ) {    # self, offset, length, ...
+        foreach my $n (@to_add) {
+            if ( ref($n) eq 'ARRAY' ) {
+                $n = $self->new_from_lol($n);
+                _weaken($n->{'_parent'} = $self);
+            }
+            elsif ( ref($n) ) {
+                $n->detach;
+                _weaken($n->{'_parent'} = $self);
+            }
+        }
+        @out = splice @$content, $offset, $length, @to_add;
+    }
+    else {    #  self, offset
+        @out = splice @$content, $offset;
     }
-    $old;
+    foreach my $n (@out) {
+        $n->{'_parent'} = undef if ref $n;
+    }
+    return @out;
 }
 
 
+sub detach {
+    my $self = $_[0];
+    return undef unless ( my $parent = $self->{'_parent'} );
+    $self->{'_parent'} = undef;
+    my $cohort = $parent->{'_content'} || return $parent;
+    @$cohort = grep { not( ref($_) and $_ eq $self ) } @$cohort;
 
-=item $h->content()
+    # filter $self out, if parent has any evident content
 
-Returns the content of this element.  The content is represented as a
-reference to an array of text segments and references to other
-HTML::Element objects.
+    return $parent;
+}
 
-=cut
 
-sub content
-{
-    shift->{'_content'};
+sub detach_content {
+    my $c = $_[0]->{'_content'} || return ();    # in case of no content
+    for (@$c) {
+        $_->{'_parent'} = undef if ref $_;
+    }
+    return splice @$c;
 }
 
 
+sub replace_with {
+    my ( $self, @replacers ) = @_;
+    Carp::croak "the target node has no parent"
+        unless my ($parent) = $self->{'_parent'};
 
-=item $h->is_empty()
+    my $parent_content = $parent->{'_content'};
+    Carp::croak "the target node's parent has no content!?"
+        unless $parent_content and @$parent_content;
 
-Returns true if there is no content.
+    my $replacers_contains_self;
+    for (@replacers) {
+        if ( !ref $_ ) {
 
-=cut
+            # noop
+        }
+        elsif ( $_ eq $self ) {
 
-sub is_empty
-{
+            # noop, but check that it's there just once.
+            Carp::croak "Replacement list contains several copies of target!"
+                if $replacers_contains_self++;
+        }
+        elsif ( $_ eq $parent ) {
+            Carp::croak "Can't replace an item with its parent!";
+        }
+        elsif ( ref($_) eq 'ARRAY' ) {
+            $_ = $self->new_from_lol($_);
+            _weaken($_->{'_parent'} = $parent);
+        }
+        else {
+            $_->detach;
+            _weaken($_->{'_parent'} = $parent);
+
+            # each of these two steps is necessary
+        }
+    }    # for @replacers
+    @$parent_content = map { ( ref($_) and $_ eq $self ) ? @replacers : $_ }
+        @$parent_content;
+
+    $self->{'_parent'} = undef unless $replacers_contains_self;
+
+    # if replacers does contain self, then the parent attribute is fine as-is
+
+    return $self;
+}
+
+
+sub preinsert {
     my $self = shift;
-    !exists($self->{'_content'}) || !@{$self->{'_content'}};
+    return $self unless @_;
+    return $self->replace_with( @_, $self );
 }
 
 
+sub postinsert {
+    my $self = shift;
+    return $self unless @_;
+    return $self->replace_with( $self, @_ );
+}
 
-=item $h->insert_element($element, $implicit)
 
-Inserts a new element at current position and updates pos() to point
-to the inserted element.  Returns $element.
+sub replace_with_content {
+    my $self = $_[0];
+    Carp::croak "the target node has no parent"
+        unless my ($parent) = $self->{'_parent'};
 
-=cut
+    my $parent_content = $parent->{'_content'};
+    Carp::croak "the target node's parent has no content!?"
+        unless $parent_content and @$parent_content;
 
-sub insert_element
-{
-    my($self, $tag, $implicit) = @_;
-    my $e;
-    if (ref $tag) {
-	$e = $tag;
-	$tag = $e->tag;
-    } else {
-	$e = new HTML::Element $tag;
-    }
-    $e->{'_implicit'} = 1 if $implicit;
-    my $pos = $self->{'_pos'};
-    $pos = $self unless defined $pos;
-    $pos->push_content($e);
-    unless ($emptyElement{$tag}) {
-	$self->{'_pos'} = $e;
-	$pos = $e;
-    }
-    $pos;
-}
+    my $content_r = $self->{'_content'} || [];
+    @$parent_content = map { ( ref($_) and $_ eq $self ) ? @$content_r : $_ }
+        @$parent_content;
 
+    $self->{'_parent'} = undef;    # detach $self from its parent
 
-=item $h->push_content($element_or_text,...)
+    # Update parentage link, removing from $self's content list
+    for ( splice @$content_r ) { _weaken($_->{'_parent'} = $parent) if ref $_ }
 
-Adds to the content of the element.  The content should be a text
-segment (scalar) or a reference to a HTML::Element object.
+    return $self;                  # note: doesn't destroy it.
+}
 
-=cut
 
-sub push_content
-{
-    my $self = shift;
-    $self->{'_content'} = [] unless exists $self->{'_content'};
-    my $content = $self->{'_content'};
-    for (@_) {
-	if (ref $_) {
-	    $_->{'_parent'} = $self;
-	    push(@$content, $_);
-	} else {
-	    # The current element is a text segment
-	    if (@$content && !ref $content->[-1]) {
-		# last content element is also text segment
-		$content->[-1] .= $_;
-	    } else {
-		push(@$content, $_);
-	    }
-	}
+sub delete_content {
+    for (
+        splice @{
+            delete( $_[0]->{'_content'} )
+
+                # Deleting it here (while holding its value, for the moment)
+                #  will keep calls to detach() from trying to uselessly filter
+                #  the list (as they won't be able to see it once it's been
+                #  deleted)
+                || return ( $_[0] )    # in case of no content
+        },
+        0
+
+        # the splice is so we can null the array too, just in case
+        # something somewhere holds a ref to it
+        )
+    {
+        $_->delete if ref $_;
     }
-    $self;
+    $_[0];
 }
 
 
+# two handy aliases
+sub destroy         { shift->delete(@_) }
+sub destroy_content { shift->delete_content(@_) }
 
-=item $h->delete_content()
+sub delete {
+    my $self = $_[0];
+    $self->delete_content    # recurse down
+        if $self->{'_content'} && @{ $self->{'_content'} };
 
-Clears the content.
+    $self->detach if $self->{'_parent'} and $self->{'_parent'}{'_content'};
 
-=cut
+    # not the typical case
 
-sub delete_content
-{
-    my $self = shift;
-    for (@{$self->{'_content'}}) {
-	$_->delete if ref $_;
-    }
-    delete $self->{'_content'};
-    $self;
+    %$self = ();             # null out the whole object on the way out
+    return;
 }
 
 
+sub clone {
 
-=item $h->delete()
+    #print "Cloning $_[0]\n";
+    my $it = shift;
+    Carp::croak "clone() can be called only as an object method"
+        unless ref $it;
+    Carp::croak "clone() takes no arguments" if @_;
 
-Frees memory associated with the element and all children.  This is
-needed because perl's reference counting does not work since we use
-circular references.
+    my $new = bless {%$it}, ref($it);    # COPY!!! HOOBOY!
+    delete @$new{ '_content', '_parent', '_pos', '_head', '_body' };
 
-=cut
-#'
+    # clone any contents
+    if ( $it->{'_content'} and @{ $it->{'_content'} } ) {
+        $new->{'_content'}
+            = [ ref($it)->clone_list( @{ $it->{'_content'} } ) ];
+        for ( @{ $new->{'_content'} } ) {
+            _weaken($_->{'_parent'} = $new) if ref $_;
+        }
+    }
 
-sub delete
-{
-    $_[0]->delete_content;
-    delete $_[0]->{'_parent'};
-    delete $_[0]->{'_pos'};
-    $_[0] = undef;
+    return $new;
 }
 
 
+sub clone_list {
+    Carp::croak "clone_list can be called only as a class method"
+        if ref shift @_;
 
-=item $h->traverse(\&callback, [$ignoretext])
+    # all that does is get me here
+    return map {
+        ref($_)
+            ? $_->clone    # copy by method
+            : $_           # copy by evaluation
+    } @_;
+}
 
-Traverse the element and all of its children.  For each node visited, the
-callback routine is called with the node, a startflag and the depth as
-arguments.  If the $ignoretext parameter is true, then the callback
-will not be called for text content.  The flag is 1 when we enter a
-node and 0 when we leave the node.
 
-If the returned value from the callback is false then we will not
-traverse the children.
+sub normalize_content {
+    my $start = $_[0];
+    my $c;
+    return
+        unless $c = $start->{'_content'} and ref $c and @$c;   # nothing to do
+        # TODO: if we start having text elements, deal with catenating those too?
+    my @stretches = (undef);    # start with a barrier
 
-=cut
+    # I suppose this could be rewritten to treat stretches as it goes, instead
+    #  of at the end.  But feh.
 
-sub traverse
-{
-    my($self, $callback, $ignoretext, $depth) = @_;
-    $depth ||= 0;
+    # Scan:
+    for ( my $i = 0; $i < @$c; ++$i ) {
+        if ( defined $c->[$i] and ref $c->[$i] ) {    # not a text segment
+            if ( $stretches[0] ) {
+
+                # put in a barrier
+                if ( $stretches[0][1] == 1 ) {
+
+                    #print "Nixing stretch at ", $i-1, "\n";
+                    undef $stretches[0]; # nix the previous one-node "stretch"
+                }
+                else {
+
+                    #print "End of stretch at ", $i-1, "\n";
+                    unshift @stretches, undef;
+                }
+            }
+
+            # else no need for a barrier
+        }
+        else {                           # text segment
+            $c->[$i] = '' unless defined $c->[$i];
+            if ( $stretches[0] ) {
+                ++$stretches[0][1];      # increase length
+            }
+            else {
 
-    if (&$callback($self, 1, $depth)) {
-	for (@{$self->{'_content'}}) {
-	    if (ref $_) {
-		$_->traverse($callback, $ignoretext, $depth+1);
-	    } else {
-		&$callback($_, 1, $depth+1) unless $ignoretext;
-	    }
-	}
-	&$callback($self, 0, $depth) unless $emptyElement{$self->{'_tag'}};
+                #print "New stretch at $i\n";
+                unshift @stretches, [ $i, 1 ];    # start and length
+            }
+        }
     }
-    $self;
-}
 
+    # Now combine.  Note that @stretches is in reverse order, so the indexes
+    # still make sense as we work our way thru (i.e., backwards thru $c).
+    foreach my $s (@stretches) {
+        if ( $s and $s->[1] > 1 ) {
 
+            #print "Stretch at ", $s->[0], " for ", $s->[1], "\n";
+            $c->[ $s->[0] ]
+                .= join( '', splice( @$c, $s->[0] + 1, $s->[1] - 1 ) )
 
-=item $h->extract_links([@wantedTypes])
+                # append the subsequent ones onto the first one.
+        }
+    }
+    return;
+}
 
-Returns links found by traversing the element and all of its children.
-The return value is a reference to an array.  Each element of the
-array is an array with 2 values; the link value and a reference to the
-corresponding element.
 
-You might specify that you just want to extract some types of links.
-For instance if you only want to extract <a href="..."> and <img
-src="..."> links you might code it like this:
+sub delete_ignorable_whitespace {
+
+    # This doesn't delete all sorts of whitespace that won't actually
+    #  be used in rendering, tho -- that's up to the rendering application.
+    # For example:
+    #   <input type='text' name='foo'>
+    #     [some whitespace]
+    #   <input type='text' name='bar'>
+    # The WS between the two elements /will/ get used by the renderer.
+    # But here:
+    #   <input type='hidden' name='foo' value='1'>
+    #     [some whitespace]
+    #   <input type='text' name='bar' value='2'>
+    # the WS between them won't be rendered in any way, presumably.
+
+    #my $Debug = 4;
+    die "delete_ignorable_whitespace can be called only as an object method"
+        unless ref $_[0];
+
+    print "About to tighten up...\n" if $Debug > 2;
+    my (@to_do) = ( $_[0] );    # Start off.
+    my ( $i, $sibs, $ptag, $this );    # scratch for the loop...
+    while (@to_do) {
+        if (   ( $ptag = ( $this = shift @to_do )->{'_tag'} ) eq 'pre'
+            or $ptag eq 'textarea'
+            or $HTML::Tagset::isCDATA_Parent{$ptag} )
+        {
+
+            # block the traversal under those
+            print "Blocking traversal under $ptag\n" if $Debug;
+            next;
+        }
+        next unless ( $sibs = $this->{'_content'} and @$sibs );
+        for ( $i = $#$sibs; $i >= 0; --$i ) {   # work backwards thru the list
+            if ( ref $sibs->[$i] ) {
+                unshift @to_do, $sibs->[$i];
+
+                # yes, this happens in pre order -- we're going backwards
+                # thru this sibling list.  I doubt it actually matters, tho.
+                next;
+            }
+            next if $sibs->[$i] =~ m<[^\n\r\f\t ]>s;   # skip unless it's /all/ whitespace
+
+            print "Under $ptag whose canTighten ",
+                "value is ", 0 + $HTML::Element::canTighten{$ptag}, ".\n"
+                if $Debug > 3;
+
+            # It's all whitespace...
+
+            if ( $i == 0 ) {
+                if ( @$sibs == 1 ) {                   # I'm an only child
+                    next unless $HTML::Element::canTighten{$ptag};    # parent
+                }
+                else {    # I'm leftmost of many
+                          # if either my parent or sib are eligible, I'm good.
+                    next
+                        unless $HTML::Element::canTighten{$ptag}    # parent
+                            or (ref $sibs->[1]
+                                and $HTML::Element::canTighten{ $sibs->[1]
+                                        {'_tag'} }    # right sib
+                            );
+                }
+            }
+            elsif ( $i == $#$sibs ) {                 # I'm rightmost of many
+                    # if either my parent or sib are eligible, I'm good.
+                next
+                    unless $HTML::Element::canTighten{$ptag}    # parent
+                        or (ref $sibs->[ $i - 1 ]
+                            and $HTML::Element::canTighten{ $sibs->[ $i - 1 ]
+                                    {'_tag'} }                  # left sib
+                        );
+            }
+            else {    # I'm the piggy in the middle
+                      # My parent doesn't matter -- it all depends on my sibs
+                next
+                    unless ref $sibs->[ $i - 1 ]
+                        or ref $sibs->[ $i + 1 ];
+
+                # if NEITHER sib is a node, quit
+
+                next if
+
+                    # bailout condition: if BOTH are INeligible nodes
+                    #  (as opposed to being text, or being eligible nodes)
+                    ref $sibs->[ $i - 1 ]
+                        and ref $sibs->[ $i + 1 ]
+                        and !$HTML::Element::canTighten{ $sibs->[ $i - 1 ]
+                                {'_tag'} }    # left sib
+                        and !$HTML::Element::canTighten{ $sibs->[ $i + 1 ]
+                                {'_tag'} }    # right sib
+                ;
+            }
 
-  for (@{ $e->extract_links(qw(a img)) }) {
-      ($link, $linkelem) = @$_;
-      ...
-  }
+       # Unknown tags aren't in canTighten and so AREN'T subject to tightening
 
-=cut
+            print "  delendum: child $i of $ptag\n" if $Debug > 3;
+            splice @$sibs, $i, 1;
+        }
 
-sub extract_links
-{
-    my $self = shift;
-    my %wantType; @wantType{map { lc $_ } @_} = (1) x @_;
-    my $wantType = scalar(@_);
-    my @links;
-    $self->traverse(
-	sub {
-	    my($self, $start, $depth) = @_;
-	    return 1 unless $start;
-	    my $tag = $self->{'_tag'};
-	    return 1 if $wantType && !$wantType{$tag};
-	    my $attr = $linkElements{$tag};
-	    return 1 unless defined $attr;
-	    $attr = [$attr] unless ref $attr;
-            for (@$attr) {
-	       my $val = $self->attr($_);
-	       push(@links, [$val, $self]) if defined $val;
-            }
-	    1;
-	}, 'ignoretext');
-    \@links;
+        # end of the loop-over-children
+    }
+
+    # end of the while loop.
+
+    return;
 }
 
 
+# Inserts an element at the current insertion point and returns the new
+# insertion point.
+#
+#   $tag      - either a tag name (a new element is then created through
+#               $self->element_class->new) or an existing element object
+#               (its tag name is then read with ->tag).  A false $tag makes
+#               this a no-op that just returns $self->pos().
+#   $implicit - if true, the element's '_implicit' flag is set to 1.
+#
+# The element is appended (push_content) to $self->{'_pos'}, falling back
+# to $self when no position has been recorded.  The position then moves to
+# the new element, unless its tag is listed in _empty_element_map or the
+# element carries a true '_empty_element' flag.
+sub insert_element {
+    my ( $self, $tag, $implicit ) = @_;
+    return $self->pos() unless $tag;    # noop if nothing to insert
 
-=item $h->dump()
+    my $e;
+    if ( ref $tag ) {
+        $e   = $tag;
+        $tag = $e->tag;
+    }
+    else {    # just a tag name -- so make the element
+        $e = $self->element_class->new($tag);
+        ++( $self->{'_element_count'} ) if exists $self->{'_element_count'};
 
-Prints the element and all its children to STDOUT.  Mainly useful for
-debugging.  The structure of the document is shown by indentation (no
-end tags).
+        # undocumented.  see TreeBuilder.
+    }
 
-=cut
+    $e->{'_implicit'} = 1 if $implicit;
 
-sub dump
-{
-    my $self = shift;
-    my $depth = shift || 0;
-    print STDERR "  " x $depth;
-    print STDERR $self->starttag, "\n";
-    for (@{$self->{'_content'}}) {
-	if (ref $_) {
-	    $_->dump($depth+1);
-	} else {
-	    print STDERR "  " x ($depth + 1);
-	    print STDERR qq{"$_"\n};
-	}
+    my $pos = $self->{'_pos'};
+    $pos = $self unless defined $pos;
+
+    $pos->push_content($e);
+
+    $self->{'_pos'} = $pos = $e
+        unless $self->_empty_element_map->{$tag} || $e->{'_empty_element'};
+
+    $pos;
+}
+
+#==========================================================================
+# Some things to override in XML::Element
+
+# Hands back a reference to the package-wide table of tags that are
+# prototypically empty.  Kept as a method so a subclass can override it.
+sub _empty_element_map {
+    return \%HTML::Element::emptyElement;
+}
+
+# Case-folding helper that lower-cases its arguments (the invocant in
+# $_[0] is ignored).  In list context every remaining argument is
+# lower-cased and returned; otherwise only the first argument after the
+# invocant is lower-cased and returned.
+sub _fold_case_LC {
+    if (wantarray) {
+        shift;    # discard the invocant
+        map lc($_), @_;
+    }
+    else {
+        return lc( $_[1] );
     }
 }
 
+# Case-"folding" helper that leaves names untouched (the invocant in
+# $_[0] is ignored).  List context yields every argument after the
+# invocant; any other context yields just the first of them.
+sub _fold_case_NOT {
+    return $_[1] unless wantarray;
+
+    my ( undef, @names ) = @_;
+    return @names;
+}
 
+*_fold_case = \&_fold_case_LC;
 
-=item $h->as_HTML()
+#==========================================================================
 
-Returns a string (the HTML document) that represents the element and
-its children.
+#=head1 DUMPING METHODS
 
-=cut
 
-sub as_HTML
-{
-    my $self = shift;
-    my @html = ();
-    $self->traverse(
-        sub {
-	    my($node, $start, $depth) = @_;
-	    if (ref $node) {
-		my $tag = $node->tag;
-		if ($start) {
-		    push(@html, $node->starttag);
-		} elsif (not ($emptyElement{$tag} or $optionalEndTag{$tag})) {
-		    push(@html, $node->endtag);
-		}
-	    } else {
-		# simple text content
-		HTML::Entities::encode_entities($node, "<>&");
-		push(@html, $node);
-	    }
+# Prints an indented outline of this element and its descendants to $fh.
+#
+#   $fh    - output handle; defaults to STDOUT when undefined.
+#   $depth - starting indentation level (two spaces per level); defaults
+#            to 0 when undefined.
+#
+# Each element is shown as its start-tag followed by " @" and its address,
+# with " (IMPLICIT)" appended when its '_implicit' flag is true.  Text
+# children are printed in double quotes, one level deeper.
+sub dump {
+    my ( $self, $fh, $depth ) = @_;
+    $fh    = *STDOUT{IO} unless defined $fh;
+    $depth = 0           unless defined $depth;
+    print $fh "  " x $depth, $self->starttag, " \@", $self->address,
+        $self->{'_implicit'} ? " (IMPLICIT)\n" : "\n";
+    for ( @{ $self->{'_content'} } ) {
+        if ( ref $_ ) {    # element
+            $_->dump( $fh, $depth + 1 );    # recurse
         }
-    );
-    join('', @html, "\n");
+        else {                              # text node
+            print $fh "  " x ( $depth + 1 );
+            if ( length($_) > 65 or m<[\x00-\x1F]> ) {
+
+                # it needs prettyin' up somehow or other:
+                # text over 65 characters is cut there and given a '...'
+                # suffix, and control characters are shown as \xNN.
+                my $x
+                    = ( length($_) <= 65 )
+                    ? $_
+                    : ( substr( $_, 0, 65 ) . '...' );
+                $x =~ s<([\x00-\x1F])>
+                     <'\\x'.(unpack("H2",$1))>eg;
+                print $fh qq{"$x"\n};
+            }
+            else {
+                print $fh qq{"$_"\n};
+            }
+        }
+    }
 }
 
-sub format
-{
-    my($self, $formatter) = @_;
-    unless (defined $formatter) {
-	require HTML::FormatText;
-	$formatter = new HTML::FormatText;
+
+# Serializes this element and everything under it as an HTML string.
+#
+#   $entities      - passed on to starttag() and to
+#                    HTML::Entities::encode_entities() as the set of
+#                    characters to encode.  A defined-but-empty string
+#                    disables encoding of text segments.
+#   $indent        - when defined and non-empty, output is indented: this
+#                    string is repeated once per depth level.
+#   $omissible_map - hashref of tag names whose end-tags are left out;
+#                    defaults to %HTML::Element::optionalEndTag.
+#
+# Text directly under a parent listed in %HTML::Tagset::isCDATA_Parent is
+# never entity-encoded, and no text is encoded while $encoded_content is
+# true.  End-tags are also left out for tags in _empty_element_map.
+# If $self->{_store_declarations} is true and $self->{_decl} is defined,
+# the declaration text is put first, as "<!...>\n".
+sub as_HTML {
+    my ( $self, $entities, $indent, $omissible_map ) = @_;
+
+    #my $indent_on = defined($indent) && length($indent);
+    my @html = ();
+
+    $omissible_map ||= \%HTML::Element::optionalEndTag;
+    my $empty_element_map = $self->_empty_element_map;
+
+    # State carried from one callback invocation to the next (indenting
+    # branch only):
+    my $last_tag_tightenable    = 0;
+    my $this_tag_tightenable    = 0;
+    my $nonindentable_ancestors = 0;    # count of nonindentible tags over us.
+
+    my ( $tag, $node, $start, $depth ); # per-iteration scratch
+
+    if ( defined($indent) && length($indent) ) {
+        $self->traverse(
+            sub {
+                ( $node, $start, $depth ) = @_;
+                if ( ref $node ) {      # it's an element
+
+                    # detect bogus classes. RT #35948, #61673
+                    $node->can('starttag')
+                        or Carp::confess( "Object of class "
+                            . ref($node)
+                            . " cannot be processed by HTML::Element" );
+
+                    $tag = $node->{'_tag'};
+
+                    if ($start) {       # on the way in
+
+                        # A newline plus indentation goes before this tag
+                        # only if this tag and the previously emitted one
+                        # are both in canTighten, and we are not under a
+                        # pre/textarea/CDATA parent.
+                        if ((   $this_tag_tightenable
+                                = $HTML::Element::canTighten{$tag}
+                            )
+                            and !$nonindentable_ancestors
+                            and $last_tag_tightenable
+                            )
+                        {
+                            push
+                                @html,
+                                "\n",
+                                $indent x $depth,
+                                $node->starttag($entities),
+                                ;
+                        }
+                        else {
+                            push( @html, $node->starttag($entities) );
+                        }
+                        $last_tag_tightenable = $this_tag_tightenable;
+
+                        ++$nonindentable_ancestors
+                            if $tag eq 'pre' or $tag eq 'textarea'
+                                or $HTML::Tagset::isCDATA_Parent{$tag};
+
+                    }
+                    elsif (
+                        not(   $empty_element_map->{$tag}
+                            or $omissible_map->{$tag} )
+                        )
+                    {
+
+                        # on the way out
+                        if (   $tag eq 'pre' or $tag eq 'textarea'
+                            or $HTML::Tagset::isCDATA_Parent{$tag} )
+                        {
+                            --$nonindentable_ancestors;
+                            $last_tag_tightenable
+                                = $HTML::Element::canTighten{$tag};
+                            push @html, $node->endtag;
+
+                        }
+                        else {    # general case
+                            if ((   $this_tag_tightenable
+                                    = $HTML::Element::canTighten{$tag}
+                                )
+                                and !$nonindentable_ancestors
+                                and $last_tag_tightenable
+                                )
+                            {
+                                push
+                                    @html,
+                                    "\n",
+                                    $indent x $depth,
+                                    $node->endtag,
+                                    ;
+                            }
+                            else {
+                                push @html, $node->endtag;
+                            }
+                            $last_tag_tightenable = $this_tag_tightenable;
+
+                           #print "$tag tightenable: $this_tag_tightenable\n";
+                        }
+                    }
+                }
+                else {    # it's a text segment
+
+                    $last_tag_tightenable = 0;    # I guess this is right
+                    HTML::Entities::encode_entities( $node, $entities )
+
+                        # That does magic things if $entities is undef.
+                        unless (
+                        ( defined($entities) && !length($entities) )
+
+                        # If there's no entity to encode, don't call it
+                        || $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }
+
+                        # To keep from amp-escaping children of script et al.
+                        # That doesn't deal with descendants; but then, CDATA
+                        #  parents shouldn't /have/ descendants other than a
+                        #  text children (or comments?)
+                        || $encoded_content
+                        );
+                    if ($nonindentable_ancestors) {
+                        push @html, $node;    # say no go
+                    }
+                    else {
+                        if ($last_tag_tightenable) {
+
+                            # NOTE(review): no /g here, so only the first
+                            # run of whitespace is collapsed -- confirm
+                            # that this is intended.
+                            $node =~ s<[\n\r\f\t ]+>< >s;
+
+                            #$node =~ s< $><>s;
+                            $node =~ s<^ ><>s;
+                            push
+                                @html,
+                                "\n",
+                                $indent x $depth,
+                                $node,
+
+           #Text::Wrap::wrap($indent x $depth, $indent x $depth, "\n" . $node)
+                                ;
+                        }
+                        else {
+                            push
+                                @html,
+                                $node,
+
+                                #Text::Wrap::wrap('', $indent x $depth, $node)
+                                ;
+                        }
+                    }
+                }
+                1;    # keep traversing
+            }
+        );            # End of parms to traverse()
     }
-    $formatter->format($self);
+    else {            # no indenting -- much simpler code
+        $self->traverse(
+            sub {
+                ( $node, $start ) = @_;
+                if ( ref $node ) {
+
+                    # detect bogus classes. RT #35948
+                    $node->isa( $self->element_class )
+                        or Carp::confess( "Object of class "
+                            . ref($node)
+                            . " cannot be processed by HTML::Element" );
+
+                    $tag = $node->{'_tag'};
+                    if ($start) {    # on the way in
+                        push( @html, $node->starttag($entities) );
+                    }
+                    elsif (
+                        not(   $empty_element_map->{$tag}
+                            or $omissible_map->{$tag} )
+                        )
+                    {
+
+                        # on the way out
+                        push( @html, $node->endtag );
+                    }
+                }
+                else {
+
+                    # simple text content
+                    HTML::Entities::encode_entities( $node, $entities )
+
+                        # That does magic things if $entities is undef.
+                        unless (
+                        ( defined($entities) && !length($entities) )
+
+                        # If there's no entity to encode, don't call it
+                        || $HTML::Tagset::isCDATA_Parent{ $_[3]{'_tag'} }
+
+                        # To keep from amp-escaping children of script et al.
+                        # That doesn't deal with descendants; but then, CDATA
+                        #  parents shouldn't /have/ descendants other than a
+                        #  text children (or comments?)
+                        || $encoded_content
+                        );
+                    push( @html, $node );
+                }
+                1;    # keep traversing
+            }
+        );            # End of parms to traverse()
+    }
+
+    # Put the stored declaration, if any, in front of everything else.
+    if ( $self->{_store_declarations} && defined $self->{_decl} ) {
+        unshift @html, sprintf "<!%s>\n", $self->{_decl}->{text};
+    }
+
+    return join( '', @html );
 }
 
 
-1;
+# Returns the text found under this element, concatenated in document
+# order.
+#
+# Options (passed as key/value pairs):
+#   skip_dels - if true, the content of 'del' elements is left out.
+#
+# The content of 'style' and 'script' elements is always left out.
+# Undefined entries in a content list are skipped.
+sub as_text {
 
-__END__
+    # Yet another iteratively implemented traverser
+    my ( $this, %options ) = @_;
+    my $skip_dels = $options{'skip_dels'} || 0;
+    my (@pile) = ($this);
+    my $tag;
+    my $text = '';
+    while (@pile) {
+        if ( !defined( $pile[0] ) ) {    # undef!
+
+            # Drop it.  Left at the head of the pile it would make this
+            # loop spin forever, since no other branch would remove it.
+            shift @pile;
+        }
+        elsif ( !ref( $pile[0] ) ) {     # text bit!  save it!
+            $text .= shift @pile;
+        }
+        else {                           # it's a ref -- traverse under it
+
+            # The condition runs first: it shifts the element off the pile
+            # into $this, whose children are then queued in its place.
+            unshift @pile, @{ $this->{'_content'} || $nillio }
+                unless ( $tag = ( $this = shift @pile )->{'_tag'} ) eq 'style'
+                or $tag eq 'script'
+                or ( $skip_dels and $tag eq 'del' );
+        }
+    }
+    return $text;
+}
 
-=back
+# extra_chars added for RT #26436
+# Like as_text(), but with leading and trailing whitespace removed and
+# every inner run of whitespace squeezed to a single space.
+#
+# Options are handed through to as_text().  The 'extra_chars' option is
+# interpolated into the whitespace character class, so the characters it
+# names are treated as whitespace too.
+sub as_trimmed_text {
+    my ( $this, %options ) = @_;
+
+    my $text = $this->as_text(%options);
+
+    my $extra_chars = '';
+    $extra_chars = $options{'extra_chars'}
+        if defined $options{'extra_chars'};
+
+    for ($text) {
+        s/[\n\r\f\t$extra_chars ]+$//s;
+        s/^[\n\r\f\t$extra_chars ]+//s;
+        s/[\n\r\f\t$extra_chars ]+/ /g;
+    }
+    return $text;
+}
 
-=head1 BUGS
+# Alias for as_trimmed_text(), because I forget; all arguments are
+# handed through unchanged.
+sub as_text_trimmed {
+    my $self = shift;
+    return $self->as_trimmed_text(@_);
+}
 
-If you want to free the memory assosiated with a tree built of
-HTML::Element nodes then you will have to delete it explicitly.  The
-reason for this is that perl currently has no proper garbage
-collector, but depends on reference counts in the objects.  This
-scheme fails because the parse tree contains circular references
-(parents have references to their children and children have a
-reference to their parent).
 
-=head1 SEE ALSO
+# TODO: make it wrap, if not indent?
 
-L<HTML::AsSubs>
+# Serializes this element and everything under it as an XML string,
+# terminated by a newline.
+#
+# Croaks if an element has an attribute name, other than '/', that
+# $self->_valid_name rejects.  An element whose tag is in
+# _empty_element_map and which has no content is written as a single
+# <... /> tag with no end-tag.  Text segments are escaped with _xml_escape.
+sub as_XML {
 
-=head1 COPYRIGHT
+    # based on as_HTML
+    my ($self) = @_;
 
-Copyright 1995-1998 Gisle Aas.
+    #my $indent_on = defined($indent) && length($indent);
+    my @xml               = ();
+    my $empty_element_map = $self->_empty_element_map;
+
+    my ( $tag, $node, $start );    # per-iteration scratch
+    $self->traverse(
+        sub {
+            ( $node, $start ) = @_;
+            if ( ref $node ) {     # it's an element
+                $tag = $node->{'_tag'};
+                if ($start) {      # on the way in
+
+                    # Refuse to emit attribute names that are not valid.
+                    foreach my $attr ( $node->all_attr_names() ) {
+                        Carp::croak(
+                            "$tag has an invalid attribute name '$attr'")
+                            unless ( $attr eq '/' || $self->_valid_name($attr) );
+                    }
+
+                    # The extra argument makes starttag_XML close the tag
+                    # with " />".
+                    if ( $empty_element_map->{$tag}
+                        and !@{ $node->{'_content'} || $nillio } )
+                    {
+                        push( @xml, $node->starttag_XML( undef, 1 ) );
+                    }
+                    else {
+                        push( @xml, $node->starttag_XML(undef) );
+                    }
+                }
+                else {    # on the way out
+                    unless ( $empty_element_map->{$tag}
+                        and !@{ $node->{'_content'} || $nillio } )
+                    {
+                        push( @xml, $node->endtag_XML() );
+                    }     # otherwise it will have been an <... /> tag.
+                }
+            }
+            else {        # it's just text
+                _xml_escape($node);
+                push( @xml, $node );
+            }
+            1;            # keep traversing
+        }
+    );
+
+    join( '', @xml, "\n" );
+}
+
+# Escapes, in place, every string passed in so that it is safe to use as
+# XML character data or inside a double-quoted attribute value.
+#
+# When $encoded_content is true, an ampersand that already starts a
+# character reference (decimal, or hexadecimal in either letter case) or
+# a named entity reference is left alone; every other ampersand becomes
+# "&amp;".  When it is false, every ampersand is escaped.
+# The characters < > " ' are always escaped.  Returns nothing.
+sub _xml_escape {
+
+# DESTRUCTIVE (a.k.a. "in-place")
+# Five required escapes: http://www.w3.org/TR/2006/REC-xml11-20060816/#syntax
+# We allow & if it's part of a valid escape already: http://www.w3.org/TR/2006/REC-xml11-20060816/#sec-references
+    foreach my $x (@_) {
+
+        # In strings with no encoded entities all & should be encoded.
+        if ($encoded_content) {
+            $x
+                =~ s/&(?!                 # An ampersand that isn't followed by...
+                (?: \#[0-9]+; |           # A hash mark, ASCII digits and semicolon, or
+                \#x[0-9a-fA-F]+; |        # A hash mark, "x", hex digits (any case) and semicolon, or
+                $START_CHAR$NAME_CHAR+; ) # A valid unicode entity name and semicolon
+           )/&amp;/gx;    # Needs to be escaped to amp
+        }
+        else {
+            $x =~ s/&/&amp;/g;
+        }
+
+        # simple character escapes
+        $x =~ s/</&lt;/g;
+        $x =~ s/>/&gt;/g;
+        $x =~ s/"/&quot;/g;
+        $x =~ s/'/&apos;/g;
+    }
+    return;
+}
+
+
+# NOTES:
+#
+# It's been suggested that attribute names be made :-keywords:
+#   (:_tag "img" :border 0 :src "pie.png" :usemap "#main.map")
+# However, it seems that Scheme has no such data type as :-keywords.
+# So, for the moment at least, I tend toward simplicity, uniformity,
+#  and universality, where everything is a string or a list.
+
+# Returns a string that renders this element and everything under it as
+# a Lisp-style form.
+#
+# Each element becomes a parenthesized list of double-quoted strings:
+# "_tag" and the tag name first, then every other key of the element hash
+# that has a defined value, in sorted order (the keys '_content', '_tag',
+# '_parent' and '/' are left out).  A non-empty content list follows as
+# "_content" and a nested list, one child per line, indented two spaces
+# per level.  Characters outside the printable-ASCII set accepted by the
+# patterns below (which also excludes the double quote and the backslash)
+# are written as backslash-octal escapes.
+sub as_Lisp_form {
+    my @out;
+
+    my $sub;
+    my $depth = 0;
+    my ( @list, $val );    # shared by every level of the recursion
+    $sub = sub {    # Recursor
+        my $self = $_[0];
+        @list = ( '_tag', $self->{'_tag'} );
+        @list = () unless defined $list[-1];    # unlikely
+
+        for ( sort keys %$self ) {              # predictable ordering
+            next
+                if $_ eq '_content'
+                    or $_ eq '_tag'
+                    or $_ eq '_parent'
+                    or $_ eq '/';
+
+            # Leave the other private attributes, I guess.
+            push @list, $_, $val
+                if defined( $val = $self->{$_} );    # and !ref $val;
+        }
+
+        for (@list) {
+
+            # octal-escape it
+            s<([^\x20\x21\x23\x27-\x5B\x5D-\x7E])>
+         <sprintf('\\%03o',ord($1))>eg;
+            $_ = qq{"$_"};
+        }
+
+        # splice empties @list, so it is free for reuse by the recursive
+        # calls made below.
+        push @out, ( '  ' x $depth ) . '(' . join ' ', splice @list;
+        if ( @{ $self->{'_content'} || $nillio } ) {
+            $out[-1] .= " \"_content\" (\n";
+            ++$depth;
+            foreach my $c ( @{ $self->{'_content'} } ) {
+                if ( ref($c) ) {
+
+                    # an element -- recurse
+                    $sub->($c);
+                }
+                else {
+
+                    # a text segment -- stick it in and octal-escape it
+                    push @out, $c;
+                    $out[-1] =~ s<([^\x20\x21\x23\x27-\x5B\x5D-\x7E])>
+             <sprintf('\\%03o',ord($1))>eg;
+
+                    # And quote and indent it.
+                    $out[-1] .= "\"\n";
+                    $out[-1] = ( '  ' x $depth ) . '"' . $out[-1];
+                }
+            }
+            --$depth;
+
+            # Replace the last line's final character (its newline) with
+            # the closing parentheses.
+            substr( $out[-1], -1 )
+                = "))\n";    # end of _content and of the element
+        }
+        else {
+            $out[-1] .= ")\n";
+        }
+        return;
+    };
+
+    $sub->( $_[0] );
+    undef $sub;    # the closure refers to $sub itself; break that cycle
+    return join '', @out;
+}
+
+
+# Runs this element through a formatter object and returns whatever the
+# formatter's format() method returns.  When no formatter is supplied,
+# HTML::FormatText is loaded on demand and a fresh instance is used.
+sub format {
+    my $self      = shift;
+    my $formatter = shift;
+
+    if ( !defined $formatter ) {
+        # RECOMMEND PREREQ: HTML::FormatText
+        require HTML::FormatText;
+        $formatter = HTML::FormatText->new();
+    }
+
+    return $formatter->format($self);
+}
+
+
+# Returns the HTML start-tag string for this element.
+#
+#   $entities - passed to HTML::Entities::encode_entities() as the set of
+#               characters to encode in attribute values.  Values are not
+#               encoded when it is a defined-but-empty string, or while
+#               $encoded_content is true.
+#
+# Pseudo-elements are handled first: '~literal' yields its text as is,
+# '~declaration' yields "<!text>", '~pi' yields "<?text>" and '~comment'
+# yields "<!--text-->".
+#
+# For real elements, the tag and attribute names are upper-cased when
+# $html_uc is true and lower-cased otherwise.  Attributes come out in
+# sorted order; keys that are empty, start with '_' or equal '/' are
+# skipped, as are undefined values.  The tag ends in " />" when the
+# element has no content and its tag is in _empty_element_map, and in ">"
+# otherwise.
+sub starttag {
+    my ( $self, $entities ) = @_;
+
+    my $name = $self->{'_tag'};
+
+    return $self->{'text'}              if $name eq '~literal';
+    return "<!" . $self->{'text'} . ">" if $name eq '~declaration';
+    return "<?" . $self->{'text'} . ">" if $name eq '~pi';
+
+    if ( $name eq '~comment' ) {
+        if ( ref( $self->{'text'} || '' ) eq 'ARRAY' ) {
+
+            # Does this ever get used?  And is this right?
+            return
+                "<!"
+                . join( ' ', map( "--$_--", @{ $self->{'text'} } ) ) . ">";
+        }
+        else {
+            return "<!--" . $self->{'text'} . "-->";
+        }
+    }
+
+    my $tag = $html_uc ? "<\U$name" : "<\L$name";
+    my $val;
+    for ( sort keys %$self ) {    # predictable ordering
+        next if !length $_ or m/^_/s or $_ eq '/';
+        $val = $self->{$_};
+        next if !defined $val;    # or ref $val;
+
+        # An attribute is written in minimized form (name only) when its
+        # value equals its name and %HTML::Element::boolean_attr lists it
+        # for this tag -- either as a hashref of names or as one name.
+        if ($_ eq $val &&         # if attribute is boolean, for this element
+            exists( $HTML::Element::boolean_attr{$name} )
+            && (ref( $HTML::Element::boolean_attr{$name} )
+                ? $HTML::Element::boolean_attr{$name}{$_}
+                : $HTML::Element::boolean_attr{$name} eq $_
+            )
+            )
+        {
+            $tag .= $html_uc ? " \U$_" : " \L$_";
+        }
+        else {                    # non-boolean attribute
+
+            # A value that is itself a '~literal' HTML::Element is
+            # inserted verbatim, without entity-encoding.
+            if ( ref $val eq 'HTML::Element'
+                and $val->{_tag} eq '~literal' )
+            {
+                $val = $val->{text};
+            }
+            else {
+                HTML::Entities::encode_entities( $val, $entities )
+                    unless (
+                    defined($entities) && !length($entities)
+                    || $encoded_content
+
+                    );
+            }
+
+            # \E ends the case change so the value keeps its own case.
+            $val = qq{"$val"};
+            $tag .= $html_uc ? qq{ \U$_\E=$val} : qq{ \L$_\E=$val};
+        }
+    }    # for keys
+    if ( scalar $self->content_list == 0
+        && $self->_empty_element_map->{ $self->tag } )
+    {
+        return $tag . " />";
+    }
+    else {
+        return $tag . ">";
+    }
+}
+
+
+# Returns the XML start-tag string for this element.
+#
+# The number of arguments matters: when exactly three are passed
+# (invocant included) the tag is closed with " />", otherwise with ">".
+#
+# Pseudo-elements: '~literal' yields its text as is, '~declaration'
+# yields "<!text>", '~pi' yields "<?text?>", and '~comment' yields
+# "<!-- text -->" with every "--" in the text rewritten as "-&#45;".
+#
+# Attributes come out in sorted order, each value escaped with
+# _xml_escape; keys that are empty, start with '_' or equal '/' are
+# skipped, as are undefined values.
+sub starttag_XML {
+    my $self = $_[0];    # @_ stays intact: its length is inspected below
+
+    # and a third parameter to signal emptiness?
+
+    my $name = $self->{'_tag'};
+
+    if ( $name eq '~literal' )     { return $self->{'text'}; }
+    if ( $name eq '~declaration' ) { return '<!' . $self->{'text'} . '>'; }
+    if ( $name eq '~pi' )          { return "<?" . $self->{'text'} . "?>"; }
+
+    if ( $name eq '~comment' ) {
+        my $body = $self->{'text'};
+
+        # Does this ever get used?  And is this right?
+        $body = join( ' ', @$body ) if ref( $body || '' ) eq 'ARRAY';
+
+        $body =~ s/--/-&#45;/g;    # can't have double --'s in XML comments
+        return "<!-- $body -->";
+    }
+
+    my @pieces = ("<$name");
+    foreach my $attr ( sort keys %$self ) {    # predictable ordering
+        next unless length $attr;
+        next if $attr =~ m/^_/s;
+        next if $attr eq '/';
+
+        # Hm -- what to do if val is undef?
+        # I suppose that shouldn't ever happen.
+        my $value = $self->{$attr};
+        next unless defined $value;                # or ref $value;
+
+        _xml_escape($value);
+        push @pieces, qq{ $attr="$value"};
+    }
+
+    push @pieces, ( @_ == 3 ? ' />' : '>' );
+    return join '', @pieces;
+}
+
+
+# Returns the HTML end-tag string for this element: the tag name is
+# upper-cased when $html_uc is true and lower-cased otherwise.
+sub endtag {
+    my ($self) = @_;
+    my $name = $self->{'_tag'};
+    return $html_uc ? "</\U$name>" : "</\L$name>";
+}
+
+# Returns the XML end-tag string for this element; the tag name is used
+# exactly as stored, with no case change.
+sub endtag_XML {
+    my ($self) = @_;
+    return '</' . $self->{'_tag'} . '>';
+}
+
+#==========================================================================
+# This, ladies and germs, is an iterative implementation of a
+# recursive algorithm.  DON'T TRY THIS AT HOME.
+# Basically, the algorithm says:
+#
+# To traverse:
+#   1: pre-order visit this node
+#   2: traverse any children of this node
+#   3: post-order visit this node, unless it's a text segment,
+#       or a prototypically empty node (like "br", etc.)
+# Add to that the consideration of the callbacks' return values,
+# so you can block visitation of the children, or siblings, or
+# abort the whole excursion, etc.
+#
+# So, why all this hassle with making the code iterative?
+# It makes for real speed, because it eliminates the whole
+# hassle of Perl having to allocate scratch space for each
+# instance of the recursive sub.  Since the algorithm
+# is basically simple (and not all recursive ones are!) and
+# has few necessary lexicals (basically just the current node's
+# content list, and the current position in it), it was relatively
+# straightforward to store that information not as the frame
+# of a sub, but as a stack, i.e., a simple Perl array (well, two
+# of them, actually: one for content-listrefs, one for indexes of
+# current position in each of those).
+
+my $NIL = [];
+
+# Walks the tree rooted at $start, calling callbacks as it goes, and
+# returns $start.
+#
+#   $callback    - either a coderef, used for both the pre-order and the
+#                  post-order visit, or an arrayref [ $pre, $post ] of
+#                  coderefs, either of which may be false.  If both are
+#                  false, nothing is traversed.
+#   $ignore_text - if true, text segments are not passed to the pre-order
+#                  callback.
+#
+# Callbacks are called with ( $node, $startflag, $depth ), where
+# $startflag is 1 for a pre-order call and 0 for a post-order call.  For
+# a text segment two more arguments follow: the parent element and the
+# index of the segment in the parent's content list.  Text segments get
+# no post-order call, and neither do elements that are flagged
+# '_empty_element' or that are listed in _empty_element_map and have no
+# content.
+#
+# A false return value from the pre-order callback prunes the node's
+# children.  A reference blessed into $travsignal_package is dereferenced
+# and read as one of ABORT, PRUNE, PRUNE_SOFTLY, OK or PRUNE_UP; any other
+# string there is fatal.
+#
+# Croaks when called as a class method, when no callback reference is
+# given, or when an entry of the arrayref is true but not a coderef.
+sub traverse {
+    my ( $start, $callback, $ignore_text ) = @_;
+
+    Carp::croak "traverse can be called only as an object method"
+        unless ref $start;
+
+    Carp::croak('must provide a callback for traverse()!')
+        unless defined $callback and ref $callback;
+
+    # Elementary type-checking:
+    my ( $c_pre, $c_post );
+    if ( UNIVERSAL::isa( $callback, 'CODE' ) ) {
+        $c_pre = $c_post = $callback;
+    }
+    elsif ( UNIVERSAL::isa( $callback, 'ARRAY' ) ) {
+        ( $c_pre, $c_post ) = @$callback;
+        Carp::croak(
+            "pre-order callback \"$c_pre\" is true but not a coderef!")
+            if $c_pre and not UNIVERSAL::isa( $c_pre, 'CODE' );
+        Carp::croak(
+            "post-order callback \"$c_post\" is true but not a coderef!")
+            if $c_post and not UNIVERSAL::isa( $c_post, 'CODE' );
+        return $start unless $c_pre or $c_post;
+
+        # otherwise there'd be nothing to actually do!
+    }
+    else {
+
+        # NOTE(review): $callback is already known to be a reference at
+        # this point, so this condition can never be true and a reference
+        # of any other kind is traversed with no callbacks at all.
+        # Confirm whether an unconditional croak was intended.
+        Carp::croak("$callback is not a known kind of reference")
+            unless ref($callback);
+    }
+
+    my $empty_element_map = $start->_empty_element_map;
+
+    my (@C) = [$start];    # a stack containing lists of children
+    my (@I) = (-1);        # initial value must be -1 for each list
+         # a stack of indexes to current position in corresponding lists in @C
+         # In each of these, 0 is the active point
+
+    # scratch:
+    my ($rv,           # return value of callback
+        $this,         # current node
+        $content_r,    # child list of $this
+    );
+
+    # THE BIG LOOP
+    while (@C) {
+
+        # Move to next item in this frame
+        if ( !defined( $I[0] ) or ++$I[0] >= @{ $C[0] } ) {
+
+            # We either went off the end of this list, or aborted the list
+            # So call the post-order callback:
+            if (    $c_post
+                and defined $I[0]
+                and @C > 1
+
+                # to keep the next line from autovivifying
+                and defined( $this = $C[1][ $I[1] ] )    # sanity, and
+                     # suppress callbacks on exiting the fictional top frame
+                and ref($this)    # sanity
+                and not(
+                    $this->{'_empty_element'}
+                    || ( $empty_element_map->{ $this->{'_tag'} || '' }
+                        && !@{ $this->{'_content'} } )    # RT #49932
+                )    # things that don't get post-order callbacks
+                )
+            {
+                shift @I;
+                shift @C;
+
+                #print "Post! at depth", scalar(@I), "\n";
+                $rv = $c_post->(
+
+                    #map $_, # copy to avoid any messiness
+                    $this,     # 0: this
+                    0,         # 1: startflag (0 for post-order call)
+                    @I - 1,    # 2: depth
+                );
+
+                if ( defined($rv) and ref($rv) eq $travsignal_package ) {
+                    $rv = $$rv;    #deref
+                    if ( $rv eq 'ABORT' ) {
+                        last;      # end of this excursion!
+                    }
+                    elsif ( $rv eq 'PRUNE' ) {
+
+                        # NOOP on post!!
+                    }
+                    elsif ( $rv eq 'PRUNE_SOFTLY' ) {
+
+                        # NOOP on post!!
+                    }
+                    elsif ( $rv eq 'OK' ) {
+
+                        # noop
+                    }
+                    elsif ( $rv eq 'PRUNE_UP' ) {
+                        $I[0] = undef;
+                    }
+                    else {
+                        die "Unknown travsignal $rv\n";
+
+                        # should never happen
+                    }
+                }
+            }
+            else {
+                shift @I;
+                shift @C;
+            }
+            next;
+        }
+
+        $this = $C[0][ $I[0] ];
+
+        if ($c_pre) {
+            if ( defined $this and ref $this ) {    # element
+                $rv = $c_pre->(
+
+                    #map $_, # copy to avoid any messiness
+                    $this,     # 0: this
+                    1,         # 1: startflag (1 for pre-order call)
+                    @I - 1,    # 2: depth
+                );
+            }
+            else {             # text segment
+                next if $ignore_text;
+                $rv = $c_pre->(
+
+                    #map $_, # copy to avoid any messiness
+                    $this,           # 0: this
+                    1,               # 1: startflag (1 for pre-order call)
+                    @I - 1,          # 2: depth
+                    $C[1][ $I[1] ],  # 3: parent
+                                     # And there will always be a $C[1], since
+                             #  we can't start traversing at a text node
+                    $I[0]    # 4: index of self in parent's content list
+                );
+            }
+            if ( not $rv ) {    # returned false.  Same as PRUNE.
+                next;           # prune
+            }
+            elsif ( ref($rv) eq $travsignal_package ) {
+                $rv = $$rv;     # deref
+                if ( $rv eq 'ABORT' ) {
+                    last;       # end of this excursion!
+                }
+                elsif ( $rv eq 'PRUNE' ) {
+                    next;
+                }
+                elsif ( $rv eq 'PRUNE_SOFTLY' ) {
+                    if (ref($this)
+                        and not( $this->{'_empty_element'}
+                            || $empty_element_map->{ $this->{'_tag'} || '' } )
+                        )
+                    {
+
+             # push a dummy empty content list just to trigger a post callback
+                        unshift @I, -1;
+                        unshift @C, $NIL;
+                    }
+                    next;
+                }
+                elsif ( $rv eq 'OK' ) {
+
+                    # noop
+                }
+                elsif ( $rv eq 'PRUNE_UP' ) {
+                    $I[0] = undef;
+                    next;
+
+                    # equivalent of last'ing out of the current child list.
+
+            # Used to have PRUNE_UP_SOFTLY and ABORT_SOFTLY here, but the code
+            # for these was seriously upsetting, served no particularly clear
+            # purpose, and could not, I think, be easily implemented with a
+            # recursive routine.  All bad things!
+                }
+                else {
+                    die "Unknown travsignal $rv\n";
+
+                    # should never happen
+                }
+            }
+
+            # else fall thru to meaning same as \'OK'.
+        }
+
+        # end of pre-order calling
+
+        # Now queue up content list for the current element...
+        if (ref $this
+            and not(    # ...except for those which...
+                not( $content_r = $this->{'_content'} and @$content_r )
+
+                # ...have empty content lists...
+                and $this->{'_empty_element'}
+                || $empty_element_map->{ $this->{'_tag'} || '' }
+
+                # ...and that don't get post-order callbacks
+            )
+            )
+        {
+            unshift @I, -1;
+            unshift @C, $content_r || $NIL;
+
+            #print $this->{'_tag'}, " ($this) adds content_r ", $C[0], "\n";
+        }
+    }
+    return $start;
+}
+
+
+# Report whether this element is, or is contained in, any of the given items.
+# Each item is either an element reference (compared with every node on the
+# way up) or a tag-name string (compared with each node's '_tag').  The walk
+# begins at the element itself.  Returns 1 on the first match, otherwise 0
+# (also 0 when no items are given).
+sub is_inside {
+    my ( $self, @candidates ) = @_;
+    return 0 unless @candidates;
+
+    for (
+        my $node = $self;
+        defined $node and ref $node;
+        $node = $node->{'_parent'}
+        )
+    {
+        foreach my $candidate (@candidates) {
+            my $hit
+                = ref($candidate)
+                ? $candidate eq $node              # element
+                : $candidate eq $node->{'_tag'};   # tag name
+            return 1 if $hit;
+        }
+    }
+    return 0;
+}
+
+
+# True when the element has no '_content' list or the list has no items.
+sub is_empty {
+    my $content = $_[0]->{'_content'};
+    return !( $content && @$content );
+}
+
+
+# Return this element's index within its parent's content list, or undef
+# when it has no parent, the parent has no content list, or (which should
+# not happen) the element is not found there.
+sub pindex {
+    my $self = shift;
+
+    my $parent   = $self->{'_parent'}    or return undef;
+    my $siblings = $parent->{'_content'} or return undef;
+
+    my $position = 0;
+    foreach my $sibling (@$siblings) {
+        return $position if ref $sibling and $sibling eq $self;
+        ++$position;
+    }
+    return undef;    # we shouldn't ever get here
+}
+
+#--------------------------------------------------------------------------
+
+
+# Siblings to the left of this element.  In list context: every item of the
+# parent's content list that precedes this element, in order.  Otherwise:
+# the item immediately before it, or undef if it is the first.  Returns
+# nothing when there is no parent or this is the only child.  Dies if the
+# parent has no content or does not list this element.
+sub left {
+    my $self = $_[0];
+    Carp::croak "left() is supposed to be an object method"
+        unless ref $self;
+
+    my $parent   = $self->{'_parent'}    || return;
+    my $siblings = $parent->{'_content'} || die "parent is childless?";
+
+    die "parent is childless" unless @$siblings;
+    return if @$siblings == 1;    # I'm an only child
+
+    # Find our own position among the siblings first.
+    my $position;
+    foreach my $i ( 0 .. $#$siblings ) {
+        my $sibling = $siblings->[$i];
+        next unless ref $sibling and $sibling eq $self;
+        $position = $i;
+        last;
+    }
+    die "I'm not in my parent's content list?" unless defined $position;
+
+    return @{$siblings}[ 0 .. $position - 1 ] if wantarray;
+    return $position ? $siblings->[ $position - 1 ] : undef;
+}
+
+
+# Siblings to the right of this element.  In list context: every item of the
+# parent's content list that follows this element, in order.  Otherwise: the
+# item immediately after it, or undef if it is the last.  Returns nothing
+# when there is no parent or this is the only child.  Dies if the parent has
+# no content or does not list this element.
+sub right {
+    my $self = $_[0];
+    Carp::croak "right() is supposed to be an object method"
+        unless ref $self;
+
+    my $parent   = $self->{'_parent'}    || return;
+    my $siblings = $parent->{'_content'} || die "parent is childless?";
+
+    die "parent is childless" unless @$siblings;
+    return if @$siblings == 1;    # I'm an only child
+
+    # Find our own position among the siblings first.
+    my $position;
+    foreach my $i ( 0 .. $#$siblings ) {
+        my $sibling = $siblings->[$i];
+        next unless ref $sibling and $sibling eq $self;
+        $position = $i;
+        last;
+    }
+    die "I'm not in my parent's content list?" unless defined $position;
+
+    return @{$siblings}[ $position + 1 .. $#$siblings ] if wantarray;
+    return ( $position == $#$siblings ) ? undef : $siblings->[ $position + 1 ];
+}
+
+#--------------------------------------------------------------------------
+
+
+# Two-way mapping between elements and dotted index paths.
+#
+# With no argument: returns this element's address -- the pindex of each
+# node from the root down to this element, joined with '.'; an undefined
+# (or zero) pindex is written as '0'.
+#
+# With an address argument: returns the node at that address, or undef if
+# the path cannot be followed.  An address with an empty first component
+# (i.e. starting with '.') is resolved relative to this element; otherwise
+# its first component must be 0 and the rest is resolved from the root.
+sub address {
+    my $self = $_[0];
+
+    if ( @_ == 1 ) {    # report-address form
+        my @indexes = map { $_->pindex() || '0' }    # root's undef -> '0'
+            $self, $self->lineage;
+        return join '.', reverse @indexes;           # start at the top
+    }
+
+    # get-node-at-address form
+    my @steps = split( /\./, $_[1] );
+    my $node;
+
+    if ( @steps and !length $steps[0] ) {    # relative addressing
+        $node = $self;
+        shift @steps;
+    }
+    else {                                   # absolute addressing
+        return undef unless 0 == shift @steps;    # the initial 0-for-root
+        $node = $self->root;
+    }
+
+    while (@steps) {
+        my $children = $node->{'_content'};
+
+        # make sure the index isn't too high
+        return undef unless $children and @$children > $steps[0];
+
+        $node = $children->[ shift @steps ];
+
+        # a text node cannot be descended into any further
+        return undef if @steps and not ref $node;
+    }
+
+    return $node;
+}
+
+
+# Number of ancestors above this element: follows '_parent' links for as
+# long as they are defined references and counts the steps.
+sub depth {
+    my $levels = 0;
+    for (
+        my $node = $_[0]->{'_parent'};
+        defined $node and ref $node;
+        $node = $node->{'_parent'}
+        )
+    {
+        ++$levels;
+    }
+    return $levels;
+}
+
+
+# Topmost ancestor of this element: follows '_parent' links for as long as
+# they are defined references.  An element without such a parent is its own
+# root.
+sub root {
+    my $top = shift;
+    while (1) {
+        my $above = $top->{'_parent'};
+        last unless defined $above and ref $above;
+        $top = $above;
+    }
+    return $top;
+}
+
+
+# List of this element's ancestors, nearest first (parent, grandparent, ...),
+# gathered by following '_parent' links while they are defined references.
+sub lineage {
+    my $node = shift;
+    my @ancestors;
+    while (1) {
+        $node = $node->{'_parent'};
+        last unless defined $node and ref $node;
+        push @ancestors, $node;
+    }
+    return @ancestors;
+}
+
+
+# List of the '_tag' values of this element's ancestors, nearest first
+# (parent's tag, grandparent's tag, ...).  The walk follows '_parent' links
+# while they are defined references.
+sub lineage_tag_names {
+    my $here = shift;
+    my @lineage_names;
+    while ( defined( $here = $here->{'_parent'} ) and ref($here) ) {
+        push @lineage_names, $here->{'_tag'};
+    }
+    return @lineage_names;
+}
+
+
+# Alternative spelling: forwards all arguments to descendants().
+sub descendents {
+    my $self = shift;
+    return $self->descendants(@_);
+}
+
+# Element nodes below this element, found with a pre-order traverse() that
+# ignores text nodes.  List context returns the elements themselves;
+# otherwise only their count is returned.  The starting element is never
+# included.
+sub descendants {
+    my $start     = shift;
+    my $want_list = wantarray;
+
+    my @found;
+    my $count = -1;    # to offset the starting element being counted
+
+    # Pick the pre-order callback that suits the calling context.
+    my $visit
+        = $want_list
+        ? sub { push( @found, $_[0] ); return 1; }
+        : sub { ++$count; return 1; };
+
+    $start->traverse(
+        [ $visit, undef ],    # pre-order sub only, no post
+        1,                    # ignore text
+    );
+
+    return $count unless $want_list;
+
+    shift @found;             # so the starting element doesn't appear
+    return @found;
+}
+
+
+# Short alias: forwards all arguments to find_by_tag_name().
+sub find {
+    my $self = shift;
+    return $self->find_by_tag_name(@_);
+}
+
+# yup, a handy alias
+
+# Search this element and the elements below it (depth-first, pre-order)
+# for nodes whose '_tag' equals any of the given tag names (after
+# _fold_case).  List context returns every match; otherwise the first match
+# is returned, or nothing if there is none.  With no tag names nothing is
+# returned.
+sub find_by_tag_name {
+    my (@todo) = shift(@_);    # the to-do stack for the traverser
+    Carp::croak "find_by_tag_name can be called only as an object method"
+        unless ref $todo[0];
+    return () unless @_;
+
+    my (@wanted_tags) = $todo[0]->_fold_case(@_);
+    my $want_list = wantarray;
+    my @found;
+
+    while (@todo) {
+        my $node     = shift @todo;
+        my $node_tag = $node->{'_tag'};
+
+        my $is_match = 0;
+        foreach my $wanted (@wanted_tags) {
+            next unless $wanted eq $node_tag;
+            $is_match = 1;
+            last;
+        }
+
+        if ($is_match) {
+            return $node unless $want_list;
+            push @found, $node;
+        }
+
+        # Element children go to the front of the stack; text is skipped.
+        my $children = $node->{'_content'} or next;
+        unshift @todo, grep ref($_), @$children;
+    }
+
+    return @found if $want_list;
+    return;
+}
+
+
+# Search this element and the elements below it (pre-order traverse(), text
+# nodes ignored) for elements having the given attribute with a value that
+# is string-equal to $value.  The attribute name goes through _fold_case.
+# List context returns every match; otherwise the traversal is aborted at
+# the first match and that element (or undef) is returned.  Croaks when the
+# attribute name is undef.
+sub find_by_attribute {
+
+    # We could limit this to non-internal attributes, but hey.
+    my ( $self, $attribute, $value ) = @_;
+    Carp::croak "Attribute must be a defined value!"
+        unless defined $attribute;
+    $attribute = $self->_fold_case($attribute);
+
+    my @matching;
+    my $wantarray = wantarray;
+    $self->traverse(
+        [    # pre-order only
+            sub {
+                if ( exists $_[0]{$attribute}
+                    and $_[0]{$attribute} eq $value )
+                {
+                    push @matching, $_[0];
+                    return HTML::Element::ABORT
+                        unless $wantarray;    # only take the first
+                }
+                1;                            # keep traversing
+            },
+            undef                             # no post
+        ],
+        1,                                    # yes, ignore text nodes.
+    );
+
+    if ($wantarray) {
+        return @matching;
+    }
+    else {
+        return $matching[0];
+    }
+}
+
+#--------------------------------------------------------------------------
+# look_down(@criteria): depth-first, pre-order search of this element and the
+# elements under it; list context returns every match, otherwise the first.
+sub look_down {
+    ref( $_[0] ) or Carp::croak "look_down works only as an object method";
+    # Compile args: a coderef is a criterion; so is each (name, value) pair.
+    my @criteria;
+    for ( my $i = 1; $i < @_; ) {
+        Carp::croak "Can't use undef as an attribute name"
+            unless defined $_[$i];
+        if ( ref $_[$i] ) {
+            Carp::croak "A " . ref( $_[$i] ) . " value is not a criterion"
+                unless ref $_[$i] eq 'CODE';
+            push @criteria, $_[ $i++ ];
+        }
+        else {
+            Carp::croak "param list to look_down ends in a key!" if $i == $#_;
+            push @criteria, [
+                scalar( $_[0]->_fold_case( $_[$i] ) ),
+                defined( $_[ $i + 1 ] )
+                ? ( ( ref $_[ $i + 1 ] ? $_[ $i + 1 ] : lc( $_[ $i + 1 ] ) ),
+                    ref( $_[ $i + 1 ] )
+                    )
+                    # stored as [name, value, ref-type]; plain values are lc'd
+                    # yes, leave that LC!
+                : undef
+            ];
+            $i += 2;
+        }
+    }
+    Carp::croak "No criteria?" unless @criteria;
+    # @pile is the to-do list of elements still to be examined.
+    my (@pile) = ( $_[0] );
+    my ( @matching, $val, $this );
+Node:
+    while ( defined( $this = shift @pile ) ) {
+
+        # Yet another traverser implemented with merely iterative code.
+        foreach my $c (@criteria) {
+            if ( ref($c) eq 'CODE' ) {
+                next Node unless $c->($this);    # jump to the continue block
+            }
+            else {                               # it's an attr-value pair
+                next Node                        # jump to the continue block
+                    if                           # two values are unequal if:
+                        ( defined( $val = $this->{ $c->[0] } ) )
+                    ? (     !defined $c->[ 1
+                                ]    # actual is def, critval is undef => fail
+                                     # a Regexp criterion must match $val;
+                                     # anything else is compared below
+                                or (
+                                  $c->[2] eq 'Regexp'
+                                ? $val !~ $c->[1]
+                                : ( ref $val ne $c->[2]
+
+                                        # have unequal ref values => fail
+                                        or lc($val) ne lc( $c->[1] )
+
+                                       # have unequal lc string values => fail
+                                )
+                                )
+                        )
+                    : (     defined $c->[1]
+                        )    # actual is undef, critval is def => fail
+            }
+        }
+
+        # We make it this far only if all the criteria passed.
+        return $this unless wantarray;
+        push @matching, $this;
+    }
+    continue {    # reached after every node: queue its element children next
+        unshift @pile, grep ref($_), @{ $this->{'_content'} || $nillio };
+    }
+    return @matching if wantarray;
+    return;
+}
+
+
+# look_up(@criteria): test this element and then each of its ancestors in
+# turn (following '_parent' while it is a defined reference) against the
+# criteria.  List context returns every node that meets all criteria,
+# nearest first; otherwise the first such node is returned, or nothing.
+#
+# A criterion is either a coderef (called with the node; must return true)
+# or an (attribute name, value) pair.  Names go through _fold_case.  For a
+# pair: an undef value requires the attribute to be undefined/absent; a
+# Regexp value must match the attribute; any other value must have the same
+# ref() type as the attribute and be equal to it when both are lowercased.
+# Croaks on a non-object invocant, an undef name, a non-CODE reference used
+# as a criterion, a trailing name without a value, or no criteria at all.
+sub look_up {
+    ref( $_[0] ) or Carp::croak "look_up works only as an object method";
+
+    my @criteria;
+    for ( my $i = 1; $i < @_; ) {
+        Carp::croak "Can't use undef as an attribute name"
+            unless defined $_[$i];
+        if ( ref $_[$i] ) {
+            Carp::croak "A " . ref( $_[$i] ) . " value is not a criterion"
+                unless ref $_[$i] eq 'CODE';
+            push @criteria, $_[ $i++ ];
+        }
+        else {
+            Carp::croak "param list to look_up ends in a key!" if $i == $#_;
+
+            # Stored as [ name, value, ref-type ] (or [ name, undef ]);
+            # plain string values are lowercased here, references are not.
+            push @criteria, [
+                scalar( $_[0]->_fold_case( $_[$i] ) ),
+                defined( $_[ $i + 1 ] )
+                ? ( ( ref $_[ $i + 1 ] ? $_[ $i + 1 ] : lc( $_[ $i + 1 ] ) ),
+                    ref( $_[ $i + 1 ] )
+                    )
+                : undef    # Yes, leave that LC!
+            ];
+            $i += 2;
+        }
+    }
+    Carp::croak "No criteria?" unless @criteria;
+
+    my ( @matching, $val );
+    my $this = $_[0];
+Node:
+    while (1) {
+
+       # You'll notice that the code here is almost the same as for look_down.
+        foreach my $c (@criteria) {
+            if ( ref($c) eq 'CODE' ) {
+                next Node unless $c->($this);    # jump to the continue block
+            }
+            else {                               # it's an attr-value pair
+                next Node                        # jump to the continue block
+                    if                           # two values are unequal if:
+                        ( defined( $val = $this->{ $c->[0] } ) )
+                    ? (     !defined $c->[ 1
+                                ]    # actual is def, critval is undef => fail
+                                or (
+                                  $c->[2] eq 'Regexp'
+                                ? $val !~ $c->[1]
+                                : ( ref $val ne $c->[2]
+
+                                        # have unequal ref values => fail
+                                        # Both sides are lowercased: a
+                                        # reference criterion was stored
+                                        # as-is, so its stringification
+                                        # still contains capitals.
+                                        or lc($val) ne lc( $c->[1] )
+
+                                       # have unequal lc string values => fail
+                                )
+                                )
+                        )
+                    : (     defined $c->[1]
+                        )    # actual is undef, critval is def => fail
+            }
+        }
+
+        # We make it this far only if all the criteria passed.
+        return $this unless wantarray;
+        push @matching, $this;
+    }
+    continue {
+
+        # Step up to the parent; stop once there is no parent element.
+        last unless defined( $this = $this->{'_parent'} ) and ref $this;
+    }
+
+    return @matching if wantarray;
+    return;
+}
+
+#--------------------------------------------------------------------------
+
+
+# Inherited attribute lookup: look for the named attribute(s) on this
+# element and then on each ancestor, nearest first.  Names go through
+# _fold_case.  List context returns every value found, element by element
+# and, within an element, in the order the names were given.  Otherwise the
+# first value found is returned, or nothing if no node has any of the
+# attributes.  Croaks if an attribute name is undef.
+sub attr_get_i {
+    my ( $self, @attributes );
+
+    if ( @_ > 2 ) {    # several attribute names
+        $self = shift;
+        Carp::croak "No attribute names can be undef!"
+            if grep !defined($_), @_;
+        @attributes = $self->_fold_case(@_);
+    }
+    else {             # a single attribute name: the most common case
+        Carp::croak "Attribute name must be a defined value!"
+            unless defined $_[1];
+        $self       = $_[0];
+        @attributes = ( scalar( $self->_fold_case( $_[1] ) ) );
+    }
+
+    my @nodes = ( $self, $self->lineage );
+
+    if (wantarray) {    # list context: gather everything
+        my @values;
+        foreach my $node (@nodes) {
+            foreach my $name (@attributes) {
+                push @values, $node->{$name} if exists $node->{$name};
+            }
+        }
+        return @values;
+    }
+
+    # scalar context: the first hit wins
+    foreach my $node (@nodes) {
+        foreach my $name (@attributes) {
+            return $node->{$name} if exists $node->{$name};    # found
+        }
+    }
+    return;    # never found
+}
+
+
+# Build an index of this element and every element below it, keyed by tag
+# name.  Returns a hash ref mapping each '_tag' value to an array ref of the
+# elements carrying it, in depth-first pre-order.  Elements whose tag is
+# undefined are filed under the empty string.  Text nodes are skipped.
+# Croaks unless called on an object.
+sub tagname_map {
+    my (@pile) = $_[0];    # start out the to-do stack for the traverser
+    Carp::croak "tagname_map can be called only as an object method"
+        unless ref $pile[0];
+
+    my %map;
+    while (@pile) {
+        my $this = shift @pile;
+
+        # dance around the strange case of having an undef tagname.
+        my $this_tag = defined $this->{'_tag'} ? $this->{'_tag'} : '';
+
+        push @{ $map{$this_tag} ||= [] }, $this;    # add to map
+
+        my $content = $this->{'_content'} or next;
+        unshift @pile, grep ref($_), @$content;     # traverse
+    }
+    return \%map;
+}
+
+
+# Collect link attributes from this element and the elements below it
+# (pre-order traverse(), text nodes ignored).  Which attributes count as
+# links for a given tag comes from %HTML::Element::linkElements, whose
+# values are either one attribute name or an array ref of names.  Optional
+# arguments are tag names (passed through _fold_case) restricting the search
+# to those tags.  Returns an array ref of
+# [ value, element, attribute name, tag ] entries.
+sub extract_links {
+    my $start = shift;
+
+    my %wantType;
+    @wantType{ $start->_fold_case(@_) } = (1) x @_;    # if there were any
+    my $selective = scalar(@_);
+
+    my @links;
+
+    # TODO: add xml:link?
+
+    my $collect = sub {    # pre-order callback
+        my $element = $_[0];
+        my $tag     = $element->{'_tag'};
+
+        # if we're selective, skip tags that were not asked for
+        return 1 if $selective && !$wantType{$tag};
+
+        my $link_attrs = $HTML::Element::linkElements{$tag};
+        return 1 unless defined $link_attrs;    # no link attributes here
+
+        # Look over the possibly present link attributes, saving each
+        # value that is found.
+        foreach my $attr_name ( ref($link_attrs) ? @$link_attrs : $link_attrs )
+        {
+            my $val = $element->attr($attr_name);
+            push( @links, [ $val, $element, $attr_name, $tag ] )
+                if defined $val;
+        }
+        return 1;    # true, so we keep recursing
+    };
+
+    $start->traverse(
+        [ $collect, undef ],    # no post-order callback
+        1,                      # ignore text nodes
+    );
+    return \@links;
+}
+
+
+# Normalise the text inside <pre> elements, in place, for this element and
+# everything below it.  For each text segment that has a 'pre' element
+# somewhere above it (or is a child of one): line endings (CR followed by
+# any number of LFs, or a lone LF) become "\n", and tabs are expanded to
+# spaces with tab stops hardwired at every eighth column.  Text outside
+# <pre> is left untouched.  Returns nothing.
+sub simplify_pres {
+    my $pre = 0;    # how many 'pre' elements enclose the current node
+
+    my $sub;
+    my $line;
+    $sub = sub {
+        ++$pre if $_[0]->{'_tag'} eq 'pre';
+
+        # A childless element must still reach the matching decrement
+        # below, so fall back to an empty list rather than returning early;
+        # otherwise an empty <pre> would leave $pre raised for the rest of
+        # the walk.
+        foreach my $it ( @{ $_[0]->{'_content'} || [] } ) {
+            if ( ref $it ) {
+                $sub->($it);    # recurse!
+            }
+            elsif ($pre) {
+
+                #$it =~ s/(?:(?:\cm\cj*)|(?:\cj))/\n/g;
+
+                $it = join "\n", map {
+                    ;
+                    $line = $_;
+                    while (
+                        $line
+                        =~ s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
+
+              # Sort of adapted from Text::Tabs -- yes, it's hardwired-in that
+              # tabs are at every EIGHTH column.
+                        )
+                    {
+                    }
+                    $line;
+                    }
+                    split /(?:(?:\cm\cj*)|(?:\cj))/, $it, -1;
+            }
+        }
+        --$pre if $_[0]->{'_tag'} eq 'pre';
+        return;
+    };
+    $sub->( $_[0] );
+
+    undef $sub;    # break the closure's reference to itself
+    return;
+}
+
+
+# Structural comparison: $h->same_as($i) returns 1 when both are elements
+# with the same tag, the same set of real attributes (names not starting
+# with "_") with string-equal values, and content lists of equal length
+# whose items match pairwise -- text compared as strings, elements compared
+# recursively with same_as.  Returns 0 otherwise, including when $i is not
+# a reference.  Dies unless called as an object method with exactly one
+# argument.
+#
+# Assumes that no content list contains adjacent text segments, like
+# ['foo', ' bar'].
+sub same_as {
+    die 'same_as() takes only one argument: $h->same_as($i)' unless @_ == 2;
+    my ( $h, $i ) = @_[ 0, 1 ];
+    die "same_as() can be called only as an object method" unless ref $h;
+
+    # An element can't be same_as anything but another element!
+    # They needn't be of the same class, tho.
+    return 0 unless defined $i and ref $i;
+
+    # special (if rare) case: anything is the same as... itself!
+    return 1 if $h eq $i;
+
+    # The tag is the only significant attribute whose name starts with "_".
+    return 0 unless $h->{'_tag'} eq $i->{'_tag'};
+
+    # Compare attributes, but only the real ones.
+    my @names_h = sort grep { /\A[^_]/ } keys %$h;
+    my @names_i = sort grep { /\A[^_]/ } keys %$i;
+
+    # different number of real attributes?  they're different.
+    return 0 unless @names_h == @names_i;
+
+    foreach my $n ( 0 .. $#names_h ) {
+        my $name = $names_h[$n];
+        return 0
+            unless $name eq $names_i[$n]        # same key name
+                and $h->{$name} eq $i->{$name}; # same value
+    }
+
+    my $children_h = $h->{'_content'} || [];
+    my $children_i = $i->{'_content'} || [];
+
+    # different numbers of children?  they're different.
+    return 0 unless @$children_h == @$children_i;
+
+    # compare each of the children:
+    foreach my $n ( 0 .. $#$children_h ) {
+        my ( $child_h, $child_i ) = ( $children_h->[$n], $children_i->[$n] );
+        if ( ref $child_h ) {
+
+            # an element can only match another element: RECURSE!
+            return 0 unless ref($child_i) and $child_h->same_as($child_i);
+        }
+        else {
+
+            # a text segment can only match an identical text segment
+            return 0 if ref($child_i) or $child_h ne $child_i;
+        }
+    }
+
+    return 1;    # passed all the tests!
+}
+
+
+# Build element trees from "list of lists" (lol) specifications.
+#
+# Each lol is an array ref whose items are interpreted as follows:
+#   * a non-reference in position 0: the tag name ('null' if there is none);
+#   * any other non-reference: a text child;
+#   * an array ref: a nested lol, built recursively and added as a child;
+#   * a hash ref: attributes (pairs with an undef value, and the keys
+#     '_name', '_content' and '_parent', are skipped);
+#   * an object of this package or a subclass: added as a child, cloned
+#     first if it already has a '_parent'.
+# Any other reference croaks, as does a lol that contains itself.  An empty
+# lol produces nothing.
+#
+# In list context every argument is processed (arguments that are not array
+# refs pass through untouched) and the resulting list is returned.
+# Otherwise exactly one argument is required; it is returned as-is unless it
+# is an array ref, in which case the element built from it is returned.
+sub new_from_lol {
+    my $class = shift;
+    $class = ref($class) || $class;
+
+  # calling as an object method is just the same as ref($h)->new_from_lol(...)
+
+    # Settle the non-list-context early exits before the recursive builder
+    # closure exists: the closure refers to itself, so leaving without
+    # undef'ing it would leak it.
+    my $want_list = wantarray;
+    if ( !$want_list ) {
+        Carp::croak "new_from_lol in scalar context needs exactly one lol"
+            unless @_ == 1;
+        return $_[0] unless ref( $_[0] ) eq 'ARRAY';
+        # used to be a fatal error.  still undocumented tho.
+    }
+
+    my @ancestor_lols;
+
+    # So we can make sure there's no cyclicities in this lol.
+    # That would be perverse, but one never knows.
+    my ( $sub, $k, $v, $node );    # last three are scratch values
+    $sub = sub {
+
+        #print "Building for $_[0]\n";
+        my $lol = $_[0];
+        return unless @$lol;
+        my ( @attributes, @children );
+        Carp::croak "Cyclicity detected in source LOL tree, around $lol?!?"
+            if grep( $_ eq $lol, @ancestor_lols );
+        push @ancestor_lols, $lol;
+
+        my $tag_name = 'null';
+
+        # Recursion in in here:
+        for ( my $i = 0; $i < @$lol; ++$i ) {    # Iterate over children
+            if ( ref( $lol->[$i] ) eq 'ARRAY' )
+            {    # subtree: most common thing in loltree
+                push @children, $sub->( $lol->[$i] );
+            }
+            elsif ( !ref( $lol->[$i] ) ) {
+                if ( $i == 0 ) {    # name
+                    $tag_name = $lol->[$i];
+                    Carp::croak "\"$tag_name\" isn't a good tag name!"
+                        if $tag_name =~ m/[<>\/\x00-\x20]/
+                    ;               # minimal sanity, certainly!
+                }
+                else {              # text segment child
+                    push @children, $lol->[$i];
+                }
+            }
+            elsif ( ref( $lol->[$i] ) eq 'HASH' ) {    # attribute hashref
+                keys %{ $lol->[$i] };   # reset the each-counter, just in case
+                while ( ( $k, $v ) = each %{ $lol->[$i] } ) {
+                    push @attributes, $class->_fold_case($k), $v
+                        if defined $v
+                            and $k ne '_name'
+                            and $k ne '_content'
+                            and $k ne '_parent';
+
+                    # enforce /some/ sanity!
+                }
+            }
+            elsif ( UNIVERSAL::isa( $lol->[$i], __PACKAGE__ ) ) {
+                if ( $lol->[$i]->{'_parent'} ) {    # if claimed
+                        #print "About to clone ", $lol->[$i], "\n";
+                    push @children, $lol->[$i]->clone();
+                }
+                else {
+                    push @children, $lol->[$i];    # if unclaimed...
+                         #print "Claiming ", $lol->[$i], "\n";
+                    $lol->[$i]->{'_parent'} = 1;    # claim it NOW
+                      # This WILL be replaced by the correct value once we actually
+                      #  construct the parent, just after the end of this loop...
+                }
+            }
+            else {
+                Carp::croak "new_from_lol doesn't handle references of type "
+                    . ref( $lol->[$i] );
+            }
+        }
+
+        pop @ancestor_lols;
+        $node = $class->new($tag_name);
+
+        #print "Children: @children\n";
+
+        if ( $class eq __PACKAGE__ ) {    # Special-case it, for speed:
+            %$node = ( %$node, @attributes ) if @attributes;
+
+            #print join(' ', $node, ' ' , map("<$_>", %$node), "\n");
+            if (@children) {
+                $node->{'_content'} = \@children;
+                foreach my $c (@children) {
+                    _weaken($c->{'_parent'} = $node)
+                        if ref $c;
+                }
+            }
+        }
+        else {                            # Do it the clean way...
+                                          #print "Done neatly\n";
+            while (@attributes) { $node->attr( splice @attributes, 0, 2 ) }
+            $node->push_content(
+                  map { _weaken($_->{'_parent'} = $node) if ref $_; $_ }
+                    @children )
+                if @children;
+        }
+
+        return $node;
+    };
+
+    # End of sub definition.
+
+    if ($want_list) {
+        my (@nodes) = map { ; ( ref($_) eq 'ARRAY' ) ? $sub->($_) : $_ } @_;
+        # Let text bits pass thru, I guess.  This makes this act more like
+        #  unshift_content et al.  Undocumented.
+
+        undef $sub;
+        # so it won't be in its own frame, so its refcount can hit 0
+
+        return @nodes;
+    }
+
+    $node = $sub->( $_[0] );
+    undef $sub;
+    # so it won't be in its own frame, so its refcount can hit 0
+
+    return $node;
+}
+
+
+# Replace, in place, every text segment in the content lists of this element
+# and the elements below it with a '~text' pseudo-element.  Each one is
+# created through the containing element's element_class, keeps the original
+# string in its 'text' attribute, and has the containing element as its
+# '_parent'.  Returns nothing.
+sub objectify_text {
+    my @pending = ( $_[0] );
+
+    while (@pending) {
+        my $parent = shift @pending;
+
+        # $child aliases the content-list slot, so assigning to it
+        # rewrites the list itself.
+        foreach my $child ( @{ $parent->{'_content'} } ) {
+            if ( !ref($child) ) {
+                $child = $parent->element_class->new(
+                    '~text',
+                    'text'    => $child,
+                    '_parent' => $parent
+                );
+                next;
+            }
+            unshift @pending, $child;    # visit it later.
+        }
+    }
+    return;
+}
+# Turn '~text' pseudo-elements at or below this element back into plain strings.
+sub deobjectify_text {
+    my (@stack) = ( $_[0] );
+    my ($old_node);
+    # Called on a '~text' node itself: dismantle it and return its string.
+    if ( $_[0]{'_tag'} eq '~text' ) {    # special case
+            # Here $old_node holds the extracted text, not a node
+        if ( $_[0]{'_parent'} ) {
+            $_[0]->replace_with( $old_node = delete $_[0]{'text'} )->delete;
+        }
+        else {    # well, that's that, then!
+            $old_node = delete $_[0]{'text'};
+        }
+        # Empty the emptied-out node, by hand or through its own delete().
+        if ( ref( $_[0] ) eq __PACKAGE__ ) {    # common case
+            %{ $_[0] } = ();                    # poof!
+        }
+        else {
+
+            # play nice:
+            delete $_[0]{'_parent'};
+            $_[0]->delete;
+        }
+        return '' unless defined $old_node;     # sanity!
+        return $old_node;
+    }
+    # Otherwise walk the tree, replacing each '~text' child with its string.
+    while (@stack) {
+        foreach my $c ( @{ ( shift @stack )->{'_content'} } ) {
+            if ( ref($c) ) {
+                if ( $c->{'_tag'} eq '~text' ) {
+                    $c = ( $old_node = $c )->{'text'};
+                    if ( ref($old_node) eq __PACKAGE__ ) {    # common case
+                        %$old_node = ();                      # poof!
+                    }
+                    else {
+
+                        # play nice:
+                        delete $old_node->{'_parent'};
+                        $old_node->delete;
+                    }
+                }
+                else {
+                    unshift @stack, $c;    # visit it later.
+                }
+            }
+        }
+    }
+    # The tree-walking form has no useful return value.
+    return undef;
+}
+
+
+{
+
+    # Letter numbering for lists: 1 => a ... 26 => z, 27 => aa, 28 => ab ...
+    # The subs here are basically copied from Number::Latin, based on a
+    # one-liner by Abigail.  Yes, I could simply require that module, and a
+    # Roman numeral module too, but really, HTML-Tree already has enough
+    # dependencies as it is; and anyhow, I don't need the functions that do
+    # latin2int or roman2int.
+    no integer;
+
+    my @alpha = ( 'a' .. 'z' );
+
+    # Lower-case letter form of a number.  undef gives nothing; anything
+    # strictly between -1 and 1 gives '0'; negatives get a leading '-'.
+    sub _int2latin {
+        my $number = $_[0];
+        return unless defined $number;
+        return '0' if $number < 1 and $number > -1;
+        return ( $number <= -1 )
+            ? '-' . _i2l( abs int $number )    # tolerate negatives
+            : _i2l( int $number );
+    }
+
+    # Upper-case letter form: just the above plus uc.
+    sub _int2LATIN {
+        my $latin = _int2latin( $_[0] );
+        return unless defined $latin;
+        return uc($latin);
+    }
+
+    # The real work: letters for a positive integer, '' for a false value.
+    sub _i2l {
+        my $remaining = $_[0] || return "";
+        my $letters = '';
+        while ($remaining) {
+
+            # Yes, 26 => is (26 % 26 - 1), which is -1 => z!
+            $letters   = $alpha[ $remaining % 26 - 1 ] . $letters;
+            $remaining = int( ( $remaining - 1 ) / 26 );
+        }
+        return $letters;
+    }
+}
+
+{
+
+    # And now, some much less impressive Roman numerals code: one lookup
+    # table per decimal digit position.
+
+    my @ones      = ( '', qw(I II III IV V VI VII VIII IX) );
+    my @tens      = ( '', qw(X XX XXX XL L LX LXX LXXX XC) );
+    my @hundreds  = ( '', qw(C CC CCC CD D DC DCC DCCC CM) );
+    my @thousands = ( '', qw(M MM MMM) );
+
+    # Upper-case Roman numeral for an integer.  Zero (or a false value)
+    # gives '0'; magnitudes of 4000 and up are returned as plain numbers,
+    # because over 3999 would require non-ASCII chars; negatives get a
+    # leading '-'.
+    sub _int2ROMAN {
+        my $value = int( $_[0] || 0 );
+        return '0' if 0 == $value;    # zero is a special case
+        return $value + 0 if $value <= -4000 or $value >= 4000;
+
+        # grumble grumble tolerate negatives grumble
+        my $sign = ( $value < 0 ) ? '-' : '';
+        $value = abs($value);
+
+        # Split into decimal digits, most significant last.
+        my $unit_digit     = $value % 10;
+        my $ten_digit      = int( $value / 10 ) % 10;
+        my $hundred_digit  = int( $value / 100 ) % 10;
+        my $thousand_digit = int( $value / 1000 );
+
+        return join( '',
+            $sign,
+            $thousands[$thousand_digit],
+            $hundreds[$hundred_digit],
+            $tens[$ten_digit],
+            $ones[$unit_digit] );
+    }
+
+    # Lower-case variant of the above.
+    sub _int2roman {
+        return lc( _int2ROMAN( $_[0] ) );
+    }
+}
+
+# Dummy numberer: hands its argument back unchanged.
+sub _int2int {
+    return $_[0];
+}
+
+%list_type_to_sub = (    # <ol> 'type' value => sub that formats a counter
+    'I' => \&_int2ROMAN,    # upper-case Roman numerals
+    'i' => \&_int2roman,    # lower-case Roman numerals
+    'A' => \&_int2LATIN,    # upper-case letters
+    'a' => \&_int2latin,    # lower-case letters
+    '1' => \&_int2int,      # number passed through as-is
+);
+
+# Walk this element and everything below it, storing a '_bullet' attribute
+# on list items:
+#   * each 'li' child of an 'ol' gets its number followed by '.'.  Counting
+#     starts at the list's 'start' attribute (1 if absent or not a number
+#     of up to seven digits); an 'li' with a numeric 'value' attribute
+#     resets the counter.  The list's 'type' attribute picks the formatter
+#     from %list_type_to_sub, defaulting to the '1' entry.
+#   * each 'li' child of a 'ul', 'dir' or 'menu' gets '*'.
+# A 'start' or 'value' of 0 is honoured like any other number.
+# Returns nothing.
+sub number_lists {
+    my (@stack) = ( $_[0] );
+    my ( $this, $tag, $counter, $numberer );    # scratch
+    while (@stack) {    # yup, pre-order-traverser idiom
+        if ( ( $tag = ( $this = shift @stack )->{'_tag'} ) eq 'ol' ) {
+
+            # Prep some things.  Test for definedness rather than truth so
+            # that start="0" is not mistaken for a missing attribute.
+            $counter
+                = ( ( defined $this->{'start'} ? $this->{'start'} : '' )
+                    =~ m<^\s*(\d{1,7})\s*$>s )
+                ? $1
+                : 1;
+            $numberer = $list_type_to_sub{ $this->{'type'} || '' }
+                || $list_type_to_sub{'1'};
+
+            # Immediately iterate over all children
+            foreach my $c ( @{ $this->{'_content'} || next } ) {
+                next unless ref $c;
+                unshift @stack, $c;
+                if ( $c->{'_tag'} eq 'li' ) {
+
+                    # Same definedness test, so value="0" takes effect too.
+                    $counter = $1
+                        if (
+                        ( defined $c->{'value'} ? $c->{'value'} : '' )
+                        =~ m<^\s*(\d{1,7})\s*$>s );
+                    $c->{'_bullet'} = $numberer->($counter) . '.';
+                    ++$counter;
+                }
+            }
+
+        }
+        elsif ( $tag eq 'ul' or $tag eq 'dir' or $tag eq 'menu' ) {
+
+            # Immediately iterate over all children
+            foreach my $c ( @{ $this->{'_content'} || next } ) {
+                next unless ref $c;
+                unshift @stack, $c;
+                $c->{'_bullet'} = '*' if $c->{'_tag'} eq 'li';
+            }
+
+        }
+        else {
+
+            # Any other element: just queue its element children.
+            foreach my $c ( @{ $this->{'_content'} || next } ) {
+                unshift @stack, $c if ref $c;
+            }
+        }
+    }
+    return;
+}
+
+
+# has_insane_linkage($root)
+#   Checks the parent/content links of the tree under $root.  Returns the
+#   empty list when nothing is wrong; otherwise returns a two-element list
+#   ( $offending_node, $description ) for the first problem found.
+sub has_insane_linkage {
+    my $root  = $_[0];
+    my @queue = ($root);
+
+    # Maps each element seen so far (by its stringification) to the
+    # stringified element it was found under.
+    my %found_under = ( $root, 'TOP-OF-SCAN' );
+
+    # Iterative pre-order walk.
+    while (@queue) {
+        my $node = shift @queue;
+        my $kids = $node->{'_content'} || next;
+
+        return ( $node, "_content attribute is true but nonref." )
+            unless ref($kids) eq 'ARRAY';
+
+        foreach my $idx ( 0 .. $#$kids ) {
+            my $kid = $kids->[$idx];
+
+            return ( $node, "Child $idx is undef" )
+                unless defined $kid;
+
+            # Text segments carry no linkage to verify.
+            next unless ref($kid);
+
+            return ( $kid, "appears in its own content list" )
+                if $kid eq $node;
+
+            return ( $kid,
+                "appears twice in the tree: once under $node, once under $found_under{$kid}"
+            ) if exists $found_under{$kid};
+
+            # The stringification of the parent is all we need to keep.
+            $found_under{$kid} = '' . $node;
+
+            my $up = $kid->{'_parent'};
+            return ( $kid, "_parent attribute is wrong (not defined)" )
+                unless defined($up);
+            return ( $kid, "_parent attribute is wrong (nonref)" )
+                unless ref($up);
+            return ( $kid,
+                "_parent attribute is wrong (is $up; should be $node)" )
+                unless $up eq $node;
+        }
+
+        # Visit this node's element children before anything already queued.
+        unshift @queue, grep ref($_), @$kids;
+    }
+
+    return;    #okay
+}
+
+# _asserts_fail($root)
+#   Runs the "assert" attribute of every element under (and including)
+#   $root that has one, and returns a list of failures, each an arrayref
+#   [ $element, $message ].  An empty list means every assertion held.
+#
+#   An assertion may be a code ref or a string.  A string is compiled once
+#   with string eval in package main (wrapped in "sub { ... }" unless it
+#   already contains the word "sub") and the resulting code ref is stored
+#   back into the element's "assert" attribute.
+#
+#   Each assertion is called with:
+#     0: the element   1: its tag          2: its id
+#     3: its parent    4: the parent's tag 5: the parent's id
+#   (3-5 are omitted when the element has no parent).
+#
+#   Side effect: an element with an assertion but no true "id" attribute
+#   gets its id set to $element->address.
+#
+#   SECURITY: this evals attribute text as Perl code; never run it on
+#   documents from an untrusted source.
+sub _asserts_fail {    # to be run on trusted documents only
+    my (@pile) = ( $_[0] );
+    my ( @errors, $this, $id, $assert, $parent, $rv );
+    while (@pile) {
+        $this = shift @pile;
+        if ( defined( $assert = $this->{'assert'} ) ) {
+            $id = ( $this->{'id'} ||= $this->address )
+                ;      # don't use '0' as an ID, okay?
+            unless ( ref($assert) ) {
+
+                # Compile the assertion text in package main, so that it
+                # sees main's subs and package variables.
+                package main;
+## no critic
+                $assert = $this->{'assert'} = (
+                    $assert =~ m/\bsub\b/
+                    ? eval($assert)
+                    : eval("sub {  $assert\n}")
+                );
+## use critic
+                if ($@) {
+                    push @errors,
+                        [ $this, "assertion at $id broke in eval: $@" ];
+
+                    # Replace the broken assertion with a no-op; it returns
+                    # false, so a "failed" error is recorded below as well.
+                    $assert = $this->{'assert'} = sub { };
+                }
+            }
+            $parent = $this->{'_parent'};
+            $rv     = undef;
+            eval {
+                # Argument 2 is the element's "id" attribute, the same key
+                # resolved into $id above and passed for the parent below.
+                $rv = $assert->(
+                    $this, $this->{'_tag'}, $this->{'id'},    # 0,1,2
+                    $parent
+                    ? ( $parent, $parent->{'_tag'}, $parent->{'id'} )
+                    : ()                                       # 3,4,5
+                );
+            };
+            if ($@) {
+                push @errors, [ $this, "assertion at $id died: $@" ];
+            }
+            elsif ( !$rv ) {
+                push @errors, [ $this, "assertion at $id failed" ];
+            }
+
+            # else OK
+        }
+
+        # Queue the element children; a node with no content is skipped.
+        push @pile, grep ref($_), @{ $this->{'_content'} || next };
+    }
+    return @errors;
+}
+
+## _valid_name
+#  validate XML style attribute names
+#  http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Name
+#
+#  $self->_valid_name($attr)
+#    Returns 1 if $attr matches the XML "Name" production
+#        Name ::= NameStartChar (NameChar)*
+#    and 0 otherwise.  Croaks when $attr is missing or false.
+#
+#  NOTE(review): assumes $START_CHAR and $NAME_CHAR (defined elsewhere in
+#  this file) each match exactly one character -- confirm.
+
+sub _valid_name {
+    my $self = shift;
+    my $attr = shift
+        or Carp::croak("sub valid_name requires an attribute name");
+
+    # One start character followed by ZERO or more name characters, so that
+    # one-character names are accepted.  \A and \z anchor the whole string;
+    # unlike a trailing '$', \z does not accept a final newline.
+    return ( $attr =~ /\A${START_CHAR}${NAME_CHAR}*\z/ ) ? 1 : 0;
+}
+
+
+# element_class
+#   Returns the object's '_element_class' attribute if it holds a true
+#   value, and otherwise the name of this package.
+sub element_class {
+    my $self = $_[0];
+    return $self->{_element_class} || __PACKAGE__;
+}
+
+1;
+
+
+1;
+
+__END__
+
+=pod
+
+=head1 NAME
+
+HTML::Element - Class for objects that represent HTML elements
+
+=head1 VERSION
+
+This document describes version 5.03 of
+HTML::Element, released September 22, 2012
+as part of L<HTML-Tree|HTML::Tree>.
+
+=head1 SYNOPSIS
+
+    use HTML::Element;
+    $a = HTML::Element->new('a', href => 'http://www.perl.com/');
+    $a->push_content("The Perl Homepage");
+
+    $tag = $a->tag;
+    print "$tag starts out as:",  $a->starttag, "\n";
+    print "$tag ends as:",  $a->endtag, "\n";
+    print "$tag\'s href attribute is: ", $a->attr('href'), "\n";
+
+    $links_r = $a->extract_links();
+    print "Hey, I found ", scalar(@$links_r), " links.\n";
+
+    print "And that, as HTML, is: ", $a->as_HTML, "\n";
+    $a = $a->delete;
+
+=head1 DESCRIPTION
+
+(This class is part of the L<HTML::Tree|HTML::Tree> dist.)
+
+Objects of the HTML::Element class can be used to represent elements
+of HTML document trees.  These objects have attributes, notably attributes that
+designate each element's parent and content.  The content is an array
+of text segments and other HTML::Element objects.  A tree with HTML::Element
+objects as nodes can represent the syntax tree for a HTML document.
+
+=head1 HOW WE REPRESENT TREES
+
+Consider this HTML document:
+
+  <html lang='en-US'>
+    <head>
+      <title>Stuff</title>
+      <meta name='author' content='Jojo'>
+    </head>
+    <body>
+     <h1>I like potatoes!</h1>
+    </body>
+  </html>
+
+Building a syntax tree out of it makes a tree-structure in memory
+that could be diagrammed as:
+
+                     html (lang='en-US')
+                      / \
+                    /     \
+                  /         \
+                head        body
+               /\               \
+             /    \               \
+           /        \               \
+         title     meta              h1
+          |       (name='author',     |
+       "Stuff"    content='Jojo')    "I like potatoes"
+
+This is the traditional way to diagram a tree, with the "root" at the
+top, and it's this kind of diagram that people have in mind when they
+say, for example, that "the meta element is under the head element
+instead of under the body element".  (The same is also said with
+"inside" instead of "under" -- the use of "inside" makes more sense
+when you're looking at the HTML source.)
+
+Another way to represent the above tree is with indenting:
+
+  html (attributes: lang='en-US')
+    head
+      title
+        "Stuff"
+      meta (attributes: name='author' content='Jojo')
+    body
+      h1
+        "I like potatoes"
+
+Incidentally, diagramming with indenting works much better for very
+large trees, and is easier for a program to generate.  The C<< $tree->dump >>
+method uses indentation just that way.
+
+However you diagram the tree, it's stored the same in memory -- it's a
+network of objects, each of which has attributes like so:
+
+  element #1:  _tag: 'html'
+               _parent: none
+               _content: [element #2, element #5]
+               lang: 'en-US'
+
+  element #2:  _tag: 'head'
+               _parent: element #1
+               _content: [element #3, element #4]
+
+  element #3:  _tag: 'title'
+               _parent: element #2
+               _content: [text segment "Stuff"]
+
+  element #4   _tag: 'meta'
+               _parent: element #2
+               _content: none
+               name: author
+               content: Jojo
+
+  element #5   _tag: 'body'
+               _parent: element #1
+               _content: [element #6]
+
+  element #6   _tag: 'h1'
+               _parent: element #5
+               _content: [text segment "I like potatoes"]
+
+The "treeness" of the tree-structure that these elements comprise is
+not an aspect of any particular object, but is emergent from the
+relatedness attributes (_parent and _content) of these element-objects
+and from how you use them to get from element to element.
+
+While you could access the content of a tree by writing code that says
+"access the 'src' attribute of the root's I<first> child's I<seventh>
+child's I<third> child", you're more likely to have to scan the contents
+of a tree, looking for whatever nodes, or kinds of nodes, you want to
+do something with.  The most straightforward way to look over a tree
+is to "traverse" it; an HTML::Element method (C<< $h->traverse >>) is
+provided for this purpose; and several other HTML::Element methods are
+based on it.
+
+(For everything you ever wanted to know about trees, and then some,
+see Niklaus Wirth's I<Algorithms + Data Structures = Programs> or
+Donald Knuth's I<The Art of Computer Programming, Volume 1>.)
+
+=head2 Weak References
+
+TL;DR summary: S<C<use HTML::TreeBuilder 5 -weak;>> and forget about
+the C<delete> method (except for pruning a node from a tree).
+
+Because HTML::Element stores a reference to the parent element, Perl's
+reference-count garbage collection doesn't work properly with
+HTML::Element trees.  Starting with version 5.00, HTML::Element uses
+weak references (if available) to prevent that problem.  Weak
+references were introduced in Perl 5.6.0, but you also need a version
+of L<Scalar::Util> that provides the C<weaken> function.
+
+Weak references are enabled by default.  If you want to be certain
+they're in use, you can say S<C<use HTML::Element 5 -weak;>>.  You
+must include the version number; previous versions of HTML::Element
+ignored the import list entirely.
+
+To disable weak references, you can say S<C<use HTML::Element -noweak;>>.
+This is a global setting.  B<This feature is deprecated> and is
+provided only as a quick fix for broken code.  If your code does not
+work properly with weak references, you should fix it immediately, as
+weak references may become mandatory in a future version.  Generally,
+all you need to do is keep a reference to the root of the tree until
+you're done working with it.
+
+Because HTML::TreeBuilder is a subclass of HTML::Element, you can also
+import C<-weak> or C<-noweak> from HTML::TreeBuilder: e.g.
+S<C<use HTML::TreeBuilder 5 -weak;>>.
+
+=head1 BASIC METHODS
+
+=head2 new
+
+  $h = HTML::Element->new('tag', 'attrname' => 'value', ... );
+
+This constructor method returns a new HTML::Element object.  The tag
+name is a required argument; it will be forced to lowercase.
+Optionally, you can specify other initial attributes at object
+creation time.
+
+=head2 attr
+
+  $value = $h->attr('attr');
+  $old_value = $h->attr('attr', $new_value);
+
+Returns (optionally sets) the value of the given attribute of C<$h>.  The
+attribute name (but not the value, if provided) is forced to
+lowercase.  If trying to read the value of an attribute not present
+for this element, the return value is undef.
+If setting a new value, the old value of that attribute is
+returned.
+
+If methods are provided for accessing an attribute (like C<< $h->tag >> for
+"_tag", C<< $h->content_list >>, etc. below), use those instead of calling
+C<< $h->attr >>, whether for reading or setting.
+
+Note that setting an attribute to C<undef> (as opposed to "", the empty
+string) actually deletes the attribute.
+
+=head2 tag
+
+  $tagname = $h->tag();
+  $h->tag('tagname');
+
+Returns (optionally sets) the tag name (also known as the generic
+identifier) for the element C<$h>.  In setting, the tag name is always
+converted to lower case.
+
+There are four kinds of "pseudo-elements" that show up as
+HTML::Element objects:
+
+=over
+
+=item Comment pseudo-elements
+
+These are element objects with a C<$h-E<gt>tag> value of "~comment",
+and the content of the comment is stored in the "text" attribute
+(C<$h-E<gt>attr("text")>).  For example, parsing this code with
+HTML::TreeBuilder...
+
+  <!-- I like Pie.
+     Pie is good
+  -->
+
+produces an HTML::Element object with these attributes:
+
+  "_tag",
+  "~comment",
+  "text",
+  " I like Pie.\n     Pie is good\n  "
+
+=item Declaration pseudo-elements
+
+Declarations (rarely encountered) are represented as HTML::Element
+objects with a tag name of "~declaration", and content in the "text"
+attribute.  For example, this:
+
+  <!DOCTYPE foo>
+
+produces an element whose attributes include:
+
+  "_tag", "~declaration", "text", "DOCTYPE foo"
+
+=item Processing instruction pseudo-elements
+
+PIs (rarely encountered) are represented as HTML::Element objects with
+a tag name of "~pi", and content in the "text" attribute.  For
+example, this:
+
+  <?stuff foo?>
+
+produces an element whose attributes include:
+
+  "_tag", "~pi", "text", "stuff foo?"
+
+(assuming a recent version of HTML::Parser)
+
+=item ~literal pseudo-elements
+
+These objects are not currently produced by HTML::TreeBuilder, but can
+be used to represent a "super-literal" -- i.e., a literal you want to
+be immune from escaping.  (Yes, I just made that term up.)
+
+That is, this is useful if you want to insert code into a tree that
+you plan to dump out with C<as_HTML>, where you want, for some reason,
+to suppress C<as_HTML>'s normal behavior of amp-quoting text segments.
+
+For example, this:
+
+  my $literal = HTML::Element->new('~literal',
+    'text' => 'x < 4 & y > 7'
+  );
+  my $span = HTML::Element->new('span');
+  $span->push_content($literal);
+  print $span->as_HTML;
+
+prints this:
+
+  <span>x < 4 & y > 7</span>
+
+Whereas this:
+
+  my $span = HTML::Element->new('span');
+  $span->push_content('x < 4 & y > 7');
+    # normal text segment
+  print $span->as_HTML;
+
+prints this:
+
+  <span>x &lt; 4 &amp; y &gt; 7</span>
+
+Unless you're inserting lots of pre-cooked code into existing trees,
+and dumping them out again, it's not likely that you'll find
+C<~literal> pseudo-elements useful.
+
+=back
+
+=head2 parent
+
+  $parent = $h->parent();
+  $h->parent($new_parent);
+
+Returns (optionally sets) the parent (aka "container") for this element.
+The parent should either be undef, or should be another element.
+
+You B<should not> use this to directly set the parent of an element.
+Instead use any of the other methods under "Structure-Modifying
+Methods", below.
+
+Note that C<< not($h->parent) >> is a simple test for whether C<$h> is the
+root of its subtree.
+
+=head2 content_list
+
+  @content = $h->content_list();
+  $num_children = $h->content_list();
+
+Returns a list of the child nodes of this element -- i.e., what
+nodes (elements or text segments) are inside/under this element. (Note
+that this may be an empty list.)
+
+In a scalar context, this returns the count of the items,
+as you may expect.
+
+=head2 content
+
+  $content_array_ref = $h->content(); # may return undef
+
+This somewhat deprecated method returns the content of this element;
+but unlike content_list, this returns either undef (which you should
+understand to mean no content), or a I<reference to the array> of
+content items, each of which is either a text segment (a string, i.e.,
+a defined non-reference scalar value), or an HTML::Element object.
+Note that even if an arrayref is returned, it may be a reference to an
+empty array.
+
+While older code should feel free to continue to use C<< $h->content >>,
+new code should use C<< $h->content_list >> in almost all conceivable
+cases.  It is my experience that in most cases this leads to simpler
+code anyway, since it means one can say:
+
+    @children = $h->content_list;
+
+instead of the inelegant:
+
+    @children = @{$h->content || []};
+
+If you do use C<< $h->content >> (or C<< $h->content_array_ref >>), you should not
+use the reference returned by it (assuming it returned a reference,
+and not undef) to directly set or change the content of an element or
+text segment!  Instead use L</content_refs_list> or any of the other
+methods under "Structure-Modifying Methods", below.
+
+=head2 content_array_ref
+
+  $content_array_ref = $h->content_array_ref(); # never undef
+
+This is like C<content> (with all its caveats and deprecations) except
+that it is guaranteed to return an array reference.  That is, if the
+given node has no C<_content> attribute, the C<content> method would
+return undef, but C<content_array_ref> would set the given node's
+C<_content> value to C<[]> (a reference to a new, empty array), and
+return that.
+
+=head2 content_refs_list
+
+  @content_refs = $h->content_refs_list;
+
+This returns a list of scalar references to each element of C<$h>'s
+content list.  This is useful in case you want to in-place edit any
+large text segments without having to get a copy of the current value
+of that segment value, modify that copy, then use the
+C<splice_content> to replace the old with the new.  Instead, here you
+can in-place edit:
+
+    foreach my $item_r ($h->content_refs_list) {
+        next if ref $$item_r;
+        $$item_r =~ s/honour/honor/g;
+    }
+
+You I<could> currently achieve the same effect with:
+
+    foreach my $item (@{ $h->content_array_ref }) {
+        # deprecated!
+        next if ref $item;
+        $item =~ s/honour/honor/g;
+    }
+
+...except that using the return value of C<< $h->content >> or
+C<< $h->content_array_ref >> to do that is deprecated, and just might stop
+working in the future.
+
+=head2 implicit
+
+  $is_implicit = $h->implicit();
+  $h->implicit($make_implicit);
+
+Returns (optionally sets) the "_implicit" attribute.  This attribute is
+a flag that's used for indicating that the element was not originally
+present in the source, but was added to the parse tree (by
+HTML::TreeBuilder, for example) in order to conform to the rules of
+HTML structure.
+
+=head2 pos
+
+  $pos = $h->pos();
+  $h->pos($element);
+
+Returns (and optionally sets) the "_pos" (for "current I<pos>ition")
+pointer of C<$h>.  This attribute is a pointer used during some
+parsing operations, whose value is whatever HTML::Element element
+at or under C<$h> is currently "open", where C<< $h->insert_element(NEW) >>
+will actually insert a new element.
+
+(This has nothing to do with the Perl function called C<pos>, for
+controlling where regular expression matching starts.)
+
+If you set C<< $h->pos($element) >>, be sure that C<$element> is
+either C<$h>, or an element under C<$h>.
+
+If you've been modifying the tree under C<$h> and are no longer
+sure C<< $h->pos >> is valid, you can enforce validity with:
+
+    $h->pos(undef) unless $h->pos->is_inside($h);
+
+=head2 all_attr
+
+  %attr = $h->all_attr();
+
+Returns all this element's attributes and values, as key-value pairs.
+This will include any "internal" attributes (i.e., ones not present
+in the original element, and which will not be represented if/when you
+call C<< $h->as_HTML >>).  Internal attributes are distinguished by the fact
+that the first character of their key (not value! key!) is an
+underscore ("_").
+
+Example output of C<< $h->all_attr() >> :
+C<'_parent', >I<[object_value]>C< , '_tag', 'em', 'lang', 'en-US',
+'_content', >I<[array-ref value]>.
+
+=head2 all_attr_names
+
+  @names = $h->all_attr_names();
+  $num_attrs = $h->all_attr_names();
+
+Like C<all_attr>, but only returns the names of the attributes.
+In scalar context, returns the number of attributes.
+
+Example output of C<< $h->all_attr_names() >> :
+C<'_parent', '_tag', 'lang', '_content', >.
+
+=head2 all_external_attr
+
+  %attr = $h->all_external_attr();
+
+Like C<all_attr>, except that internal attributes are not present.
+
+=head2 all_external_attr_names
+
+  @names = $h->all_external_attr_names();
+  $num_attrs = $h->all_external_attr_names();
+
+Like C<all_attr_names>, except that internal attributes' names
+are not present (or counted).
+
+=head2 id
+
+  $id = $h->id();
+  $h->id($string);
+
+Returns (optionally sets to C<$string>) the "id" attribute.
+C<< $h->id(undef) >> deletes the "id" attribute.
+
+C<< $h->id(...) >> is basically equivalent to C<< $h->attr('id', ...) >>,
+except that when setting the attribute, this method returns the new value,
+not the old value.
+
+=head2 idf
+
+  $id = $h->idf();
+  $h->idf($string);
+
+Just like the C<id> method, except that if you call C<< $h->idf() >> and
+no "id" attribute is defined for this element, then it's set to a
+likely-to-be-unique value, and returned.  (The "f" is for "force".)
+
+=head1 STRUCTURE-MODIFYING METHODS
+
+These methods are provided for modifying the content of trees
+by adding or changing nodes as parents or children of other nodes.
+
+=head2 push_content
+
+  $h->push_content($element_or_text, ...);
+
+Adds the specified items to the I<end> of the content list of the
+element C<$h>.  The items of content to be added should each be either a
+text segment (a string), an HTML::Element object, or an arrayref.
+Arrayrefs are fed thru C<< $h->new_from_lol(that_arrayref) >> to
+convert them into elements, before being added to the content
+list of C<$h>.  This means you can say concise things like:
+
+  $body->push_content(
+    ['br'],
+    ['ul',
+      map ['li', $_], qw(Peaches Apples Pears Mangos)
+    ]
+  );
+
+See the L</new_from_lol> method's documentation, far below, for more
+explanation.
+
+Returns C<$h> (the element itself).
+
+The push_content method will try to consolidate adjacent text segments
+while adding to the content list.  That's to say, if C<$h>'s C<content_list> is
+
+  ('foo bar ', $some_node, 'baz!')
+
+and you call
+
+   $h->push_content('quack?');
+
+then the resulting content list will be this:
+
+  ('foo bar ', $some_node, 'baz!quack?')
+
+and not this:
+
+  ('foo bar ', $some_node, 'baz!', 'quack?')
+
+If that latter is what you want, you'll have to override the
+feature of consolidating text by using splice_content,
+as in:
+
+  $h->splice_content(scalar($h->content_list),0,'quack?');
+
+Similarly, if you add 'Skronk' to the beginning of
+the content list by calling this:
+
+   $h->unshift_content('Skronk');
+
+then the resulting content list will be this:
+
+  ('Skronkfoo bar ', $some_node, 'baz!')
+
+and not this:
+
+  ('Skronk', 'foo bar ', $some_node, 'baz!')
+
+What you'd do to get the latter is:
+
+  $h->splice_content(0,0,'Skronk');
+
+=head2 unshift_content
+
+  $h->unshift_content($element_or_text, ...)
+
+Just like C<push_content>, but adds to the I<beginning> of the C<$h>
+element's content list.
+
+The items of content to be added should each be
+either a text segment (a string), an HTML::Element object, or
+an arrayref (which is fed thru C<new_from_lol>).
+
+The unshift_content method will try to consolidate adjacent text segments
+while adding to the content list.  See above for a discussion of this.
+
+Returns C<$h> (the element itself).
+
+=head2 splice_content
+
+  @removed = $h->splice_content($offset, $length,
+                                $element_or_text, ...);
+
+Detaches the elements from C<$h>'s list of content-nodes, starting at
+C<$offset> and continuing for C<$length> items, replacing them with the
+elements of the following list, if any.  Returns the elements (if any)
+removed from the content-list.  If C<$offset> is negative, then it starts
+that far from the end of the array, just like Perl's normal C<splice>
+function.  If C<$length> and the following list are omitted, removes
+everything from C<$offset> onward.
+
+The items of content to be added (if any) should each be either a text
+segment (a string), an arrayref (which is fed thru L</new_from_lol>),
+or an HTML::Element object that's not already
+a child of C<$h>.
+
+=head2 detach
+
+  $old_parent = $h->detach();
+
+This unlinks C<$h> from its parent, by setting its '_parent' attribute to
+undef, and by removing it from the content list of its parent (if it
+had one).  The return value is the parent that was detached from (or
+undef, if C<$h> had no parent to start with).  Note that neither C<$h> nor
+its parent is explicitly destroyed.
+
+=head2 detach_content
+
+  @old_content = $h->detach_content();
+
+This unlinks all of C<$h>'s children from C<$h>, and returns them.
+Note that these are not explicitly destroyed; for that, you
+can just use C<< $h->delete_content >>.
+
+=head2 replace_with
+
+  $h->replace_with( $element_or_text, ... )
+
+This replaces C<$h> in its parent's content list with the nodes
+specified.  The element C<$h> (which by then may have no parent)
+is returned.  This causes a fatal error if C<$h> has no parent.
+The list of nodes to insert may contain C<$h>, but at most once.
+Aside from that possible exception, the nodes to insert should not
+already be children of C<$h>'s parent.
+
+Also, note that this method does not destroy C<$h> if weak references are
+turned off -- use C<< $h->replace_with(...)->delete >> if you need that.
+
+=head2 preinsert
+
+  $h->preinsert($element_or_text...);
+
+Inserts the given nodes right BEFORE C<$h> in C<$h>'s parent's
+content list.  This causes a fatal error if C<$h> has no parent.
+None of the given nodes should be C<$h> or other children of C<$h>.
+Returns C<$h>.
+
+=head2 postinsert
+
+  $h->postinsert($element_or_text...)
+
+Inserts the given nodes right AFTER C<$h> in C<$h>'s parent's content
+list.  This causes a fatal error if C<$h> has no parent.  None of
+the given nodes should be C<$h> or other children of C<$h>.  Returns
+C<$h>.
+
+=head2 replace_with_content
+
+  $h->replace_with_content();
+
+This replaces C<$h> in its parent's content list with its own content.
+The element C<$h> (which by then has no parent or content of its own) is
+returned.  This causes a fatal error if C<$h> has no parent.  Also, note
+that this does not destroy C<$h> if weak references are turned off -- use
+C<< $h->replace_with_content->delete >> if you need that.
+
+=head2 delete_content
+
+  $h->delete_content();
+  $h->destroy_content(); # alias
+
+Clears the content of C<$h>, calling C<< $h->delete >> for each content
+element.  Compare with C<< $h->detach_content >>.
+
+Returns C<$h>.
+
+C<destroy_content> is an alias for this method.
+
+=head2 delete
+
+  $h->delete();
+  $h->destroy(); # alias
+
+Detaches this element from its parent (if it has one) and explicitly
+destroys the element and all its descendants.  The return value is
+the empty list (or C<undef> in scalar context).
+
+Before version 5.00 of HTML::Element, you had to call C<delete> when
+you were finished with the tree, or your program would leak memory.
+This is no longer necessary if weak references are enabled, see
+L</"Weak References">.
+
+=head2 destroy
+
+An alias for L</delete>.
+
+=head2 destroy_content
+
+An alias for L</delete_content>.
+
+=head2 clone
+
+  $copy = $h->clone();
+
+Returns a copy of the element (whose children are clones (recursively)
+of the original's children, if any).
+
+The returned element is parentless.  Any '_pos' attributes present in the
+source element/tree will be absent in the copy.  For that and other reasons,
+the clone of an HTML::TreeBuilder object that's in mid-parse (i.e., the head
+of a tree that HTML::TreeBuilder is elaborating) cannot (currently) be used
+to continue the parse.
+
+You are free to clone HTML::TreeBuilder trees, just as long as:
+1) they're done being parsed, or 2) you don't expect to resume parsing
+into the clone.  (You can continue parsing into the original; it is
+never affected.)
+
+=head2 clone_list
+
+  @copies = HTML::Element->clone_list(...nodes...);
+
+Returns a list consisting of a copy of each node given.
+Text segments are simply copied; elements are cloned by
+calling C<< $it->clone >> on each of them.
+
+Note that this must be called as a class method, not as an instance
+method.  C<clone_list> will croak if called as an instance method.
+You can also call it like so:
+
+    ref($h)->clone_list(...nodes...)
+
+=head2 normalize_content
+
+  $h->normalize_content
+
+Normalizes the content of C<$h> -- i.e., concatenates any adjacent
+text nodes.  (Any undefined text segments are turned into empty-strings.)
+Note that this does not recurse into C<$h>'s descendants.
+
+=head2 delete_ignorable_whitespace
+
+  $h->delete_ignorable_whitespace()
+
+This traverses under C<$h> and deletes any text segments that are ignorable
+whitespace.  You should not use this if C<$h> is under a C<< <pre> >> element.
+
+=head2 insert_element
+
+  $h->insert_element($element, $implicit);
+
+Inserts (via push_content) a new element under the element at
+C<< $h->pos() >>.  Then updates C<< $h->pos() >> to point to the inserted
+element, unless $element is a prototypically empty element like
+C<< <br> >>, C<< <hr> >>, C<< <img> >>, etc.
+The new C<< $h->pos() >> is returned.  This
+method is useful only if your particular tree task involves setting
+C<< $h->pos() >>.
+
+=head1 DUMPING METHODS
+
+=head2 dump
+
+  $h->dump()
+  $h->dump(*FH)  ; # or *FH{IO} or $fh_obj
+
+Prints the element and all its children to STDOUT (or to a specified
+filehandle), in a format useful
+only for debugging.  The structure of the document is shown by
+indentation (no end tags).
+
+=head2 as_HTML
+
+  $s = $h->as_HTML();
+  $s = $h->as_HTML($entities);
+  $s = $h->as_HTML($entities, $indent_char);
+  $s = $h->as_HTML($entities, $indent_char, \%optional_end_tags);
+
+Returns a string representing in HTML the element and its
+descendants.  The optional argument C<$entities> specifies a string of
+the entities to encode.  For compatibility with previous versions,
+specify C<'E<lt>E<gt>&'> here.  If omitted or undef, I<all> unsafe
+characters are encoded as HTML entities.  See L<HTML::Entities> for
+details.  If passed an empty string, no entities are encoded.
+
+If C<$indent_char> is specified and defined, the HTML to be output is
+indented, using the string you specify (which you probably should
+set to "\t", or some number of spaces, if you specify it).
+
+If C<\%optional_end_tags> is specified and defined, it should be
+a reference to a hash that holds a true value for every tag name
+whose end tag is optional.  Defaults to
+C<\%HTML::Element::optionalEndTag>, which is an alias to
+C<%HTML::Tagset::optionalEndTag>, which, at time of writing, contains
+true values for C<p, li, dt, dd>.  A useful value to pass is an empty
+hashref, C<{}>, which means that no end-tags are optional for this dump.
+Otherwise, possibly consider copying C<%HTML::Tagset::optionalEndTag> to a
+hash of your own, adding or deleting values as you like, and passing
+a reference to that hash.
+
+=head2 as_text
+
+  $s = $h->as_text();
+  $s = $h->as_text(skip_dels => 1);
+
+Returns a string consisting of only the text parts of the element's
+descendants.  Any whitespace inside the element is included unchanged,
+but whitespace not in the tree is never added.  But remember that
+whitespace may be ignored or compacted by HTML::TreeBuilder during
+parsing (depending on the value of the C<ignore_ignorable_whitespace>
+and C<no_space_compacting> attributes).  Also, since whitespace is
+never added during parsing,
+
+  HTML::TreeBuilder->new_from_content("<p>a</p><p>b</p>")
+                   ->as_text;
+
+returns C<"ab">, not C<"a b"> or C<"a\nb">.
+
+Text under C<< <script> >> or C<< <style> >> elements is never
+included in what's returned.  If C<skip_dels> is true, then text
+content under C<< <del> >> nodes is not included in what's returned.
+
+=head2 as_trimmed_text
+
+  $s = $h->as_trimmed_text(...);
+  $s = $h->as_trimmed_text(extra_chars => '\xA0'); # remove &nbsp;
+  $s = $h->as_text_trimmed(...); # alias
+
+This is just like C<as_text(...)> except that leading and trailing
+whitespace is deleted, and any internal whitespace is collapsed.
+
+This will not remove non-breaking spaces, Unicode spaces, or any other
+non-ASCII whitespace unless you supply the extra characters as
+a string argument (e.g. C<< $h->as_trimmed_text(extra_chars => '\xA0') >>).
+C<extra_chars> may be any string that can appear inside a character
+class, including ranges like C<a-z>, POSIX character classes like
+C<[:alpha:]>, and character class escapes like C<\p{Zs}>.
+
+=head2 as_XML
+
+  $s = $h->as_XML()
+
+Returns a string representing in XML the element and its descendants.
+
+The XML is not indented.
+
+=head2 as_Lisp_form
+
+  $s = $h->as_Lisp_form();
+
+Returns a string representing the element and its descendants as a
+Lisp form.  Unsafe characters are encoded as octal escapes.
+
+The Lisp form is indented, and contains external ("href", etc.)  as
+well as internal attributes ("_tag", "_content", "_implicit", etc.),
+except for "_parent", which is omitted.
+
+Current example output for a given element:
+
+  ("_tag" "img" "border" "0" "src" "pie.png" "usemap" "#main.map")
+
+=head2 format
+
+  $s = $h->format; # use HTML::FormatText
+  $s = $h->format($formatter);
+
+Formats text output. Defaults to HTML::FormatText.
+
+Takes a second argument that is a reference to a formatter.
+
+=head2 starttag
+
+  $start = $h->starttag();
+  $start = $h->starttag($entities);
+
+Returns a string representing the complete start tag for the element.
+I.e., leading "<", tag name, attributes, and trailing ">".
+All values are surrounded with
+double-quotes, and appropriate characters are encoded.  If C<$entities>
+is omitted or undef, I<all> unsafe characters are encoded as HTML
+entities.  See L<HTML::Entities> for details.  If you specify some
+value for C<$entities>, remember to include the double-quote character in
+it.  (Previous versions of this module would basically behave as if
+C<'&"E<gt>'> were specified for C<$entities>.)  If C<$entities> is
+an empty string, no entity is escaped.
+
+=head2 starttag_XML
+
+  $start = $h->starttag_XML();
+
+Returns a string representing the complete start tag for the element.
+
+=head2 endtag
+
+  $end = $h->endtag();
+
+Returns a string representing the complete end tag for this element.
+I.e., "</", tag name, and ">".
+
+=head2 endtag_XML
+
+  $end = $h->endtag_XML();
+
+Returns a string representing the complete end tag for this element.
+I.e., "</", tag name, and ">".
+
+=head1 SECONDARY STRUCTURAL METHODS
+
+These methods all involve some structural aspect of the tree;
+either they report some aspect of the tree's structure, or they involve
+traversal down the tree, or walking up the tree.
+
+=head2 is_inside
+
+  $inside = $h->is_inside('tag', $element, ...);
+
+Returns true if the C<$h> element is, or is contained anywhere inside an
+element that is any of the ones listed, or whose tag name is any of
+the tag names listed.  You can use any mix of elements and tag names.
+
+=head2 is_empty
+
+  $empty = $h->is_empty();
+
+Returns true if C<$h> has no content, i.e., has no elements or text
+segments under it.  In other words, this returns true if C<$h> is a leaf
+node, AKA a terminal node.  Do not confuse this sense of "empty" with
+another sense that it can have in SGML/HTML/XML terminology, which
+means that the element in question is of the type (like HTML's C<< <hr> >>,
+C<< <br> >>, C<< <img> >>, etc.) that I<can't> have any content.
+
+That is, a particular C<< <p> >> element may happen to have no content, so
+$that_p_element->is_empty will be true -- even though the prototypical
+C<< <p> >> element isn't "empty" (not in the way that the prototypical
+C<< <hr> >> element is).
+
+If you think this might make for potentially confusing code, consider
+simply using the clearer exact equivalent:  C<< not($h->content_list) >>.
+
+=head2 pindex
+
+  $index = $h->pindex();
+
+Return the index of the element in its parent's contents array, such
+that C<$h> would equal
+
+  $h->parent->content->[$h->pindex]
+  # or
+  ($h->parent->content_list)[$h->pindex]
+
+assuming C<$h> isn't root.  If the element C<$h> is root, then
+C<< $h->pindex >> returns C<undef>.
+
+=head2 left
+
+  $element = $h->left();
+  @elements = $h->left();
+
+In scalar context: returns the node that's the immediate left sibling
+of C<$h>.  If C<$h> is the leftmost (or only) child of its parent (or has no
+parent), then this returns undef.
+
+In list context: returns all the nodes that're the left siblings of C<$h>
+(starting with the leftmost).  If C<$h> is the leftmost (or only) child
+of its parent (or has no parent), then this returns an empty list.
+
+(See also C<< $h->preinsert(LIST) >>.)
+
+=head2 right
+
+  $element = $h->right();
+  @elements = $h->right();
+
+In scalar context: returns the node that's the immediate right sibling
+of C<$h>.  If C<$h> is the rightmost (or only) child of its parent (or has
+no parent), then this returns C<undef>.
+
+In list context: returns all the nodes that're the right siblings of
+C<$h>, starting with the leftmost.  If C<$h> is the rightmost (or only) child
+of its parent (or has no parent), then this returns an empty list.
+
+(See also C<< $h->postinsert(LIST) >>.)
+
+=head2 address
+
+  $address = $h->address();
+  $element_or_text = $h->address($address);
+
+The first form (with no parameter) returns a string representing the
+location of C<$h> in the tree it is a member of.
+The address consists of numbers joined by a '.', starting with '0',
+and followed by the pindexes of the nodes in the tree that are
+ancestors of C<$h>, starting from the top.
+
+So if the way to get to a node starting at the root is to go to child
+2 of the root, then child 10 of that, and then child 0 of that, and
+then you're there -- then that node's address is "0.2.10.0".
+
+As a bit of a special case, the address of the root is simply "0".
+
+I foresee this being used mainly for debugging, but you may
+find your own uses for it.
+
+  $element_or_text = $h->address($address);
+
+This form returns the node (whether element or text-segment) at
+the given address in the tree that C<$h> is a part of.  (That is,
+the address is resolved starting from C<< $h->root >>.)
+
+If there is no node at the given address, this returns C<undef>.
+
+You can specify "relative addressing" (i.e., that indexing is supposed
+to start from C<$h> and not from C<< $h->root >>) by having the address start
+with a period -- e.g., C<< $h->address(".3.2") >> will look at child 3 of C<$h>,
+and child 2 of that.
+
+=head2 depth
+
+  $depth = $h->depth();
+
+Returns a number expressing C<$h>'s depth within its tree, i.e., how many
+steps away it is from the root.  If C<$h> has no parent (i.e., is root),
+its depth is 0.
+
+=head2 root
+
+  $root = $h->root();
+
+Returns the element that's the top of C<$h>'s tree.  If C<$h> is
+root, this just returns C<$h>.  (If you want to test whether C<$h>
+I<is> the root, instead of asking what its root is, just test
+C<< not($h->parent) >>.)
+
+=head2 lineage
+
+  @lineage = $h->lineage();
+
+Returns the list of C<$h>'s ancestors, starting with its parent,
+and then that parent's parent, and so on, up to the root.  If C<$h>
+is root, this returns an empty list.
+
+If you simply want a count of the number of elements in C<$h>'s lineage,
+use C<< $h->depth >>.
+
+=head2 lineage_tag_names
+
+  @names = $h->lineage_tag_names();
+
+Returns the list of the tag names of C<$h>'s ancestors, starting
+with its parent, and that parent's parent, and so on, up to the
+root.  If C<$h> is root, this returns an empty list.
+Example output: C<('em', 'td', 'tr', 'table', 'body', 'html')>
+
+Equivalent to:
+
+  map { $_->tag } $h->lineage;
+
+=head2 descendants
+
+  @descendants = $h->descendants();
+
+In list context, returns the list of all C<$h>'s descendant elements,
+listed in pre-order (i.e., an element appears before its
+content-elements).  Text segments DO NOT appear in the list.
+In scalar context, returns a count of all such elements.
+
+=head2 descendents
+
+This is just an alias to the C<descendants> method, for people who
+can't spell.
+
+=head2 find_by_tag_name
+
+  @elements = $h->find_by_tag_name('tag', ...);
+  $first_match = $h->find_by_tag_name('tag', ...);
+
+In list context, returns a list of elements at or under C<$h> that have
+any of the specified tag names.  In scalar context, returns the first
+(in pre-order traversal of the tree) such element found, or undef if
+none.
+
+=head2 find
+
+This is just an alias to C<find_by_tag_name>.  (There was once
+going to be a whole find_* family of methods, but then C<look_down>
+filled that niche, so there turned out not to be much reason for the
+verboseness of the name "find_by_tag_name".)
+
+=head2 find_by_attribute
+
+  @elements = $h->find_by_attribute('attribute', 'value');
+  $first_match = $h->find_by_attribute('attribute', 'value');
+
+In a list context, returns a list of elements at or under C<$h> that have
+the specified attribute, and have the given value for that attribute.
+In a scalar context, returns the first (in pre-order traversal of the
+tree) such element found, or undef if none.
+
+This method is B<deprecated> in favor of the more expressive
+C<look_down> method, which new code should use instead.
+
+=head2 look_down
+
+  @elements = $h->look_down( ...criteria... );
+  $first_match = $h->look_down( ...criteria... );
+
+This starts at C<$h> and looks thru its element descendants (in
+pre-order), looking for elements matching the criteria you specify.
+In list context, returns all elements that match all the given
+criteria; in scalar context, returns the first such element (or undef,
+if nothing matched).
+
+There are three kinds of criteria you can specify:
+
+=over
+
+=item (attr_name, attr_value)
+
+This means you're looking for an element with that value for that
+attribute.  Example: C<"alt", "pix!">.  Consider that you can search
+on internal attribute values too: C<"_tag", "p">.
+
+=item (attr_name, qr/.../)
+
+This means you're looking for an element whose value for that
+attribute matches the specified Regexp object.
+
+=item a coderef
+
+This means you're looking for elements where coderef->(each_element)
+returns true.  Example:
+
+  my @wide_pix_images = $h->look_down(
+    _tag => "img",
+    alt  => "pix!",
+    sub { $_[0]->attr('width') > 350 }
+  );
+
+=back
+
+Note that C<(attr_name, attr_value)> and C<(attr_name, qr/.../)>
+criteria are almost always faster than coderef
+criteria, so should presumably be put before them in your list of
+criteria.  That is, in the example above, the sub ref is called only
+for elements that have already passed the criteria of having a "_tag"
+attribute with value "img", and an "alt" attribute with value "pix!".
+If the coderef were first, it would be called on every element, and
+I<then> what elements pass that criterion (i.e., elements for which
+the coderef returned true) would be checked for their "_tag" and "alt"
+attributes.
+
+Note that comparison of string attribute-values against the string
+value in C<(attr_name, attr_value)> is case-INsensitive!  A criterion
+of C<('align', 'right')> I<will> match an element whose "align" value
+is "RIGHT", or "right" or "rIGhT", etc.
+
+Note also that C<look_down> considers "" (empty-string) and undef to
+be different things, in attribute values.  So this:
+
+  $h->look_down("alt", "")
+
+will find elements I<with> an "alt" attribute, but where the value for
+the "alt" attribute is "".  But this:
+
+  $h->look_down("alt", undef)
+
+is the same as:
+
+  $h->look_down(sub { !defined($_[0]->attr('alt')) } )
+
+That is, it finds elements that do not have an "alt" attribute at all
+(or that do have an "alt" attribute, but with a value of undef --
+which is not normally possible).
+
+Note that when you give several criteria, this is taken to mean you're
+looking for elements that match I<all> your criteria, not just I<any>
+of them.  In other words, there is an implicit "and", not an "or".  So
+if you wanted to express that you wanted to find elements with a
+"name" attribute with the value "foo" I<or> with an "id" attribute
+with the value "baz", you'd have to do it like:
+
+  @them = $h->look_down(
+    sub {
+      # the lcs are to fold case
+      lc($_[0]->attr('name')) eq 'foo'
+      or lc($_[0]->attr('id')) eq 'baz'
+    }
+  );
+
+Coderef criteria are more expressive than C<(attr_name, attr_value)>
+and C<(attr_name, qr/.../)>
+criteria, and all C<(attr_name, attr_value)>
+and C<(attr_name, qr/.../)>
+criteria could be
+expressed in terms of coderefs.  However, C<(attr_name, attr_value)>
+and C<(attr_name, qr/.../)>
+criteria are a convenient shorthand.  (In fact, C<look_down> itself is
+basically "shorthand" too, since anything you can do with C<look_down>
+you could do by traversing the tree, either with the C<traverse>
+method or with a routine of your own.  However, C<look_down> often
+makes for very concise and clear code.)
+
+=head2 look_up
+
+  @elements = $h->look_up( ...criteria... );
+  $first_match = $h->look_up( ...criteria... );
+
+This is identical to C<< $h->look_down >>, except that whereas
+C<< $h->look_down >>
+basically scans over the list:
+
+   ($h, $h->descendants)
+
+C<< $h->look_up >> instead scans over the list
+
+   ($h, $h->lineage)
+
+So, for example, this returns all ancestors of C<$h> (possibly including
+C<$h> itself) that are C<< <td> >> elements with an "align" attribute with a
+value of "right" (or "RIGHT", etc.):
+
+   $h->look_up("_tag", "td", "align", "right");
+
+=head2 traverse
+
+  $h->traverse(...options...)
+
+Lengthy discussion of HTML::Element's unnecessary and confusing
+C<traverse> method has been moved to a separate file:
+L<HTML::Element::traverse>
+
+=head2 attr_get_i
+
+  @values = $h->attr_get_i('attribute');
+  $first_value = $h->attr_get_i('attribute');
+
+In list context, returns a list consisting of the values of the given
+attribute for C<$h> and for all its ancestors starting from C<$h> and
+working its way up.  Nodes with no such attribute are skipped.
+("attr_get_i" stands for "attribute get, with inheritance".)
+In scalar context, returns the first such value, or undef if none.
+
+Consider a document consisting of:
+
+   <html lang='i-klingon'>
+     <head><title>Pati Pata</title></head>
+     <body>
+       <h1 lang='la'>Stuff</h1>
+       <p lang='es-MX' align='center'>
+         Foo bar baz <cite>Quux</cite>.
+       </p>
+       <p>Hooboy.</p>
+     </body>
+   </html>
+
+If C<$h> is the C<< <cite> >> element, C<< $h->attr_get_i("lang") >>
+in list context will return the list C<('es-MX', 'i-klingon')>.
+In scalar context, it will return the value C<'es-MX'>.
+
+If you call with multiple attribute names...
+
+  @values = $h->attr_get_i('a1', 'a2', 'a3');
+  $first_value = $h->attr_get_i('a1', 'a2', 'a3');
+
+...in list context, this will return a list consisting of
+the values of these attributes which exist in C<$h> and its ancestors.
+In scalar context, this returns the first value (i.e., the value of
+the first existing attribute from the first element that has
+any of the attributes listed).  So, in the above example,
+
+  $h->attr_get_i('lang', 'align');
+
+will return:
+
+   ('es-MX', 'center', 'i-klingon') # in list context
+  or
+   'es-MX' # in scalar context.
+
+But note that this:
+
+ $h->attr_get_i('align', 'lang');
+
+will return:
+
+   ('center', 'es-MX', 'i-klingon') # in list context
+  or
+   'center' # in scalar context.
+
+=head2 tagname_map
+
+  $hash_ref = $h->tagname_map();
+
+Scans across C<$h> and all its descendants, and makes a hash (a
+reference to which is returned) where each entry consists of a key
+that's a tag name, and a value that's a reference to a list of all
+elements that have that tag name.  I.e., this method returns:
+
+   {
+     # Across $h and all descendants...
+     'a'   => [ ...list of all <a>   elements... ],
+     'em'  => [ ...list of all <em>  elements... ],
+     'img' => [ ...list of all <img> elements... ],
+   }
+
+(There are entries in the hash for only those tagnames that occur
+at/under C<$h> -- so if there's no C<< <img> >> elements, there'll be no
+"img" entry in the returned hashref.)
+
+Example usage:
+
+    my $map_r = $h->tagname_map();
+    my @heading_tags = sort grep m/^h\d$/s, keys %$map_r;
+    if(@heading_tags) {
+      print "Heading levels used: @heading_tags\n";
+    } else {
+      print "No headings.\n"
+    }
+
+=head2 extract_links
+
+  $links_array_ref = $h->extract_links();
+  $links_array_ref = $h->extract_links(@wantedTypes);
+
+Returns links found by traversing the element and all of its children
+and looking for attributes (like "href" in an C<< <a> >> element, or "src" in
+an C<< <img> >> element) whose values represent links.  The return value is a
+I<reference> to an array.  Each element of the array is a reference to
+an array with I<four> items: the link-value, the element that has the
+attribute with that link-value, and the name of that attribute, and
+the tagname of that element.
+(Example: C<['http://www.suck.com/',> I<$elem_obj> C<, 'href', 'a']>.)
+You may or may not end up using the
+element itself -- for some purposes, you may use only the link value.
+
+You might specify that you want to extract links from just some kinds
+of elements (instead of the default, which is to extract links from
+I<all> the kinds of elements known to have attributes whose values
+represent links).  For instance, if you want to extract links from
+only C<< <a> >> and C<< <img> >> elements, you could code it like this:
+
+  for (@{  $e->extract_links('a', 'img')  }) {
+      my($link, $element, $attr, $tag) = @$_;
+      print
+        "Hey, there's a $tag that links to ",
+        $link, ", in its $attr attribute, at ",
+        $element->address(), ".\n";
+  }
+
+=head2 simplify_pres
+
+  $h->simplify_pres();
+
+In text bits under PRE elements that are at/under C<$h>, this routine
+nativizes all newlines, and expands all tabs.
+
+That is, if you read a file with lines delimited by C<\cm\cj>'s, the
+text under PRE areas will have C<\cm\cj>'s instead of C<\n>'s. Calling
+C<< $h->simplify_pres >> on such a tree will turn C<\cm\cj>'s into
+C<\n>'s.
+
+Tabs are expanded to however many spaces it takes to get
+to the next 8th column -- the usual way of expanding them.
+
+=head2 same_as
+
+  $equal = $h->same_as($i)
+
+Returns true if C<$h> and C<$i> are both elements representing the same tree
+of elements, each with the same tag name, with the same explicit
+attributes (i.e., not counting attributes whose names start with "_"),
+and with the same content (textual, comments, etc.).
+
+Sameness of descendant elements is tested, recursively, with
+C<< $child1->same_as($child_2) >>, and sameness of text segments is tested
+with C<$segment1 eq $segment2>.
+
+=head2 new_from_lol
+
+  $h = HTML::Element->new_from_lol($array_ref);
+  @elements = HTML::Element->new_from_lol($array_ref, ...);
+
+Recursively constructs a tree of nodes, based on the (non-cyclic)
+data structure represented by each C<$array_ref>, where that is a reference
+to an array of arrays (of arrays (of arrays (etc.))).
+
+In each arrayref in that structure, different kinds of values are
+treated as follows:
+
+=over
+
+=item * Arrayrefs
+
+Arrayrefs are considered to
+designate a sub-tree representing children for the node constructed
+from the current arrayref.
+
+=item * Hashrefs
+
+Hashrefs are considered to contain
+attribute-value pairs to add to the element to be constructed from
+the current arrayref
+
+=item * Text segments
+
+Text segments at the start of any arrayref
+will be considered to specify the name of the element to be
+constructed from the current arrayref; all other text segments will
+be considered to specify text segments as children for the current
+arrayref.
+
+=item * Elements
+
+Existing element objects are either inserted into the treelet
+constructed, or clones of them are.  That is, when the lol-tree is
+being traversed and elements constructed based on what's in it, if
+an existing element object is found, if it has no parent, then it is
+added directly to the treelet constructed; but if it has a parent,
+then C<$that_node-E<gt>clone> is added to the treelet at the
+appropriate place.
+
+=back
+
+An example will hopefully make this more obvious:
+
+  my $h = HTML::Element->new_from_lol(
+    ['html',
+      ['head',
+        [ 'title', 'I like stuff!' ],
+      ],
+      ['body',
+        {'lang', 'en-JP', _implicit => 1},
+        'stuff',
+        ['p', 'um, p < 4!', {'class' => 'par123'}],
+        ['div', {foo => 'bar'}, '123'],
+      ]
+    ]
+  );
+  $h->dump;
+
+Will print this:
+
+  <html> @0
+    <head> @0.0
+      <title> @0.0.0
+        "I like stuff!"
+    <body lang="en-JP"> @0.1 (IMPLICIT)
+      "stuff"
+      <p class="par123"> @0.1.1
+        "um, p < 4!"
+      <div foo="bar"> @0.1.2
+        "123"
+
+And printing $h->as_HTML will give something like:
+
+  <html><head><title>I like stuff!</title></head>
+  <body lang="en-JP">stuff<p class="par123">um, p &lt; 4!
+  <div foo="bar">123</div></body></html>
+
+You can even do fancy things with C<map>:
+
+  $body->push_content(
+    # push_content implicitly calls new_from_lol on arrayrefs...
+    ['br'],
+    ['blockquote',
+      ['h2', 'Pictures!'],
+      map ['p', $_],
+      $body2->look_down("_tag", "img"),
+        # images, to be copied from that other tree.
+    ],
+    # and more stuff:
+    ['ul',
+      map ['li', ['a', {'href'=>"$_.png"}, $_ ] ],
+      qw(Peaches Apples Pears Mangos)
+    ],
+  );
+
+In scalar context, you must supply exactly one arrayref.  In list
+context, you can pass a list of arrayrefs, and L</new_from_lol> will
+return a list of elements, one for each arrayref.
+
+  @elements = HTML::Element->new_from_lol(
+    ['hr'],
+    ['p', 'And there, on the door, was a hook!'],
+  );
+   # constructs two elements.
+
+=head2 objectify_text
+
+  $h->objectify_text();
+
+This turns any text nodes under C<$h> from mere text segments (strings)
+into real objects, pseudo-elements with a tag-name of "~text", and the
+actual text content in an attribute called "text".  (For a discussion
+of pseudo-elements, see the L</"tag"> method, far above.)  This method is
+provided because, for some purposes, it is convenient or necessary to
+be able, for a given text node, to ask what element is its parent; and
+clearly this is not possible if a node is just a text string.
+
+Note that these "~text" objects are not recognized as text nodes by
+methods like L</as_text>.  Presumably you will want to call
+C<< $h->objectify_text >>, perform whatever task that you needed that for,
+and then call C<< $h->deobjectify_text >> before calling anything like
+C<< $h->as_text >>.
+
+=head2 deobjectify_text
+
+  $h->deobjectify_text();
+
+This undoes the effect of C<< $h->objectify_text >>.  That is, it takes any
+"~text" pseudo-elements in the tree at/under C<$h>, and deletes each one,
+replacing each with the content of its "text" attribute.
+
+Note that if C<$h> itself is a "~text" pseudo-element, it will be
+destroyed -- a condition you may need to treat specially in your
+calling code (since it means you can't very well do anything with C<$h>
+after that).  So that you can detect that condition, if C<$h> is itself a
+"~text" pseudo-element, then this method returns the value of the
+"text" attribute, which should be a defined value; in all other cases,
+it returns undef.
+
+(This method assumes that no "~text" pseudo-element has any children.)
+
+=head2 number_lists
+
+  $h->number_lists();
+
+For every UL, OL, DIR, and MENU element at/under C<$h>, this sets a
+"_bullet" attribute for every child LI element.  For LI children of an
+OL, the "_bullet" attribute's value will be something like "4.", "d.",
+"D.", "IV.", or "iv.", depending on the OL element's "type" attribute.
+LI children of a UL, DIR, or MENU get their "_bullet" attribute set
+to "*".
+There should be no other LIs (i.e., except as children of OL, UL, DIR,
+or MENU elements), and if there are, they are unaffected.
+
+=head2 has_insane_linkage
+
+  $h->has_insane_linkage
+
+This method is for testing whether this element or the elements
+under it have linkage attributes (_parent and _content) whose values
+are deeply aberrant: if there are undefs in a content list; if an
+element appears in the content lists of more than one element;
+if the _parent attribute of an element doesn't match its actual
+parent; or if an element appears as its own descendant (i.e.,
+if there is a cyclicity in the tree).
+
+This returns empty list (or false, in scalar context) if the subtree's
+linkage methods are sane; otherwise it returns two items (or true, in
+scalar context): the element where the error occurred, and a string
+describing the error.
+
+This method is provided mainly for debugging and troubleshooting --
+it should be I<quite impossible> for any document constructed via
+HTML::TreeBuilder to parse into a non-sane tree (since it's not
+the content of the tree per se that's in question, but whether
+the tree in memory was properly constructed); and it I<should> be
+impossible for you to produce an insane tree just thru reasonable
+use of normal documented structure-modifying methods.  But if you're
+constructing your own trees, and your program is going into infinite
+loops during calls to traverse() or any of the secondary
+structural methods, as part of debugging, consider calling
+C<has_insane_linkage> on the tree.
+
+=head2 element_class
+
+  $classname = $h->element_class();
+
+This method returns the class which will be used for new elements.  It
+defaults to HTML::Element, but can be overridden by subclassing or esoteric
+means best left to those who will read the source and then not complain when
+those esoteric means change.  (Just subclass.)
+
+=head1 CLASS METHODS
+
+=head2 Use_Weak_Refs
+
+  $enabled = HTML::Element->Use_Weak_Refs;
+  HTML::Element->Use_Weak_Refs( $enabled );
+
+This method allows you to check whether weak reference support is
+enabled, and to enable or disable it. For details, see L</"Weak References">.
+C<$enabled> is true if weak references are enabled.
+
+You should not switch this in the middle of your program, and you
+probably shouldn't use it at all.  Existing trees are not affected by
+this method (until you start modifying nodes in them).
+
+Throws an exception if you attempt to enable weak references and your
+Perl or Scalar::Util does not support them.
+
+Disabling weak reference support is deprecated.
+
+=head1 SUBROUTINES
+
+=head2 Version
+
+This subroutine is deprecated.  Please use the standard VERSION method
+(e.g. C<< HTML::Element->VERSION >>) instead.
+
+=head2 ABORT OK PRUNE PRUNE_SOFTLY PRUNE_UP
+
+Constants for signalling back to the traverser
+
+=for Pod::Coverage as_text_trimmed
+
+=head1 BUGS
+
+* If you want to free the memory associated with a tree built of
+HTML::Element nodes, and you have disabled weak references, then you
+will have to delete it explicitly using the L</delete> method.
+See L</"Weak References">.
+
+* There's almost nothing to stop you from making a "tree" with
+cyclicities (loops) in it, which could, for example, make the
+traverse method go into an infinite loop.  So don't make
+cyclicities!  (If all you're doing is parsing HTML files,
+and looking at the resulting trees, this will never be a problem
+for you.)
+
+* There's no way to represent comments or processing directives
+in a tree with HTML::Elements.  Not yet, at least.
+
+* There's (currently) nothing to stop you from using an undefined
+value as a text segment.  If you're running under C<perl -w>, however,
+this may make HTML::Element's code produce a slew of warnings.
+
+=head1 NOTES ON SUBCLASSING
+
+You are welcome to derive subclasses from HTML::Element, but you
+should be aware that the code in HTML::Element makes certain
+assumptions about elements (and I'm using "element" to mean ONLY an
+object of class HTML::Element, or of a subclass of HTML::Element):
+
+* The value of an element's _parent attribute must either be undef or
+otherwise false, or must be an element.
+
+* The value of an element's _content attribute must either be undef or
+otherwise false, or a reference to an (unblessed) array.  The array
+may be empty; but if it has items, they must ALL be either mere
+strings (text segments), or elements.
+
+* The value of an element's _tag attribute should, at least, be a
+string of printable characters.
+
+Moreover, bear these rules in mind:
+
+* Do not break encapsulation on objects.  That is, access their
+contents only thru $obj->attr or more specific methods.
+
+* You should think twice before completely overriding any of the
+methods that HTML::Element provides.  (Overriding with a method that
+calls the superclass method is not so bad, though.)
+
+=head1 SEE ALSO
+
+L<HTML::Tree>; L<HTML::TreeBuilder>; L<HTML::AsSubs>; L<HTML::Tagset>;
+and, for the morbidly curious, L<HTML::Element::traverse>.
+
+=head1 ACKNOWLEDGEMENTS
+
+Thanks to Mark-Jason Dominus for a POD suggestion.
+
+=head1 AUTHOR
+
+Current maintainers:
+
+=over
+
+=item * Christopher J. Madsen S<C<< <perl AT cjmweb.net> >>>
+
+=item * Jeff Fearn S<C<< <jfearn AT cpan.org> >>>
+
+=back
+
+Original HTML-Tree author:
+
+=over
+
+=item * Gisle Aas
+
+=back
+
+Former maintainers:
+
+=over
+
+=item * Sean M. Burke
+
+=item * Andy Lester
+
+=item * Pete Krawczyk S<C<< <petek AT cpan.org> >>>
+
+=back
+
+You can follow or contribute to HTML-Tree's development at
+L<< http://github.com/madsen/HTML-Tree >>.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 1995-1998 Gisle Aas, 1999-2004 Sean M. Burke,
+2005 Andy Lester, 2006 Pete Krawczyk, 2010 Jeff Fearn,
+2012 Christopher J. Madsen.
 
 This library is free software; you can redistribute it and/or
 modify it under the same terms as Perl itself.
 
+The programs in this library are distributed in the hope that they
+will be useful, but without any warranty; without even the implied
+warranty of merchantability or fitness for a particular purpose.
+
 =cut
diff --git a/lib/site/HTML/FormatMarkdown.pm b/lib/site/HTML/FormatMarkdown.pm
new file mode 100644
index 000000000..600f3a5cf
--- /dev/null
+++ b/lib/site/HTML/FormatMarkdown.pm
@@ -0,0 +1,375 @@
+package HTML::FormatMarkdown;
+
+# ABSTRACT: Format HTML as Markdown
+
+
+use 5.006_001;
+use strict;
+use warnings;
+
+use parent 'HTML::Formatter';
+
+our $VERSION = '2.14'; # VERSION
+our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
+
+sub default_values {
+    my $self = shift;
+
+    # Parent defaults come first, followed by this class's margin columns.
+    return ( $self->SUPER::default_values(), lm => 0, rm => 70 );
+}
+
+sub configure {
+    my ( $self, $hash ) = @_;
+
+    # Current margins are the fallback; a long-form option beats its short form.
+    my ( $left, $right ) = @{$self}{qw(lm rm)};
+    for my $alias (qw(lm leftmargin)) {
+        $left = delete $hash->{$alias} if exists $hash->{$alias};
+    }
+    for my $alias (qw(rm rightmargin)) {
+        $right = delete $hash->{$alias} if exists $hash->{$alias};
+    }
+
+    # Refuse an empty or inverted text area, leaving the object untouched.
+    my $usable = $right - $left;
+    if ( $usable < 1 ) {
+        warn "Bad margins, ignored" if $^W;
+        return;
+    }
+    warn "Page probably too narrow" if $^W && $usable < 20;
+
+    # Anything still left in $hash is not an option this class knows.
+    if ($^W) {
+        warn "Unknown configure option '$_'" for keys %$hash;
+    }
+
+    @{$self}{qw(lm rm)} = ( $left, $right );
+    return $self;
+}
+
+sub begin {
+    my ($self) = @_;
+
+    # Parent set-up runs first; then both column trackers are zeroed.
+    $self->SUPER::begin();
+    $self->{maxpos} = $self->{curpos} = 0;
+}
+
+sub end {    # finish the document by collecting one final newline
+    $_[0]->collect("\n");
+}
+
+sub header_start {
+    my ( $self, $level ) = @_;
+    my $marker = '#' x $level;    # one '#' per heading level
+    $self->vspace(1);
+    $self->out("$marker ");
+    return 1;
+}
+
+sub header_end {
+    my ( $self, $level ) = @_;
+    my $marker = '#' x $level;    # closing hash marks mirror header_start
+    $self->out(" $marker");
+    return $self->vspace(1);
+}
+
+sub bullet {
+    my ( $self, $mark ) = @_;
+
+    # Hand the marker to the parent implementation with one space appended.
+    return $self->SUPER::bullet("$mark ");
+}
+
+sub hr_start {
+    my ($self) = @_;
+    # The rule marker is written between two vspace(1) requests.
+    $self->vspace(1);
+    $self->out('- - -');
+    return $self->vspace(1);
+}
+
+sub img_start {
+    my ( $self, $node ) = @_;
+    my $alt = $node->attr('alt');
+    my $src = $node->attr('src');
+    $alt = '' unless defined $alt;    # an image may carry no alt text
+    $src = '' unless defined $src;    # never interpolate undef (it warns)
+    $self->out("![$alt]($src)");
+}
+
+sub a_start {
+    my ( $self, $node ) = @_;
+
+    # Only a real hyperlink opens a Markdown link.  Named anchors, in-page
+    # "#fragment" links and anchors without a true href write nothing, which
+    # are exactly the cases where a_end writes no closing "](url)".
+    my $href    = $node->attr('href');
+    my $is_link = !$node->attr('name') && $href && $href !~ /^#/;
+    $self->out("[") if $is_link;
+
+    # Report success explicitly, as header_start and blockquote_start do,
+    # instead of returning out()'s value (its pre-increment output counter).
+    # NOTE(review): presumably the caller prunes the subtree on false -- confirm.
+    return 1;
+}
+
+sub a_end {
+    my ( $self, $node ) = @_;
+
+    # Named anchors get no closing part.
+    return if $node->attr('name');
+
+    # Neither do anchors without a true href value, nor in-page
+    # "#fragment" references.
+    my $target = $node->attr('href');
+    return if !$target || $target =~ /^#/;
+
+    $self->out("]($target)");
+}
+
+sub b_start { shift->out("**"); 1 }    # always true, like header_start,
+sub b_end   { shift->out("**") }
+sub i_start { shift->out("*"); 1 }     # not out()'s pre-increment counter
+sub i_end   { shift->out("*") }
+
+sub tt_start {
+    my $self = shift;
+
+    # No back-tick is written while the formatter's pre flag is set.
+    $self->out("`") unless $self->{pre};
+
+    # Always true (as the pre branch already was) rather than out()'s value,
+    # which is its output counter before incrementing and may be 0.
+    return 1;
+}
+
+sub tt_end {
+    my ($self) = @_;
+
+    # While the formatter's pre flag is set nothing is written, which
+    # mirrors tt_start.
+    return if $self->{pre};
+
+    # Otherwise the inline code span is closed with a back-tick.
+    return $self->out("`");
+}
+
+sub blockquote_start {
+    my ($self) = @_;
+
+    # Deepen the quote level and pull the right margin in by four columns.
+    ++$self->{blockquote};
+    $self->vspace(1);
+    $self->adjust_rm(-4);
+    return 1;
+}
+
+sub blockquote_end {
+    my ($self) = @_;
+
+    # Undo blockquote_start: one level shallower, right margin widened by four.
+    --$self->{blockquote};
+    $self->vspace(1);
+    return $self->adjust_rm(+4);
+}
+
+sub blockquote_out {    # write $text as "> "-prefixed lines, wrapped near rm
+    my ( $self, $text ) = @_;
+
+    $self->nl;
+    $self->goto_lm;
+
+    # Every quoted line starts with the Markdown quote marker.
+    my $line = "> ";
+    $self->{curpos} += 2;
+    # split ' ' skips leading whitespace and treats runs of whitespace as one
+    # separator, so no empty "words" (stray extra blanks) reach the output.
+    foreach my $word ( split ' ', $text ) {
+        $line .= "$word ";
+        if ( ( $self->{curpos} + length($line) ) > $self->{rm} ) {
+            $self->collect($line);
+            $self->nl;
+            $self->goto_lm;
+            $line = "> ";
+            $self->{curpos} += 2;
+        }
+    }
+    $self->collect($line);    # flush whatever is still buffered
+    $self->nl;
+}
+
+# Quoted from HTML::FormatText
+sub pre_out {
+    my ( $self, $pre ) = @_;
+
+    # Flush pending vertical space, but only once something has been output;
+    # otherwise the request stays pending.
+    if ( defined $self->{vspace} && $self->{out} ) {
+        $self->nl() while $self->{vspace}-- >= 0;
+        $self->{vspace} = undef;
+    }
+
+    # Prefix every line with the left margin plus four further blanks.
+    my $indent = ( ' ' x $self->{lm} ) . ( ' ' x 4 );
+    $pre =~ s/^/$indent/mg;
+
+    $self->collect($pre);
+    return $self->{out}++;
+}
+
+sub out {    # write one piece of text, breaking the line first if it would pass rm
+    my $self = shift;
+    my $text = shift;
+    # Non-breaking spaces (\xA0) become blanks; soft hyphens (\xAD) are dropped.
+    $text =~ tr/\xA0\xAD/ /d;
+    # Whitespace-only text is not written; it is remembered as a pending space.
+    if ( $text =~ /^\s*$/ ) {
+        $self->{hspace} = 1;
+        return;
+    }
+    # Pending vertical space: newlines only once something has been output.
+    if ( defined $self->{vspace} ) {
+        if ( $self->{out} ) {
+            $self->nl while $self->{vspace}-- >= 0;
+        }
+        $self->goto_lm;
+        $self->{vspace} = undef;
+        $self->{hspace} = 0;
+    }
+    # Pending horizontal space: becomes a line break or a single blank.
+    if ( $self->{hspace} ) {
+        if ( $self->{curpos} + length($text) > $self->{rm} ) {
+
+            # word will not fit on line; do a line break
+            $self->nl;
+            $self->goto_lm;
+        }
+        else {
+
+            # word fits on line; use a space
+            $self->collect(' ');
+            ++$self->{curpos};
+        }
+        $self->{hspace} = 0;
+    }
+    # Write the text, advance the column and keep the widest column seen.
+    $self->collect($text);
+    my $pos = $self->{curpos} += length $text;
+    $self->{maxpos} = $pos if $self->{maxpos} < $pos;
+    $self->{'out'}++;    # also the return value: the counter before incrementing
+}
+
+sub goto_lm {
+    my ($self) = @_;
+
+    # Pad with blanks up to the left margin; nothing to do at or past it.
+    my $missing = $self->{lm} - $self->{curpos};
+    return if $missing <= 0;
+
+    $self->{curpos} = $self->{lm};
+    $self->collect( ' ' x $missing );
+}
+
+sub nl {
+    my ($self) = @_;
+    # Count the newline as output, rewind the column, then write it.
+    ++$self->{out};
+    $self->{curpos} = 0;
+    return $self->collect("\n");
+}
+
+sub adjust_lm {
+    my ( $self, $delta ) = @_;
+
+    $self->{lm} += $delta;    # shift the left margin by $delta columns
+    return $self->goto_lm;    # then let goto_lm pad the line if required
+}
+
+sub adjust_rm {    # shift the right margin by the given signed amount
+    $_[0]{rm} += $_[1];
+}
+
+1;
+
+__END__
+
+=pod
+
+=for stopwords CPAN Markdown homepage
+
+=for test_synopsis 1;
+__END__
+
+=head1 NAME
+
+HTML::FormatMarkdown - Format HTML as Markdown
+
+=head1 VERSION
+
+version 2.14
+
+=head1 SYNOPSIS
+
+    use HTML::FormatMarkdown;
+
+    my $string = HTML::FormatMarkdown->format_file(
+        'test.html'
+    );
+
+    open my $fh, ">", "test.md" or die "$!\n";
+    print $fh $string;
+    close $fh;
+
+=head1 DESCRIPTION
+
+HTML::FormatMarkdown is a formatter that outputs Markdown.
+
+HTML::FormatMarkdown is built on L<HTML::Formatter> and documentation for that
+module applies to this - especially L<HTML::Formatter/new>,
+L<HTML::Formatter/format_file> and L<HTML::Formatter/format_string>.
+
+=head1 INSTALLATION
+
+See perlmodinstall for information and options on installing Perl modules.
+
+=head1 BUGS AND LIMITATIONS
+
+You can make new bug reports, and view existing ones, through the
+web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Formatter>.
+
+=head1 AVAILABILITY
+
+The project homepage is L<https://metacpan.org/release/HTML-Formatter>.
+
+The latest version of this module is available from the Comprehensive Perl
+Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
+site near you, or see L<https://metacpan.org/module/HTML::Formatter/>.
+
+=head1 AUTHORS
+
+=over 4
+
+=item *
+
+Nigel Metheringham <nigelm@cpan.org>
+
+=item *
+
+Sean M Burke <sburke@cpan.org>
+
+=item *
+
+Gisle Aas <gisle@ActiveState.com>
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/lib/site/HTML/FormatPS.pm b/lib/site/HTML/FormatPS.pm
index c188e2ee5..f51f24032 100644
--- a/lib/site/HTML/FormatPS.pm
+++ b/lib/site/HTML/FormatPS.pm
@@ -1,188 +1,103 @@
 package HTML::FormatPS;
 
-# $Id$
+# ABSTRACT: Format HTML as PostScript
 
-=head1 NAME
-
-HTML::FormatPS - Format HTML as postscript
-
-=head1 SYNOPSIS
-
-  require HTML::FormatPS;
-  $html = parse_htmlfile("test.html");
-  $formatter = new HTML::FormatPS
-		   FontFamily => 'Helvetica',
-		   PaperSize  => 'Letter';
-  print $formatter->format($html);
-
-=head1 DESCRIPTION
-
-The HTML::FormatPS is a formatter that outputs PostScript code.
-Formatting of HTML tables and forms is not implemented.
-
-You might specify the following parameters when constructing the formatter:
-
-=over 4
-
-=item PaperSize
-
-What kind of paper should we format for.  The value can be one of
-these: A3, A4, A5, B4, B5, Letter, Legal, Executive, Tabloid,
-Statement, Folio, 10x14, Quarto.
-
-The default is "A4".
-
-=item PaperWidth
-
-The width of the paper in points.  Setting PaperSize also defines this
-value.
-
-=item PaperHeight
-
-The height of the paper in points.  Setting PaperSize also defines
-this value.
-
-=item LeftMargin
-
-The left margin in points.
-
-=item RightMargin
-
-The right margin in points.
-
-=item HorizontalMargin
-
-Both left and right margin at the same time.  The default value is 4 cm.
-
-=item TopMargin
-
-The top margin in points.
-
-=item BottomMargin
-
-The bottom margin in points.
-
-=item VerticalMargin
-
-Both top and bottom margin at the same time.  The default value is 2 cm.
-
-=item PageNo
-
-The parameter determines if we should put page numbers on the pages.
-The default is yes, so you have to set this value to 0 in order to
-suppress page numbers.
-
-=item FontFamily
-
-The parameter specifies which family of fonts to use for the formatting.
-Legal values are "Courier", "Helvetica" and "Times".  The default is
-"Times".
-
-=item FontScale
-
-All fontsizes might be scaled by this factor.
-
-=item Leading
-
-How much space between lines.  This is a factor of the fontsize used
-for that line.  Default is 0.1.
-
-=back
-
-=head1 SEE ALSO
-
-L<HTML::Formatter>
-
-=head1 COPYRIGHT
-
-Copyright (c) 1995-1998 Gisle Aas. All rights reserved.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=head1 AUTHOR
 
-Gisle Aas <aas@sn.no>
-
-=cut
-
-use Carp;
+use 5.008;
 use strict;
-use vars qw(@ISA $VERSION);
+use warnings;
+use Carp;
+use Encode;
+use IO::File;
+use utf8;    # for the is_utf8 function
 
-require HTML::Formatter;
-@ISA = qw(HTML::Formatter);
+use base 'HTML::Formatter';
 
-($VERSION) = q$Revision$ =~ /: (\d+)/;
+our $VERSION = '2.14'; # VERSION
+our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
 
-use vars qw(%PaperSizes %FontFamilies @FontSizes %param $DEBUG);
+# We now use Smart::Comments in place of the old DEBUG framework.
+# this should be commented out in release versions....
+##use Smart::Comments;
 
+# ------------------------------------------------------------------------
 # A few routines that convert lengths into points
 sub mm { $_[0] * 72 / 25.4; }
 sub in { $_[0] * 72; }
 
-%PaperSizes =
-(
- A3        => [mm(297), mm(420)],
- A4        => [mm(210), mm(297)],
- A5        => [mm(148), mm(210)],
- B4        => [729,     1032   ],
- B5        => [516,     729    ],
- Letter    => [in(8.5), in(11) ],
- Legal     => [in(8.5), in(14) ],
- Executive => [in(7.5), in(10) ],
- Tabloid   => [in(11),  in(17) ],
- Statement => [in(5.5), in(8.5)],
- Folio     => [in(8.5), in(13) ],
- "10x14"   => [in(10),  in(14) ],
- Quarto    => [610,     780    ],
+# ------------------------------------------------------------------------
+my %PaperSizes = (
+    A3        => [ mm(297), mm(420) ],
+    A4        => [ mm(210), mm(297) ],
+    A5        => [ mm(148), mm(210) ],
+    B4        => [ 729,     1032 ],
+    B5        => [ 516,     729 ],
+    Letter    => [ in(8.5), in(11) ],
+    Legal     => [ in(8.5), in(14) ],
+    Executive => [ in(7.5), in(10) ],
+    Tabloid   => [ in(11),  in(17) ],
+    Statement => [ in(5.5), in(8.5) ],
+    Folio     => [ in(8.5), in(13) ],
+    "10x14"   => [ in(10),  in(14) ],
+    Quarto    => [ 610,     780 ],
 );
 
-%FontFamilies =
-(
- Courier   => [qw(Courier
-		  Courier-Bold
-		  Courier-Oblique
-		  Courier-BoldOblique)],
-
- Helvetica => [qw(Helvetica
-		  Helvetica-Bold
-		  Helvetica-Oblique
-		  Helvetica-BoldOblique)],
-
- Times     => [qw(Times-Roman
-		  Times-Bold
-		  Times-Italic
-		  Times-BoldItalic)],
+# ------------------------------------------------------------------------
+my %FontFamilies = (
+    Courier => [
+        qw(Courier
+            Courier-Bold
+            Courier-Oblique
+            Courier-BoldOblique)
+    ],
+
+    Helvetica => [
+        qw(Helvetica
+            Helvetica-Bold
+            Helvetica-Oblique
+            Helvetica-BoldOblique)
+    ],
+
+    Times => [
+        qw(Times-Roman
+            Times-Bold
+            Times-Italic
+            Times-BoldItalic)
+    ],
 );
 
-      # size   0   1   2   3   4   5   6   7
-@FontSizes = ( 5,  6,  8, 10, 12, 14, 18, 24, 32);
+# ------------------------------------------------------------------------
+# size            0  1  2  3   4   5   6   7   8
+my @FontSizes = ( 5, 6, 8, 10, 12, 14, 18, 24, 32 );
 
 sub BOLD   { 0x01; }
 sub ITALIC { 0x02; }
 
-%param =
-(
- papersize        => 'papersize',
- paperwidth       => 'paperwidth',
- paperheight      => 'paperheigth',
- leftmargin       => 'lmW',
- rightmargin      => 'rmW',
- horizontalmargin => 'mW',
- topmargin        => 'tmH',
- bottommargin     => 'bmH',
- verticalmargin   => 'mH',
- pageno           => 'printpageno',
- fontfamily       => 'family',
- fontscale        => 'fontscale',
- leading          => 'leading',
+my %param = (    # lower-cased option name => key of $self set by configure()
+    papersize        => 'papersize',
+    paperwidth       => 'paperwidth',
+    paperheight      => 'paperheight',
+    leftmargin       => 'lmW',
+    rightmargin      => 'rmW',
+    horizontalmargin => 'mW',
+    topmargin        => 'tmH',
+    bottommargin     => 'bmH',
+    verticalmargin   => 'mH',
+    no_prolog        => 'no_prolog',
+    no_trailer       => 'no_trailer',
+    pageno           => 'printpageno',
+    startpage        => 'startpage',
+    fontfamily       => 'family',
+    fontscale        => 'fontscale',
+    leading          => 'leading',
 );
 
+# ------------------------------------------------------------------------
 
-sub new
-{
+
+sub new {
     my $class = shift;
+
     my $self = $class->SUPER::new(@_);
 
     # Obtained from the <title> element
@@ -192,70 +107,80 @@ sub new
     # temporarily different from the "current font" as read from
     # the HTML input).  Initially none.
     $self->{psfontid} = "";
-    
+
     # Pending horizontal space.  A list [ " ", $fontid, $width ],
     # or undef if no space is pending.
     $self->{hspace} = undef;
-    
+
+    # add an encoder object for perl native to Latin1 output
+    $self->{encoder} = find_encoding('iso-8859-1');
+
     $self;
 }
 
-sub default_values
-{
-    (
-     family      => "Times",
-     mH          => mm(40),
-     mW          => mm(20),
-     printpageno => 1,
-     fontscale   => 1,
-     leading     => 0.1,
-     papersize   => 'A4',
-     paperwidth  => mm(210),
-     paperheight => mm(297),
-    )
+# ------------------------------------------------------------------------
+sub default_values {
+    (   shift->SUPER::default_values(),
+
+        family      => "Times",
+        mH          => mm(40),
+        mW          => mm(20),
+        printpageno => 1,
+        startpage   => 1,         # yes, you can start numbering at 10, or whatever.
+        fontscale   => 1,
+        leading     => 0.1,
+        papersize   => 'A4',
+        paperwidth  => mm(210),
+        paperheight => mm(297),
+    );
 }
 
-sub configure
-{
-    my($self, $hash) = @_;
-    my($key,$val);
-    while (($key, $val) = each %$hash) {
-	$key = lc $key;
-	croak "Illegal parameter ($key => $val)" unless exists $param{$key};
-	$key = $param{$key};
-	{
-	    $key eq "family" && do {
-		$val = "\u\L$val";
-		croak "Unknown font family ($val)"
-		  unless exists $FontFamilies{$val};
-		$self->{family} = $val;
-		last;
-	    };
-	    $key eq "papersize" && do {
-		$self->papersize($val) || croak "Unknown papersize ($val)";
-		last;
-	    };
-	    $self->{$key} = lc $val;
-	}
+# ------------------------------------------------------------------------
+sub configure {
+    my ( $self, $hash ) = @_;
+
+    my ( $key, $val );
+    while ( ( $key, $val ) = each %$hash ) {
+        $key = lc $key;
+        croak "Illegal parameter ($key => $val)" unless exists $param{$key};
+        $key = $param{$key};
+        {
+            $key eq "family" && do {
+                $val = "\u\L$val";
+                croak "Unknown font family ($val)"
+                    unless exists $FontFamilies{$val};
+                $self->{family} = $val;
+                last;
+            };
+            $key eq "papersize" && do {
+                $self->papersize($val)
+                    || croak sprintf "Unknown papersize '%s'.\nThe knowns are: %s.\nAborting",
+                    $val,
+                    join( ', ', sort keys %PaperSizes );
+                last;
+            };
+            $self->{$key} = lc $val;
+        }
     }
 }
 
-sub papersize
-{
-    my($self, $val) = @_;
+# ------------------------------------------------------------------------
+sub papersize {
+    my ( $self, $val ) = @_;
+
     $val = "\u\L$val";
-    my($width, $height) = @{$PaperSizes{$val}};
+    my ( $width, $height ) = @{ $PaperSizes{$val} || return 0 };
     return 0 unless defined $width;
-    $self->{papersize} = $val;
-    $self->{paperwidth} = $width;
+    $self->{papersize}   = $val;
+    $self->{paperwidth}  = $width;
     $self->{paperheight} = $height;
     1;
 }
 
-
-sub fontsize
-{
+# ------------------------------------------------------------------------
+sub fontsize {
     my $self = shift;
+
     my $size = $self->{font_size}[-1];
     $size = 8 if $size > 8;
     $size = 3 if $size < 0;
@@ -267,142 +192,158 @@ sub fontsize
 # of that size.  Otherwise, use the font specified by the
 # HTML context.  Returns the "font ID" of the current font.
 
-sub setfont
-{
-    my($self, $plain_with_size) = @_;
-    my $index = 0;
+# ------------------------------------------------------------------------
+sub setfont {
+    my ( $self, $plain_with_size ) = @_;
+
+    my $index  = 0;
     my $family = $self->{family} || 'Times';
-    my $size = $plain_with_size;
+    my $size   = $plain_with_size;
     unless ($plain_with_size) {
-	$index |= BOLD   if $self->{bold};
-	$index |= ITALIC if $self->{italic} || $self->{underline};
-	$family = 'Courier' if $self->{teletype};
-	$size = $self->fontsize;
+        $index |= BOLD if $self->{bold};
+        $index |= ITALIC if $self->{italic} || $self->{underline};
+        $family = 'Courier' if $self->{teletype};
+        $size = $self->fontsize;
     }
-    my $font = $FontFamilies{$family}[$index];
+    my $font           = $FontFamilies{$family}[$index];
     my $font_with_size = "$font-$size";
-    if ($self->{currentfont} eq $font_with_size) {
-	return $self->{currentfontid};
+    if ( $self->{currentfont} eq $font_with_size ) {
+        return $self->{currentfontid};
     }
     $self->{currentfont} = $font_with_size;
-    $self->{pointsize} = $size;
+    $self->{pointsize}   = $size;
     my $fontmod = "Font::Metrics::$font";
     $fontmod =~ s/-//g;
     my $fontfile = $fontmod . ".pm";
     $fontfile =~ s,::,/,g;
     require $fontfile;
     {
-	no strict 'refs';
-	$self->{wx} = \@{ "${fontmod}::wx" };
+        ## no critic
+        no strict 'refs';
+        $self->{wx} = \@{"${fontmod}::wx"};
+        ## use critic
     }
     $font = $self->{fonts}{$font_with_size} || do {
-	my $fontID = "F" . ++$self->{fno};
-	$self->{fonts}{$font_with_size} = $fontID;
-	$fontID;
+        my $fontID = "F" . ++$self->{fno};
+        $self->{fonts}{$font_with_size} = $fontID;
+        $fontID;
     };
     $self->{currentfontid} = $font;
     return $font;
 }
 
-# Construct PostScript code for setting the current font according 
+# ------------------------------------------------------------------------
+# Construct PostScript code for setting the current font according
 # to $fontid, or an empty string if no font change is needed.
 # Assumes the return string will always be output as PostScript if
 # nonempty, so that our notion of the current PostScript font
 # stays in sync with that of the PostScript interpreter.
+#
+sub switchfont {
+    my ( $self, $fontid ) = @_;
 
-sub switchfont
-{
-    my($self, $fontid) = @_;
-    if ($self->{psfontid} eq $fontid) {
-	return "";
-    } else {
-	$self->{psfontid} = $fontid;
-	return "$fontid SF";
+    if ( $self->{psfontid} eq $fontid ) {
+        return "";
+    }
+    else {
+        $self->{psfontid} = $fontid;
+        return "$fontid SF";
     }
 }
 
+# ------------------------------------------------------------------------
 # Like setfont + switchfont.
+sub findfont {
+    my ( $self, $plain_with_size ) = @_;
 
-sub findfont
-{
-    my($self, $plain_with_size) = @_;
-    return $self->switchfont($self->setfont($plain_with_size));
+    return $self->switchfont( $self->setfont($plain_with_size) );
 }
 
-sub width
-{
+# ------------------------------------------------------------------------
+sub width {
     my $self = shift;
-    my $w = 0;
+    my $str  = shift;
+
+    my $w  = 0;
     my $wx = $self->{wx};
     my $sz = $self->{pointsize};
-    for (unpack("C*", $_[0])) {
-	$w += $wx->[$_] * $sz;
+
+    # need to encode to same encoding as font before getting width
+    for ( unpack( "C*", $self->encode_string($str) ) ) {
+
+        # if the character is outside the table, assume its m sized
+        $w += ( ( $_ > $#{$wx} ) ? $wx->[ ord('m') ] : $wx->[$_] ) * $sz    # unless  $_ eq 0xAD; # optional hyphen
     }
     $w;
 }
 
-
-sub begin
-{
+# ------------------------------------------------------------------------
+sub begin {
     my $self = shift;
-    $self->HTML::Formatter::begin;
 
-    # Margins is points
+    $self->SUPER::begin;
+
+    # Margins are in points
     $self->{lm} = $self->{lmW} || $self->{mW};
-    $self->{rm} = $self->{paperwidth}  - ($self->{rmW} || $self->{mW});
-    $self->{tm} = $self->{paperheight} - ($self->{tmH} || $self->{mH});
+    $self->{rm} = $self->{paperwidth} -  ( $self->{rmW} || $self->{mW} );
+    $self->{tm} = $self->{paperheight} - ( $self->{tmH} || $self->{mH} );
     $self->{bm} = $self->{bmH} || $self->{mH};
 
+    $self->{'orig_margins'} =    # used only by the debug-mode print-area marker
+        [ map { sprintf "%.1f", $_ } @{$self}{qw(lm bm rm tm)} ];
+
     # Font setup
-    $self->{fno} = 0;
+    $self->{fno}   = 0;
     $self->{fonts} = {};
-    $self->{en} = 0.55 * $self->fontsize(3);
+    $self->{en}    = 0.55 * $self->fontsize(3);
 
     # Initial position
-    $self->{xpos} = $self->{lm};  # top of the current line
+    $self->{xpos} = $self->{lm};    # top of the current line
     $self->{ypos} = $self->{tm};
 
-    $self->{pageno} = 1;
+    $self->{pageno}              = 1;
+    $self->{visible_page_number} = $self->{startpage};
 
-    $self->{line} = "";
-    $self->{showstring} = "";
-    $self->{currentfont} = "";
-    $self->{prev_currentfont} = "";
+    $self->{line}              = "";
+    $self->{showstring}        = "";
+    $self->{currentfont}       = "";
+    $self->{prev_currentfont}  = "";
     $self->{largest_pointsize} = 0;
 
     $self->newpage;
 }
 
-
-sub end
-{
+# ------------------------------------------------------------------------
+sub end {
     my $self = shift;
+
     $self->showline;
-    $self->endpage if $self->{out};
+    $self->endpage if $self->{'out'};
     my $pages = $self->{pageno} - 1;
 
     my @prolog = ();
-    push(@prolog, "%!PS-Adobe-3.0\n");
+    push( @prolog, "%!PS-Adobe-3.0\n" );
+
     #push(@prolog,"%%Title: No title\n"); # should look for the <title> element
-    push(@prolog, "%%Creator: HTML::FormatPS (libwww-perl)\n");
-    push(@prolog, "%%CreationDate: " . localtime() . "\n");
-    push(@prolog, "%%Pages: $pages\n");
-    push(@prolog, "%%PageOrder: Ascend\n");
-    push(@prolog, "%%Orientation: Portrait\n");
-    my($pw, $ph) = map { int($_); } @{$self}{qw(paperwidth paperheight)};
-
-    push(@prolog, "%%DocumentMedia: Plain $pw $ph 0 white ()\n");
-    push(@prolog, "%%DocumentNeededResources: \n");
-    my($full, %seenfont);
-    for $full (sort keys %{$self->{fonts}}) {
-	$full =~ s/-\d+$//;
-	next if $seenfont{$full}++;
-	push(@prolog, "%%+ font $full\n");
+    push( @prolog, "%%Creator: " . $self->version_tag . "\n" );
+    push( @prolog, "%%CreationDate: " . localtime() . "\n" );
+    push( @prolog, "%%Pages: $pages\n" );
+    push( @prolog, "%%PageOrder: Ascend\n" );
+    push( @prolog, "%%Orientation: Portrait\n" );
+    my ( $pw, $ph ) = map { int($_); } @{$self}{qw(paperwidth paperheight)};
+
+    push( @prolog, "%%DocumentMedia: Plain $pw $ph 0 white ()\n" );
+    push( @prolog, "%%DocumentNeededResources: \n" );
+    my %seenfont;
+    for my $full ( sort keys %{ $self->{fonts} } ) {
+        $full =~ s/-\d+$//;
+        next if $seenfont{$full}++;
+        push( @prolog, "%%+ font $full\n" );
     }
-    push(@prolog, "%%DocumentSuppliedResources: procset newencode 1.0 0\n");
-    push(@prolog, "%%+ encoding ISOLatin1Encoding\n");
-    push(@prolog, "%%EndComments\n");
-    push(@prolog, <<'EOT');
+    push( @prolog, "%%DocumentSuppliedResources: procset newencode 1.0 0\n" );
+    push( @prolog, "%%+ encoding ISOLatin1Encoding\n" );
+    push( @prolog, "%%EndComments\n" );
+    push( @prolog, <<'EOT');
 
 %%BeginProlog
 /S/show load def
@@ -412,46 +353,49 @@ sub end
 %%BeginResource: encoding ISOLatin1Encoding
 systemdict /ISOLatin1Encoding known not {
     /ISOLatin1Encoding [
-	/space /space /space /space /space /space /space /space
-	/space /space /space /space /space /space /space /space
-	/space /space /space /space /space /space /space /space
-	/space /space /space /space /space /space /space /space
-	/space /exclam /quotedbl /numbersign /dollar /percent /ampersand
-	    /quoteright
-	/parenleft /parenright /asterisk /plus /comma /minus /period /slash
-	/zero /one /two /three /four /five /six /seven
-	/eight /nine /colon /semicolon /less /equal /greater /question
-	/at /A /B /C /D /E /F /G
-	/H /I /J /K /L /M /N /O
-	/P /Q /R /S /T /U /V /W
-	/X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
-	/quoteleft /a /b /c /d /e /f /g
-	/h /i /j /k /l /m /n /o
-	/p /q /r /s /t /u /v /w
-	/x /y /z /braceleft /bar /braceright /asciitilde /space
-	/space /space /space /space /space /space /space /space
-	/space /space /space /space /space /space /space /space
-	/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
-	/dieresis /space /ring /cedilla /space /hungarumlaut /ogonek /caron
-	/space /exclamdown /cent /sterling /currency /yen /brokenbar /section
-	/dieresis /copyright /ordfeminine /guillemotleft /logicalnot /hyphen
-	    /registered /macron
-	/degree /plusminus /twosuperior /threesuperior /acute /mu /paragraph
-	    /periodcentered
-	/cedillar /onesuperior /ordmasculine /guillemotright /onequarter
-	    /onehalf /threequarters /questiondown
-	/Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
-	/Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex
-	    /Idieresis
-	/Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply
-	/Oslash /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn
-	    /germandbls
-	/agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla
-	/egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex
-	    /idieresis
-	/eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide
-	/oslash /ugrave /uacute /ucircumflex /udieresis /yacute /thorn
-	    /ydieresis
+    /space /space /space /space /space /space /space /space
+    /space /space /space /space /space /space /space /space
+    /space /space /space /space /space /space /space /space
+    /space /space /space /space /space /space /space /space
+
+    /space /exclam /quotedbl /numbersign /dollar /percent /ampersand
+        /quoteright
+    /parenleft /parenright /asterisk /plus /comma /minus /period /slash
+    /zero /one /two /three /four /five /six /seven
+    /eight /nine /colon /semicolon /less /equal /greater /question
+    /at /A /B /C /D /E /F /G
+    /H /I /J /K /L /M /N /O
+    /P /Q /R /S /T /U /V /W
+    /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
+    /quoteleft /a /b /c /d /e /f /g
+    /h /i /j /k /l /m /n /o
+    /p /q /r /s /t /u /v /w
+    /x /y /z /braceleft /bar /braceright /asciitilde /space
+
+    /space /space /space /space /space /space /space /space
+    /space /space /space /space /space /space /space /space
+    /dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+    /dieresis /space /ring /cedilla /space /hungarumlaut /ogonek /caron
+
+    /space /exclamdown /cent /sterling /currency /yen /brokenbar /section
+    /dieresis /copyright /ordfeminine /guillemotleft /logicalnot /hyphen
+        /registered /macron
+    /degree /plusminus /twosuperior /threesuperior /acute /mu /paragraph
+        /periodcentered
+    /cedillar /onesuperior /ordmasculine /guillemotright /onequarter
+        /onehalf /threequarters /questiondown
+    /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
+    /Egrave /Eacute /Ecircumflex /Edieresis /Igrave /Iacute /Icircumflex
+        /Idieresis
+    /Eth /Ntilde /Ograve /Oacute /Ocircumflex /Otilde /Odieresis /multiply
+    /Oslash /Ugrave /Uacute /Ucircumflex /Udieresis /Yacute /Thorn
+        /germandbls
+    /agrave /aacute /acircumflex /atilde /adieresis /aring /ae /ccedilla
+    /egrave /eacute /ecircumflex /edieresis /igrave /iacute /icircumflex
+        /idieresis
+    /eth /ntilde /ograve /oacute /ocircumflex /otilde /odieresis /divide
+    /oslash /ugrave /uacute /ucircumflex /udieresis /yacute /thorn
+        /ydieresis
     ] def
 } if
 %%EndResource
@@ -459,12 +403,12 @@ systemdict /ISOLatin1Encoding known not {
 /NE { %def
    findfont begin
       currentdict dup length dict begin
-	 { %forall
-	    1 index/FID ne {def} {pop pop} ifelse
-	 } forall
-	 /FontName exch def
-	 /Encoding exch def
-	 currentdict dup
+     { %forall
+        1 index/FID ne {def} {pop pop} ifelse
+     } forall
+     /FontName exch def
+     /Encoding exch def
+     currentdict dup
       end
    end
    /FontName get exch definefont pop
@@ -473,207 +417,300 @@ systemdict /ISOLatin1Encoding known not {
 %%EndProlog
 EOT
 
-    push(@prolog, "\n%%BeginSetup\n");
-    for $full (sort keys %{$self->{fonts}}) {
-	my $short = $self->{fonts}{$full};
-	$full =~ s/-(\d+)$//;
-	my $size = $1;
-	push(@prolog, "ISOLatin1Encoding/$full-ISO/$full NE\n");
-	push(@prolog, "/$short/$full-ISO findfont $size scalefont def\n");
+    push( @prolog, "\n%%BeginSetup\n" );
+    for my $full ( sort keys %{ $self->{fonts} } ) {
+        my $short = $self->{fonts}{$full};
+        $full =~ s/-(\d+)$//;
+        my $size = $1;
+        push( @prolog, "ISOLatin1Encoding/$full-ISO/$full NE\n" );
+        push( @prolog, "/$short/$full-ISO findfont $size scalefont def\n" );
     }
-    push(@prolog, "%%EndSetup\n");
+    push( @prolog, "%%EndSetup\n" );
 
-    $self->collect("\n%%Trailer\n%%EOF\n");
-    unshift(@{$self->{output}}, @prolog);
+    $self->collect("\n%%Trailer\n%%EOF\n")
+        unless $self->{'no_trailer'};
+
+    unshift( @{ $self->{output} }, @prolog )
+        unless $self->{'no_prolog'};
 }
 
+# ------------------------------------------------------------------------
+sub header_start {
+    my ( $self, $level ) = @_;
 
-sub header_start
-{
-    my($self, $level, $node) = @_;
     # If we are close enough to be bottom of the page, start a new page
     # instead of this:
-    $self->vspace(1 + (6-$level) * 0.4);
+    ### Heading of level: $level
+    $self->vspace( 1 + ( 6 - $level ) * 0.4 );
     $self->{bold}++;
-    push(@{$self->{font_size}}, 8 - $level);
+    push( @{ $self->{font_size} }, 8 - $level );
     1;
 }
 
+# ------------------------------------------------------------------------
+sub header_end {
+    my ($self) = @_;
 
-sub header_end
-{
-    my($self, $level, $node) = @_;
     $self->vspace(1);
     $self->{bold}--;
-    pop(@{$self->{font_size}});
+    pop( @{ $self->{font_size} } );
     1;
 }
 
-sub hr_start
-{
+# ------------------------------------------------------------------------
+sub hr_start {
     my $self = shift;
+
+    ### Making an HR...
     $self->showline;
     $self->vspace(0.5);
     $self->skip_vspace;
     my $lm = $self->{lm};
     my $rm = $self->{rm};
-    my $y = $self->{ypos};
-    $self->collect(sprintf "newpath %.1f %.1f M %.1f %.1f lineto stroke\n",
-		   $lm, $y, $rm, $y);
+    my $y  = $self->{ypos};
+    $self->collect( sprintf "newpath %.1f %.1f M %.1f %.1f lineto stroke\n", $lm, $y, $rm, $y );
     $self->vspace(0.5);
 }
 
-
-sub skip_vspace
-{
+# ------------------------------------------------------------------------
+sub skip_vspace {
     my $self = shift;
-    if (defined $self->{vspace}) {
-	$self->showline;
-	if ($self->{out}) {
-	    $self->{ypos} -= $self->{vspace} * 10 * $self->{fontscale};
-	    if ($self->{ypos} < $self->{bm}) {
-		$self->newpage;
-	    }
-	}
-	$self->{xpos} = $self->{lm};
-	$self->{vspace} = undef;
-	$self->{hspace} = undef;
+
+    ### Skipping some amount of vspace...
+    if ( defined $self->{vspace} ) {
+        $self->showline;
+        if ( $self->{'out'} ) {
+            $self->{ypos} -= $self->{vspace} * 10 * $self->{fontscale};
+
+            if ( $self->{ypos} < $self->{bm} ) {
+                ### vspace skip forced new page...
+                $self->newpage;
+            }
+            else {
+                ### Skipped vspace making y: $self->{'ypos'},
+            }
+        }
+        else {
+            ### Not skipping vspace as out is false: $self->{ypos}
+        }
+        $self->{xpos}   = $self->{lm};
+        $self->{vspace} = undef;
+        $self->{hspace} = undef;
+    }
+    else {
+        ### No vspace to skip...
     }
-}
 
+    return;
+}
 
-sub show
-{
+# ------------------------------------------------------------------------
+sub show {
     my $self = shift;
+
     my $str = $self->{showstring};
+    $str =~ tr/\x01//d;
     return unless length $str;
-    $str =~ s/([\(\)\\])/\\$1/g;    # must escape parentesis
-    $self->{line} .= "($str)S\n";
+
+    # must escape parentheses and backslash
+    $str =~ s/([\(\)\\])/\\$1/g;
+
+    # encode output to latin1 when pushing it out
+    $self->{line} .= "(" . $self->encode_string($str) . ")S\n";
     $self->{showstring} = "";
 }
 
-
-sub showline
-{
+# ------------------------------------------------------------------------
+sub showline {
     my $self = shift;
+
     $self->show;
     my $line = $self->{line};
-    return unless length $line;
+    unless ( length $line ) {
+        ### Showline is a no-op because line buffer is empty...
+        return;
+    }
+
+    ### Showline emitting: $line
+
     $self->{ypos} -= $self->{largest_pointsize} || $self->{pointsize};
-    if ($self->{ypos} < $self->{bm}) {
-	$self->newpage;
-	$self->{ypos} -= $self->{pointsize};
-	# must set current font again
-	my $font = $self->{prev_currentfont};
-	if ($font) {
-	    $self->collect("$self->{fonts}{$font} SF\n");
-	}
+    if ( $self->{ypos} < $self->{bm} ) {
+        ### Showline forcing new page...
+        $self->newpage;
+
+        # newpage might alter currentfont!
+        ### Showline sets vspace: $self->{vspace} || 0
+
+        $self->{ypos} -= $self->{pointsize};
+        #### Showline/Newpage x: $self->{xpos}
+        #### Showline/Newpage y: $self->{ypos}
+
+        # must set current font again
+        my $font = $self->{prev_currentfont};
+        if ($font) {
+            $self->collect("$self->{fonts}{$font} SF\n\n");
+        }
+
+        ### End of doing newpage...
     }
+
     my $lm = $self->{lm};
-    my $x = $lm;
-    if ($self->{center}) {
-	# Unfortunately, the center attribute is gone when we get here,
-	# so this code is never activated
-	my $linewidth = $self->{xpos} - $lm;
-	$x += ($self->{rm} - $lm - $linewidth) / 2;
+    my $x  = $lm;
+    if ( $self->{center} ) {
+
+        # Unfortunately, the center attribute is gone when we get here,
+        # so this code is never activated
+        my $linewidth = $self->{xpos} - $lm;
+        $x += ( $self->{rm} - $lm - $linewidth ) / 2;
     }
 
-    $self->collect(sprintf "%.1f %.1f M\n", $x, $self->{ypos});  # moveto
-    $line =~ s/\s\)S$/)S/;  # many lines will end with space
+    $self->collect( sprintf "%.1f %.1f M\n", $x, $self->{ypos} );    # moveto
+    $line =~ s/\s\)S$/)S/;                                           # many lines will end uselessly with space
     $self->collect($line);
+    $self->{'out'}++;
 
-    if ($self->{bullet}) {
-	# Putting this behind the first line of the list item
-	# makes it more likely that we get the right font.  We should
-	# really set the font that we want to use.
-	my $bullet = $self->{bullet};
-	if ($bullet eq '*') {
-	    # There is no character that is really suitable.  Lets make
-	    # filled cirle ourself.
-	    my $radius = $self->{pointsize} / 4;
-	    $self->collect(sprintf "newpath %.1f %.1f %.1f 0 360 arc fill\n",
-		       $self->{bullet_pos} + $radius,
-		       $self->{ypos} + $radius, $radius);
-	} else {
-	    $self->collect(sprintf "%.1f %.1f M\n", # moveto
-			   $self->{bullet_pos},
-			   $self->{ypos});
-	    $self->collect("($bullet)S\n");
-	}
-	$self->{bullet} = '';
+    if ( $self->{bullet} ) {
+
+        # Putting this behind the first line of the list item
+        # makes it more likely that we get the right font.  We should
+        # really set the font that we want to use.
+        my $bullet = $self->{bullet};
+        if ( $bullet eq '*' ) {
+
+            # There is no character that is really suitable.  Let's make
+            # a medium-sized filled circle ourself.
+            my $radius = $self->{pointsize} / 8;
+            ### Adding code for a '*' bullet for that line...
+
+            $self->collect(
+                sprintf "newpath %.1f %.1f %.1f 0 360 arc fill\n",
+                $self->{bullet_pos} + $radius,
+                $self->{ypos} + $radius * 2, $radius,
+            );
+        }
+        else {
+            ### Adding code for other bullet for that line...
+
+            $self->collect(
+                sprintf "%.1f (%s) stringwidth pop sub %.1f add %.1f M\n",    # moveto
+                $self->{bullet_pos},
+                $bullet,
+                $self->{pointsize} * 0.62,
+                $self->{ypos},
+            );
+            $self->collect("($bullet)S\n");
+        }
+        $self->{bullet} = '';
 
     }
 
-    $self->{prev_currentfont} = $self->{currentfont};
+    $self->{prev_currentfont}  = $self->{currentfont};
     $self->{largest_pointsize} = 0;
-    $self->{line} = "";
-    $self->{xpos} = $lm;
+    $self->{line}              = "";
+    $self->{xpos}              = $lm;
+
     # Additional linespacing
+
     $self->{ypos} -= $self->{leading} * $self->{pointsize};
-}
+    #### Showline/end x: $self->{xpos}
+    #### Showline/end y: $self->{ypos}
 
+    return;
+}
 
-sub endpage
-{
+# ------------------------------------------------------------------------
+sub endpage {
     my $self = shift;
+
+    ### End page: $self->{pageno}
     # End previous page
     $self->collect("showpage\n");
+    $self->{visible_page_number}++;
     $self->{pageno}++;
 }
 
-
-sub newpage
-{
+# ------------------------------------------------------------------------
+sub newpage {
     my $self = shift;
-    if ($self->{'out'}) {
-	$self->endpage;
+
+    local $self->{'pointsize'} = $self->{'pointsize'};
+
+    # That's needed for protecting against one bit of the
+    # potential side-effects from page-numbering code
+
+    if ( $self->{'out'} ) {    # whether we've sent anything to the current page so far.
+        ### Newpage calls endpage...
+        $self->endpage;
+        $self->collect( sprintf "%% %s has sent %s write-events to the above page.\n", ref($self), $self->{'out'}, );
     }
+
     $self->{'out'} = 0;
-    my $pageno = $self->{pageno};
-    $self->collect("\n%%Page: $pageno $pageno\n");
+    my $pageno              = $self->{pageno};
+    my $visible_page_number = $self->{visible_page_number};
 
-    # Print area marker (just for debugging)
-    if ($DEBUG) {
-	my($llx, $lly, $urx, $ury) = map { sprintf "%.1f", $_}
-				     @{$self}{qw(lm bm rm tm)};
-	$self->collect("gsave 0.1 setlinewidth\n");
-	$self->collect("clippath 0.9 setgray fill 1 setgray\n");
-	$self->collect("$llx $lly moveto $urx $lly lineto $urx $ury lineto $llx $ury lineto closepath fill\n");
-	$self->collect("grestore\n");
-    }
+    $self->collect("\n%%Page: $pageno $pageno\n");
+    ### Starting page: $pageno
 
     # Print page number
-    if ($self->{printpageno}) {
-	$self->collect("%% Title and pageno\n");
-	my $f = $self->findfont(8);
-	$self->collect("$f\n") if $f;
+    if ( $self->{printpageno} ) {
+        ### Printing page number: $visible_page_number
+        $self->collect("%% Title and pageno\n");
+        my $f = $self->findfont(8);
+        $self->collect("$f\n") if $f;
         my $x = $self->{paperwidth};
-        if ($x) { $x -= 30; } else { $x = 30; }
-        $self->collect(sprintf "%.1f 30.0 M($pageno)S\n", $x);
-	$x = $self->{lm};
-	$self->collect(sprintf "%.1f 30.0 M($self->{title})S\n", $x);
+        if ($x) { $x -= 30; }
+        else    { $x = 30; }
+        $self->collect( sprintf "%.1f 30.0 M($visible_page_number)S\n", $x );
+        $x = $self->{lm};
+        $self->{title} =~ tr/\x01//d;
+        $self->collect( sprintf "%.1f 30.0 M($self->{title})S\n", $x );
+    }
+    else {
+        ### Pointedly not printing page number...
     }
     $self->collect("\n");
 
     $self->{xpos} = $self->{lm};
     $self->{ypos} = $self->{tm};
+    #### Newpage/end x: $self->{xpos}
+    #### Newpage/end y: $self->{ypos}
 }
 
+# ------------------------------------------------------------------------
+sub encode_string {    # converts string into latin1 charset
+    my ( $self, $str ) = @_;    # $str: text to encode; returns what the encoder produces
+
+    # The string from the parser is normally Unicode, and may contain
+    # some punctuation characters in the 'General Punctuation' block
+    # which can be approximated in latin1, but the Encode module fails on
+    # them, so we manually map them to ASCII look-alikes first.
+    # There's no usable latin1 for the double quote chars so map to "
+    if ( utf8::is_utf8($str) ) {
+        $str =~ tr/\x{2018}\x{2019}\x{201A}\x{201C}\x{201D}\x{201F}\x{2033}\x{2036}/`',"""""/;
+    }
+    # $self->{encoder} is created outside this sub; only its encode() method is used here.
+    return $self->{encoder}->encode($str);
+}
+
+# ------------------------------------------------------------------------
+sub out {    # Output a word
+    my ( $self, $text ) = @_;
+
+    $text =~ tr/\xA0\xAD/ /d;
+    ### Trapping new word: $text
+
+    if ( $self->{collectingTheTitle} ) {
 
-sub out
-{
-    my($self, $text) = @_;
-    if ($self->{collectingTheTitle}) {
         # Both collect and print the title
-    	$text =~ s/([\(\)\\])/\\$1/g; # Escape parens.
+        $text =~ s/([\(\)\\])/\\$1/g;    # Escape parens and the backslash
         $self->{title} .= $text;
-	return;
+        return;
     }
 
     my $fontid = $self->setfont();
-    my $w = $self->width($text);
+    my $w      = $self->width($text);
 
-    if ($text =~ /^\s*$/) {
+    if ( $text =~ /^\s*$/ ) {
         $self->{hspace} = [ " ", $fontid, $w ];
         return;
     }
@@ -681,99 +718,382 @@ sub out
     $self->skip_vspace;
 
     # determine spacing / line breaks needed before text
-    if ($self->{hspace}) {
-	my ($stext, $sfont, $swidth) = @{$self->{hspace}};
-	if ($self->{xpos} + $swidth + $w > $self->{rm}) {
-	    # line break
-	    $self->showline;
-	} else {
-	    # no line break; output a space
-            $self->show_with_font($stext, $sfont, $swidth);
-	}
-	$self->{hspace} = undef;
+    if ( $self->{hspace} ) {
+        my ( $stext, $sfont, $swidth ) = @{ $self->{hspace} };
+        if ( $self->{xpos} + $swidth + $w > $self->{rm} ) {
+
+            # line break
+            $self->showline;
+        }
+        else {
+
+            # no line break; output a space
+            $self->show_with_font( $stext, $sfont, $swidth );
+        }
+        $self->{hspace} = undef;
     }
 
     # output the text
-    $self->show_with_font($text, $fontid, $w);
+    $self->show_with_font( $text, $fontid, $w );
 }
 
-
+# ------------------------------------------------------------------------
 sub show_with_font {
-    my ($self, $text, $fontid, $w) = @_;
+    my ( $self, $text, $fontid, $w ) = @_;
 
     my $fontps = $self->switchfont($fontid);
-    if (length $fontps) {
-	$self->show;
-	$self->{line} .= "$fontps\n";
+    if ( length $fontps ) {
+        $self->show;
+        $self->{line} .= "$fontps\n";
     }
 
     $self->{xpos} += $w;
     $self->{showstring} .= $text;
+
+    #### Append to string buffer: $text
+    #### with font: $fontid
+    #### with xpos: $self->{xpos}
+
     $self->{largest_pointsize} = $self->{pointsize}
-      if $self->{largest_pointsize} < $self->{pointsize};
+        if $self->{largest_pointsize} < $self->{pointsize};
     $self->{'out'}++;
 }
 
+# ------------------------------------------------------------------------
+sub pre_out {
+    my ( $self, $text ) = @_;
 
-sub pre_out
-{
-    my($self, $text) = @_;
     $self->skip_vspace;
     $self->tt_start;
     my $font = $self->findfont();
-    if (length $font) {
-	$self->show;
-	$self->{line} .= "$font\n";
+    if ( length $font ) {
+        $self->show;
+        $self->{line} .= "$font\n";
     }
-    while ($text =~ s/(.*)\n//) {
-    	$self->{'out'}++;
-	$self->{showstring} .= $1;
-	$self->showline;
+    while ( $text =~ s/(.*)\n// ) {
+        $self->{'out'}++;
+        $self->{showstring} .= $1;
+        $self->showline;
     }
     $self->{showstring} .= $text;
     $self->tt_end;
+    1;
 }
 
-sub bullet
-{
-    my($self, $bullet) = @_;
-    $self->{bullet} = $bullet;
+# ------------------------------------------------------------------------
+sub bullet {
+    my ( $self, $bullet ) = @_;
+
+    $self->{bullet}     = $bullet;
     $self->{bullet_pos} = $self->{lm};
 }
 
-sub adjust_lm
-{
+# ------------------------------------------------------------------------
+sub adjust_lm {
     my $self = shift;
+
     $self->showline;
+
     $self->{lm} += $_[0] * $self->{en};
+    1;
 }
 
-
-sub adjust_rm
-{
+# ------------------------------------------------------------------------
+sub adjust_rm {
     my $self = shift;
+
     $self->showline;
-    $self->{rm} += $_[0] * $self->{en};
-}
 
-sub head_start {
-    1;
+    $self->{rm} += $_[0] * $self->{en};
 }
 
-sub head_end {
-    1;
-}
+# ------------------------------------------------------------------------
+sub head_start { 1; }
+sub head_end   { 1; }
 
 sub title_start {
-    my($self) = @_;
+    my ($self) = @_;
+
     $self->{collectingTheTitle} = 1;
     1;
 }
 
 sub title_end {
-    my($self) = @_;
+    my ($self) = @_;
+
     $self->{collectingTheTitle} = 0;
     1;
 }
 
+# ------------------------------------------------------------------------
+my ( $counter, $last_state_filename );    # kept between dump_state calls
+
+# For use in circumstances of total desperation:
+# (debug aid: dumps the object to stateNNNN.txt and diffs it against the previous dump)
+sub dump_state {
+    my $self = shift;
+    require Data::Dumper;
+    # NOTE(review): IO::File is not loaded inside this sub -- confirm the file loads it elsewhere.
+    ++$counter;
+    my $filename = sprintf( "state%04d.txt", $counter );
+    my $state = IO::File->new( $filename, 'w' ) or die "Can't write-open $filename: $!";
+    $state->printf( "%s line %s\n", ( caller(1) )[ 3, 2 ] );    # sub name and line from caller(1)
+    {
+        local ( $self->{'wx'} )     = '<SUPPRESSED>';    # temporarily masked so these two
+        local ( $self->{'output'} ) = '<SUPPRESSED>';    # entries are not written to the dump
+        $state->print( Data::Dumper::Dumper($self) );
+    }
+    $state->close;
+    sleep 0;    # zero-second sleep, i.e. effectively a no-op as written
+    # Relies on an external "diff.bat" found via perl -S; both filenames are generated above.
+    if ($last_state_filename) {
+        system("perl -S diff.bat $last_state_filename $filename > $filename.diff");
+    }
+
+    $last_state_filename = $filename;
+    return 1;
+}
+
+# ------------------------------------------------------------------------
+
+
 1;
+
+__END__
+
+=pod
+
+=for test_synopsis 1;
+__END__
+
+=for stopwords bottommargin fontfamily fontscale helvetica
+    horizontalmargin leftmargin noprolog notrailer pageno
+    paperheight papersize paperwidth postscript rightmargin
+    startpage topmargin verticalmargin ledding leeding prolog
+    CPAN Quarto Tabloid Unicode homepage
+
+=head1 NAME
+
+HTML::FormatPS - Format HTML as PostScript
+
+=head1 VERSION
+
+version 2.14
+
+=head1 SYNOPSIS
+
+    use HTML::TreeBuilder;
+    $tree = HTML::TreeBuilder->new->parse_file("test.html");
+
+    use HTML::FormatPS;
+    $formatter = HTML::FormatPS->new(
+        FontFamily => 'Helvetica',
+        PaperSize  => 'Letter',
+    );
+    print $formatter->format($tree);
+
+Or, for short:
+
+    use HTML::FormatPS;
+    print HTML::FormatPS->format_file(
+        "test.html",
+        'FontFamily' => 'Helvetica',
+        'PaperSize'  => 'Letter',
+    );
+
+=head1 DESCRIPTION
+
+The HTML::FormatPS is a formatter that outputs PostScript code. Formatting of
+HTML tables and forms is not implemented.
+
+HTML::FormatPS is built on top of L<HTML::Formatter> and so further detail may
+be found in the documentation for L<HTML::Formatter>.
+
+You might specify the following parameters when constructing the formatter
+object (or when calling format_file or format_string):
+
+=over 4
+
+=item PaperSize
+
+What kind of paper should we format for.  The value can be one of these: A3,
+A4, A5, B4, B5, Letter, Legal, Executive, Tabloid, Statement, Folio, 10x14,
+Quarto.
+
+The default is "A4".
+
+=item PaperWidth
+
+The width of the paper, in points.  Setting PaperSize also defines this value.
+
+=item PaperHeight
+
+The height of the paper, in points.  Setting PaperSize also defines this value.
+
+=item LeftMargin
+
+The left margin, in points.
+
+=item RightMargin
+
+The right margin, in points.
+
+=item HorizontalMargin
+
+Both left and right margin at the same time.  The default value is 4 cm.
+
+=item TopMargin
+
+The top margin, in points.
+
+=item BottomMargin
+
+The bottom margin, in points.
+
+=item VerticalMargin
+
+Both top and bottom margin at the same time.  The default value is 2 cm.
+
+=item PageNo
+
+This parameter determines if we should put page numbers on the pages. The
+default value is true; so you have to set this value to 0 in order to suppress
+page numbers.  (The "No" in "PageNo" means number/numero!)
+
+=item FontFamily
+
+This parameter specifies which family of fonts to use for the formatting. Legal
+values are "Courier", "Helvetica" and "Times".  The default is "Times".
+
+=item FontScale
+
+This is a scaling factor for all the font sizes.  The default value is 1.
+
+For example, if you want everything to be almost three times as large, you
+could set this to 2.7.  If you wanted things just a bit smaller than normal,
+you could set it to .92.
+
+=item Leading
+
+This option (pronounced "ledding", not "leeding") controls how much space is
+between lines. This is a factor of the font size used for that line.  Default
+is 0.1 -- so between two 12-point lines, there will be 1.2 points of space.
+
+=item StartPage
+
+Assuming you have PageNo on, StartPage controls what the page number of the
+first page will be. By default, it is 1. So if you set this to 87, the first
+page would say "87" on it, the next "88", and so on.
+
+=item NoProlog
+
+If this option is set to a true value, HTML::FormatPS will make a point of
+I<not> emitting the PostScript prolog before the document. By default, this is
+off, meaning that HTML::FormatPS I<will> emit the prolog. This option is of
+interest only to advanced users.
+
+=item NoTrailer
+
+If this option is set to a true value, HTML::FormatPS will make a point of
+I<not> emitting the PostScript trailer at the end of the document. By default,
+this is off, meaning that HTML::FormatPS I<will> emit the bit of PostScript
+that ends the document. This option is of interest only to advanced users.
+
+=back
+
+=head1 METHODS
+
+=head2 new
+
+    my $formatter = FormatterClass->new(
+        option1 => value1, option2 => value2, ...
+    );
+
+This creates a new formatter object with the given options.
+
+=head1 SEE ALSO
+
+L<HTML::Formatter>
+
+=head1 ISSUES
+
+=over
+
+=item *
+
+Output is in ISO Latin1 format. The underlying HTML parsers now tend to work in
+Unicode (perl native) code points. There is an impedance mismatch between
+these, which may give issues with complex characters within HTML.
+
+=back
+
+=head1 TO DO
+
+=over
+
+=item *
+
+Support for some more character styles, notably including: strike-through,
+underlining, superscript, and subscript.
+
+=item *
+
+Support for Unicode.
+
+=item *
+
+Support for Win-1252 encoding, since that's what most people mean when they use
+characters in the range 0x80-0x9F in HTML.
+
+=item *
+
+And, if it's ever even reasonably possible, support for tables.
+
+=back
+
+I would welcome email from people who can help me out or advise me on the
+above.
+
+=head1 INSTALLATION
+
+See perlmodinstall for information and options on installing Perl modules.
+
+=head1 BUGS AND LIMITATIONS
+
+You can make new bug reports, and view existing ones, through the
+web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Formatter>.
+
+=head1 AVAILABILITY
+
+The project homepage is L<https://metacpan.org/release/HTML-Formatter>.
+
+The latest version of this module is available from the Comprehensive Perl
+Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
+site near you, or see L<https://metacpan.org/module/HTML::Formatter/>.
+
+=head1 AUTHORS
+
+=over 4
+
+=item *
+
+Nigel Metheringham <nigelm@cpan.org>
+
+=item *
+
+Sean M Burke <sburke@cpan.org>
+
+=item *
+
+Gisle Aas <gisle@ActiveState.com>
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/lib/site/HTML/FormatRTF.pm b/lib/site/HTML/FormatRTF.pm
new file mode 100644
index 000000000..7ccbd5e90
--- /dev/null
+++ b/lib/site/HTML/FormatRTF.pm
@@ -0,0 +1,675 @@
+package HTML::FormatRTF;
+
+# ABSTRACT: Format HTML as RTF
+
+
+use 5.006_001;
+use strict;
+use warnings;
+
+# We now use Smart::Comments in place of the old DEBUG framework.
+# this should be commented out in release versions....
+##use Smart::Comments;
+
+use base 'HTML::Formatter';
+
+our $VERSION = '2.14'; # VERSION
+our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
+
+# ------------------------------------------------------------------------
+my %Escape = (    # lookup table: one character => the RTF text emitted for it
+    map( ( chr($_), chr($_) ),    # things not apparently needing escaping
+        0x20 .. 0x7E ),
+    map( ( chr($_), sprintf( "\\'%02x", $_ ) ),    # apparently escapeworthy things
+        0x00 .. 0x1F, 0x5c, 0x7b, 0x7d, 0x7f .. 0xFF, 0x46 ),
+    # (A later pair replaces an earlier one with the same key, so 0x46 overrides 0x20 .. 0x7E.)
+    # We get to escape out 'F' so that we can send RTF files thru the mail
+    # without the slightest worry that paragraphs beginning with "From"
+    # will get munged.
+
+    # And some refinements:
+    #"\n"   => "\n\\line ",
+    #"\cm"  => "\n\\line ",
+    #"\cj"  => "\n\\line ",
+
+    "\t" => "\\tab ",    # Tabs (altho theoretically raw \t's are okay)
+
+    # "\f"   => "\n\\page\n", # Formfeed
+    "-"    => "\\_",     # Turn plaintext '-' into a non-breaking hyphen
+    "\xA0" => "\\~",     # Latin-1 non-breaking space
+    "\xAD" => "\\-",     # Latin-1 soft (optional) hyphen
+
+    # CRAZY HACKS:
+    "\n" => "\\line\n",
+    "\r" => "\n",
+
+    # "\cb" => "{\n\\cs21\\lang1024\\noproof ",  # \\cf1
+    # "\cc" => "}",
+);
+
+# ------------------------------------------------------------------------
+sub default_values {    # parent class defaults followed by the RTF-specific ones (flat key/value list)
+    (   shift->SUPER::default_values(),
+        'lm' => 0,    # left margin
+        'rm' => 0,    # right margin (actually, maximum text width)
+        # Sizes below are in half-points (they are fed to RTF \fs), so 22 means 11pt.
+        'head1_halfpoint_size'     => 32,
+        'head2_halfpoint_size'     => 28,
+        'head3_halfpoint_size'     => 25,
+        'head4_halfpoint_size'     => 22,
+        'head5_halfpoint_size'     => 20,
+        'head6_halfpoint_size'     => 18,
+        'codeblock_halfpoint_size' => 18,
+        'header_halfpoint_size'    => 17,
+        'normal_halfpoint_size'    => 22,
+    );
+}
+
+# ------------------------------------------------------------------------
+sub configure {    # apply caller-supplied options to the formatter; returns $self
+    my ( $self, $hash ) = @_;    # $hash: optional hashref of option => value
+    # (Unpacking with "= shift" left $hash undef, so no option was ever copied.)
+    $self->{lm} = 0;
+    $self->{rm} = 0;
+
+    # include the hash parameters into self - as RT#56278
+    @{$self}{ keys %$hash } = values %$hash if ref($hash);
+    return $self;
+}
+
+# ------------------------------------------------------------------------
+sub begin {    # start of document: emit the RTF prolog and reset paragraph state
+    my $self = shift;
+
+    ### Start document...
+    $self->SUPER::begin;
+    # Prolog pieces are collected in this fixed order; doc_init supplies the opening "{\rtf1".
+    $self->collect( $self->doc_init, $self->font_table, $self->stylesheet, $self->color_table, $self->doc_info,
+        $self->doc_really_start, "\n" )
+        unless $self->{'no_prolog'};
+    # 'Para' is the text buffer that emit_para later takes and resets.
+    $self->{'Para'}       = '';
+    $self->{'quotelevel'} = 0;
+
+    return;
+}
+
+# ------------------------------------------------------------------------
+sub end {    # end of document: flush what is pending, then close the RTF file
+    my $self = shift;
+    # The vspace/out pair is there just to force the previous para to be written out.
+    $self->vspace(0);
+    $self->out('THIS IS NEVER SEEN');
+
+    # Emit the final "}" unless the caller asked for no trailer.
+    $self->collect("}") unless $self->{'no_trailer'};    # ends the document
+
+    ### End document...
+    return;
+}
+
+# ------------------------------------------------------------------------
+sub vspace {    # parent vspace, plus a paragraph flush; returns the parent's return value
+    my $self = shift;
+    # Delegate first, then flush the buffered paragraph if a vspace is pending afterwards.
+    #$self->emit_para if defined $self->{'vspace'};
+    my $rv = $self->SUPER::vspace(@_);
+    $self->emit_para if defined $self->{'vspace'};
+    $rv;
+}
+
+# ------------------------------------------------------------------------
+sub stylesheet {    # returns the RTF \stylesheet group with the configured font sizes filled in
+
+    # TODO: maybe actually /use/ the character styles?
+
+    return sprintf <<'END',    # snazzy styles
+{\stylesheet
+{\snext0 Normal;}
+{\*\cs1 \additive Default Paragraph Font;}
+{\*\cs2 \additive \i\sbasedon1 html-ital;}
+{\*\cs3 \additive \b\sbasedon1 html-bold;}
+{\*\cs4 \additive \f1\sbasedon1 html-code;}
+
+{\s20\ql \f1\fs%s\lang1024\noproof\sbasedon0 \snext0 html-pre;}
+
+{\s31\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 html-head1;}
+{\s32\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 html-head2;}
+{\s33\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 html-head3;}
+{\s34\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 html-head4;}
+{\s35\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 html-head5;}
+{\s36\ql \keepn\sb90\sa180\f2\fs%s\ul\sbasedon0 \snext0 html-head6;}
+}
+
+END
+        # One value per %s above, in order: html-pre first, then html-head1 .. html-head6.
+        @{ $_[0] }{
+        qw<
+            codeblock_halfpoint_size
+            head1_halfpoint_size
+            head2_halfpoint_size
+            head3_halfpoint_size
+            head4_halfpoint_size
+            head5_halfpoint_size
+            head6_halfpoint_size
+            >
+        };
+}
+
+# ------------------------------------------------------------------------
+# Override these as necessary for further customization
+
+sub font_table {    # returns the RTF \fonttbl group naming fonts f0, f1 and f2
+    my $self = shift;
+    # Font names come from fontname_body / fontname_code / fontname_headings, with defaults.
+    return sprintf <<'END' ,    # text font, code font, heading font
+{\fonttbl
+{\f0\froman %s;}
+{\f1\fmodern %s;}
+{\f2\fswiss %s;}
+}
+
+END
+        # Each name is escaped first: bytes become \'hh, wider characters become \uc1\uN?
+        map {
+        ;                       # custom-dumb escaper:
+        my $x = $_;
+        $x =~ s/([\x00-\x1F\\\{\}\x7F-\xFF])/sprintf("\\'%02x", ord($1))/eg;    # needs /e and ord() to yield hex
+        $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+        $x;
+        }
+        $self->{'fontname_body'}     || 'Times',
+        $self->{'fontname_code'}     || 'Courier New',
+        $self->{'fontname_headings'} || 'Arial',
+        ;
+}
+
+# ------------------------------------------------------------------------
+sub doc_init {    # very first text of the file; the "{" it opens is closed by end()
+    return <<'END';    # RTF version 1, ANSI character set, default font f0
+{\rtf1\ansi\deff0
+
+END
+}
+
+# ------------------------------------------------------------------------
+sub color_table {    # fixed color table: empty first entry, then pure red, then pure blue
+    return <<'END';
+{\colortbl;\red255\green0\blue0;\red0\green0\blue255;}
+END
+}
+
+# ------------------------------------------------------------------------
+sub doc_info {    # returns the RTF \info group
+    my $self = $_[0];
+    # The only variable part is the "generated by" comment, filled from $self->version_tag.
+    return sprintf <<'END', $self->version_tag;
+{\info{\doccomm generated by %s}
+{\author [see doc]}{\company [see doc]}{\operator [see doc]}
+}
+
+END
+
+}
+
+# ------------------------------------------------------------------------
+sub doc_really_start {    # last prolog piece: default language, page header, body font size
+    my $self = $_[0];
+    # Fills, in order: \deflang (1033 when unset), the page-header \fs, and the body \fs.
+    return sprintf <<'END',
+\deflang%s\widowctrl
+{\header\pard\qr\plain\f2\fs%s
+p.\chpgn\par}
+\fs%s
+
+END
+        $self->{'document_language'} || 1033, $self->{"header_halfpoint_size"}, $self->{"normal_halfpoint_size"},;
+}
+
+# ------------------------------------------------------------------------
+sub emit_para {    # rather like showline in FormatPS
+    my $self = shift;
+    # Take the buffered paragraph text and reset the buffer before doing anything else.
+    my $para = $self->{'Para'};
+    $self->{'Para'} = undef;
+
+    #### emit_para called by: (caller(1) )[3];
+
+    unless ( defined $para ) {
+        #### emit_para with empty buffer...
+        return;
+    }
+
+    $para =~ s/^ +//s;
+    $para =~ s/ +$//s;
+
+    # And now: a not terribly clever algorithm for inserting newlines
+    # at a guaranteed harmless place: after a block of whitespace
+    # after the 65th column.  This was copied from RTF::Writer.
+    $para =~ s/(
+       [^\cm\cj\n]{65}        # Snare 65 characters from a line
+       [^\cm\cj\n\x20]{0,50}  #  and finish any current word
+      )
+      (\x20{1,10})(?![\cm\cj\n]) # capture some spaces not at line-end
+     /$1$2\n/gx    # and put a NL after those spaces
+        ;
+    # One \pard group per paragraph: space-after, left/right indent, alignment, bullet, text.
+    $self->collect(
+        sprintf(
+            '{\pard\sa%d\li%d\ri%d%s\plain' . "\n",
+
+            #100 +
+            10 * $self->{'normal_halfpoint_size'} * ( $self->{'vspace'} || 0 ),
+
+            $self->{'lm'},
+            $self->{'rm'},
+
+            $self->{'center'} ? '\qc' : '\ql',
+        ),
+
+        defined( $self->{'next_bullet'} )
+        ? do {
+            my $bullet = $self->{'next_bullet'};
+            $self->{'next_bullet'} = undef;
+            sprintf "\\fi-%d\n%s",
+                4.5 * $self->{'normal_halfpoint_size'},
+                ( $bullet eq '*' ) ? "\\'95 " : ( rtf_esc($bullet) . ". " );
+            }
+        : (),
+
+        $para,
+        "\n\\par}\n\n",
+    );
+
+    $self->{'vspace'} = undef;    # we finally get to clear it here!
+
+    return;
+}
+
+# ------------------------------------------------------------------------
+sub new_font_size {
+    my ($self) = @_;
+    my @scaled = $self->scale_font_for( $self->{'normal_halfpoint_size'} );    # list context, as sprintf's arguments
+    return $self->out( \sprintf( "{\\fs%u\n", @scaled ) );
+}
+
+# ------------------------------------------------------------------------
+sub restore_font_size { $_[0]->out( \'}' ) }    # closes the "{" group that new_font_size opens
+
+# ------------------------------------------------------------------------
+sub hr_start {
+    my ($self) = @_;
+
+    # A bit of a hack: the rule is a centred, underlined run of "\~".
+    $self->vspace(.3);
+    my $rule = '\~' x ( $self->{'hr_width'} || 50 );
+    $self->out( \( '\qc\ul\f1\fs20\nocheck\lang1024 ' . $rule ) );
+    $self->vspace(.7);
+    return 1;
+}
+
+# ------------------------------------------------------------------------
+
+sub br_start {
+    return shift->out( \"\\line\n" );    # raw RTF: "\line" plus a newline
+}
+
+# ------------------------------------------------------------------------
+sub header_start {
+    my ( $self, $level ) = @_;
+
+    # Open a heading (h1 ... h6): ask for 1.5 units of vertical space, then
+    # emit style \s3<level> in font \f2 at that level's half-point size.
+    # (This really should have been called heading_start, but it's too
+    #  late to change now.)
+    ### Heading of level: $level
+    #$self->adjust_lm(0); # assert new paragraph
+    $self->vspace(1.5);
+
+    # Exactly one argument per %s: a third, unused argument would raise
+    # "Redundant argument in sprintf" under warnings on perl 5.22+.
+    my $size = $self->{ 'head' . $level . '_halfpoint_size' };
+    $self->out( \sprintf( '\s3%s\ql\keepn\f2\fs%s\ul' . "\n", $level, $size ) );
+
+    return 1;
+}
+
+# ------------------------------------------------------------------------
+sub header_end {
+    my ($self) = @_;
+
+    # (Historical name: this really should have been called heading_end,
+    #  but it's too late to change now.)
+    $self->vspace(1);
+    return 1;
+}
+
+# ------------------------------------------------------------------------
+sub bullet {
+    my $self = shift;
+    # Just remember the marker; emit_para reads and clears next_bullet.
+    $self->{'next_bullet'} = shift;
+    return;
+}
+
+# ------------------------------------------------------------------------
+sub adjust_lm {
+    my ( $self, $steps ) = @_;
+    $self->emit_para();    # finish the current paragraph before the margin moves
+    $self->{'lm'} += $steps * $self->{'normal_halfpoint_size'} * 5;
+    return 1;
+}
+# ------------------------------------------------------------------------
+sub adjust_rm {
+    my ( $self, $steps ) = @_;
+    $self->emit_para();    # finish the current paragraph before the margin moves
+    $self->{'rm'} -= $steps * $self->{'normal_halfpoint_size'} * 5;
+    return 1;
+}    # Yes, flip the sign on the right margin!
+# BTW, halfpoints * 10 = twips
+
+# ------------------------------------------------------------------------
+sub pre_start {
+    my ( $self, @args ) = @_;
+    $self->SUPER::pre_start(@args);    # parent handler runs first
+    my $size = $self->{'codeblock_halfpoint_size'};
+    $self->out( \sprintf( '\s20\f1\fs%s\noproof\lang1024\lang1076 ', $size ) );
+    return 1;
+}
+
+# ------------------------------------------------------------------------
+sub b_start      { $_[0]->out( \'{\b ' ) }                             # open a \b group
+sub b_end        { $_[0]->out( \'}' ) }                                # close it
+sub i_start      { $_[0]->out( \'{\i ' ) }                             # open an \i group
+sub i_end        { $_[0]->out( \'}' ) }                                # close it
+sub tt_start     { $_[0]->out( \'{\f1\noproof\lang1024\lang1076 ' ) }  # open a group in font \f1
+sub tt_end       { $_[0]->out( \'}' ) }                                # close it
+sub sub_start    { $_[0]->out( \'{\sub ' ) }                           # open a \sub group
+sub sub_end      { $_[0]->out( \'}' ) }                                # close it
+sub sup_start    { $_[0]->out( \'{\super ' ) }                         # open a \super group
+sub sup_end      { $_[0]->out( \'}' ) }                                # close it
+sub strike_start { $_[0]->out( \'{\strike ' ) }                        # open a \strike group
+sub strike_end   { $_[0]->out( \'}' ) }                                # close it
+
+# ------------------------------------------------------------------------
+sub q_start {
+    my ($self) = @_;
+    my $depth = ++$self->{'quotelevel'};    # odd depth gets \ldblquote, even depth \lquote
+    return $self->out( $depth % 2 ? \'\ldblquote ' : \'\lquote ' );
+}
+
+# ------------------------------------------------------------------------
+sub q_end {
+    my ($self) = @_;
+    my $depth = --$self->{'quotelevel'};    # depth after leaving: odd gets \rquote, even \rdblquote
+    return $self->out( $depth % 2 ? \'\rquote ' : \'\rdblquote ' );
+}
+
+# ------------------------------------------------------------------------
+sub pre_out { my ( $self, $chunk ) = @_; return $self->out( ref($chunk) ? $chunk : \rtf_esc_codely($chunk) ) }
+
+# ------------------------------------------------------------------------
+sub out {    # append a word to the paragraph buffer (a scalar ref is taken as raw RTF)
+    my $self = shift;
+
+    #return $self->pre_out(@_) if $self->{pre};
+
+    #### out called by: $_[0], (caller(1) )[3]
+
+    my ($chunk) = @_;
+    return if !defined $chunk;    # and length $chunk;
+
+    $self->{'Para'} = '' if !defined $self->{'Para'};
+    $self->{'Para'} .= ref($chunk) ? ${$chunk} : rtf_esc($chunk);
+    return 1;
+}
+
+# ------------------------------------------------------------------------
+use integer;
+
+sub rtf_esc {    # RTF-escape text; what happens depends on the calling context
+    my $x;                          # scratch copy, so the non-void branches leave @_ alone
+    if ( !defined wantarray ) {     # void context: alter in-place!
+        for (@_) {
+            s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g;    # ESCAPER
+            s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;    # beyond 0xFF: \uc1\uN?
+        }
+        return;
+    }
+    elsif (wantarray) {                                         # list context: one escaped copy per argument
+        return map {
+            ;    # leading ";" makes perl parse these braces as a block
+            ( $x = $_ ) =~ s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g;    # ESCAPER
+            $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+
+            # (Above: hyper-escape all Unicode characters; 32768 and up have 65536 subtracted.)
+            $x;
+        } @_;
+    }
+    else {                                                                     # scalar context: several arguments are joined first
+        ( $x = ( ( @_ == 1 ) ? $_[0] : join '', @_ ) ) =~ s/([F\x00-\x1F\-\\\{\}\x7F-\xFF])/$Escape{$1}/g;    # ESCAPER
+                 # Escape \, {, }, -, control chars, and 7f-ff.
+        $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+        # NOTE(review): the class also lists a literal "F"; its effect depends on %Escape (defined outside this sub).
+        # (Above: hyper-escape all Unicode characters.)
+        return $x;
+    }
+}
+
+# ------------------------------------------------------------------------
+sub rtf_esc_codely {
+
+    # Doesn't change "-" to hard-hyphen, nor apply computerese style.
+    # Like rtf_esc, the result depends on the calling context.
+    my $x;    # scratch copy, so the non-void branches leave @_ alone
+    if ( !defined wantarray ) {    # void context: alter in-place!
+        for (@_) {
+            s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g;
+            s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+
+            # (Above: hyper-escape all Unicode characters as \uc1\uN?.)
+        }
+        return;
+    }
+    elsif (wantarray) {            # list context: one escaped copy per argument
+        return map {
+            ;    # leading ";" makes perl parse these braces as a block
+            ( $x = $_ ) =~ s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g;
+            $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+
+            # (Above: hyper-escape all Unicode characters.)
+            $x;
+        } @_;
+    }
+    else {                         # scalar context: several arguments are joined first
+        ( $x = ( ( @_ == 1 ) ? $_[0] : join '', @_ ) ) =~ s/([F\x00-\x1F\\\{\}\x7F-\xFF])/$Escape{$1}/g;
+
+        # Escape \, {, }, control chars, and 7f-ff (unlike rtf_esc, "-" is left alone).
+        $x =~ s/([^\x00-\xFF])/'\\uc1\\u'.((ord($1)<32768)?ord($1):(ord($1)-65536)).'?'/eg;
+
+        # (Above: hyper-escape all Unicode characters.)
+        return $x;
+    }
+}
+
+1;
+
+__END__
+
+=pod
+
+=for test_synopsis 1;
+__END__
+
+=for stopwords arial bookman lm pagenumber prolog rtf tahoma verdana CPAN
+    homepage rm sans serif twentieths
+
+=head1 NAME
+
+HTML::FormatRTF - Format HTML as RTF
+
+=head1 VERSION
+
+version 2.14
+
+=head1 SYNOPSIS
+
+  use HTML::FormatRTF;
+
+  my $out_file = "test.rtf";
+  open(RTF, ">$out_file")
+   or die "Can't write-open $out_file: $!\nAborting";
+
+  print RTF HTML::FormatRTF->format_file(
+    'test.html',
+      'fontname_headings' => "Verdana",
+  );
+  close(RTF);
+
+=head1 DESCRIPTION
+
+HTML::FormatRTF is a class for objects that you use to convert HTML to RTF.
+There is currently no proper support for tables or forms.
+
+This is a subclass of L<HTML::Formatter>, whose documentation you should
+consult for more information on underlying methods such as C<new>, C<format>,
+C<format_file>, etc.
+
+You can specify any of the following parameters in the call to C<new>,
+C<format_file>, or C<format_string>:
+
+=over
+
+=item lm
+
+Amount of I<extra> indenting to apply to the left margin, in twips
+(I<tw>entI<i>eths of a I<p>oint). Default is 0.
+
+So if you wanted the left margin to be an additional half inch larger, you'd
+set C<< lm => 720 >> (since there's 1440 twips in an inch). If you wanted it to
+be about 1.5cm larger, you'd set C<< lm => 850 >> (since there's about 567
+twips in a centimeter).
+
+=item rm
+
+Amount of I<extra> indenting to apply to the right margin, in twips
+(I<tw>entI<i>eths of a I<p>oint).  Default is 0.
+
+=item normal_halfpoint_size
+
+This is the size of normal text in the document, in I<half>-points. The default
+value is 22, meaning that normal text is in 11 point.
+
+=item header_halfpoint_size
+
+This is the size of text used in the document's page-header, in I<half>-points.
+The default value is 17, meaning that header text is in 8.5 point.  Currently,
+the header consists just of "p. I<pagenumber>" in the upper-right-hand corner,
+and cannot be disabled.
+
+=item head1_halfpoint_size ... head6_halfpoint_size
+
+These control the font size of each heading level, in half-points.  For example,
+the default for head3_halfpoint_size is 25, meaning that HTML C<< <h3>...</h3>
+>> text will be in 12.5 point text (in addition to being underlined and in the
+heading font).
+
+=item codeblock_halfpoint_size
+
+This controls the font size (in half-points) of the text used for C<<
+<pre>...</pre> >> text.  By default, it is 18, meaning 9 point.
+
+=item fontname_body
+
+This option controls what font is to be used for the body of the text -- that
+is, everything other than heading text and text in pre/code/tt elements. The
+default value is currently "Times".  Other handy values I can suggest using are
+"Georgia" or "Bookman Old Style".
+
+=item fontname_code
+
+This option controls what font is to be used for text in pre/code/tt elements.
+The default value is currently "Courier New".
+
+=item fontname_headings
+
+This option controls what font name is to be used for headings.  You can use
+the same font as fontname_body, but I prefer a sans-serif font, so the default
+value is currently "Arial".  Also consider "Tahoma" and "Verdana".
+
+=item document_language
+
+This option controls what Microsoft language number will be specified as the
+language for this document. The current default value is 1033, for US English.
+Consult an RTF reference for other language numbers.
+
+=item hr_width
+
+This option controls how many underline characters will be used for rendering a
+"<hr>" tag. Its default value is currently 50. You can usually leave this
+alone, but under some circumstances you might want to use a smaller or larger
+number.
+
+=item no_prolog
+
+If this option is set to a true value, HTML::FormatRTF will make a point of
+I<not> emitting the RTF prolog before the document.  By default, this is off,
+meaning that HTML::FormatRTF I<will> emit the prolog.  This option is of
+interest only to advanced users.
+
+=item no_trailer
+
+If this option is set to a true value, HTML::FormatRTF will make a point of
+I<not> emitting the RTF trailer at the end of the document.  By default, this
+is off, meaning that HTML::FormatRTF I<will> emit the bit of RTF that ends the
+document.  This option is of interest only to advanced users.
+
+=back
+
+=head1 SEE ALSO
+
+L<HTML::Formatter>, L<RTF::Writer>
+
+=head1 INSTALLATION
+
+See perlmodinstall for information and options on installing Perl modules.
+
+=head1 BUGS AND LIMITATIONS
+
+You can make new bug reports, and view existing ones, through the
+web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Formatter>.
+
+=head1 AVAILABILITY
+
+The project homepage is L<https://metacpan.org/release/HTML-Formatter>.
+
+The latest version of this module is available from the Comprehensive Perl
+Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
+site near you, or see L<https://metacpan.org/module/HTML::Formatter/>.
+
+=head1 AUTHORS
+
+=over 4
+
+=item *
+
+Nigel Metheringham <nigelm@cpan.org>
+
+=item *
+
+Sean M Burke <sburke@cpan.org>
+
+=item *
+
+Gisle Aas <gisle@ActiveState.com>
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/lib/site/HTML/FormatText.pm b/lib/site/HTML/FormatText.pm
index 24a7b1b8c..4409e9425 100644
--- a/lib/site/HTML/FormatText.pm
+++ b/lib/site/HTML/FormatText.pm
@@ -1,99 +1,52 @@
 package HTML::FormatText;
 
-# $Id$
+# ABSTRACT: Format HTML as plaintext
 
-=head1 NAME
-
-HTML::FormatText - Format HTML as text
-
-=head1 SYNOPSIS
-
- require HTML::FormatText;
- $html = parse_htmlfile("test.html");
- $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
- print $formatter->format($html);
-
-=head1 DESCRIPTION
-
-The HTML::FormatText is a formatter that outputs plain latin1 text.
-All character attributes (bold/italic/underline) are ignored.
-Formatting of HTML tables and forms is not implemented.
-
-You might specify the following parameters when constructing the
-formatter:
-
-=over 4
-
-=item I<leftmargin> (alias I<lm>)
-
-The column of the left margin. The default is 3.
-
-=item I<rightmargin> (alias I<rm>)
-
-The column of the right margin. The default is 72.
-
-=back
-
-=head1 SEE ALSO
-
-L<HTML::Formatter>
-
-=head1 COPYRIGHT
-
-Copyright (c) 1995-1998 Gisle Aas. All rights reserved.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=head1 AUTHOR
-
-Gisle Aas <aas@sn.no>
-
-=cut
 
+use 5.006_001;
 use strict;
-use vars qw(@ISA $VERSION);
+use warnings;
 
-                                # Make sure we override any local Formatter with our modified one
-                                #   - the default one does not look into tables
-#equire HTML::Formatter;
-require "../lib/site/HTML/Formatter.pm";
+# We now use Smart::Comments in place of the old DEBUG framework.
+# this should be commented out in release versions....
+##use Smart::Comments;
 
-@ISA = qw(HTML::Formatter);
+use base 'HTML::Formatter';
 
-($VERSION) = q$Revision$ =~ /: (\d+)/;
+our $VERSION = '2.14'; # VERSION
+our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
 
-
-sub default_values
-{
-    (
-     lm =>  3, # left margin
-     rm => 72, # right margin (actually, maximum text width)
+# ------------------------------------------------------------------------
+sub default_values {
+    (   shift->SUPER::default_values(),
+        lm => 3,     # left margin
+        rm => 72,    # right margin (actually, maximum text width)
     );
 }
 
-sub configure
-{
-    my($self,$hash) = @_;
+# ------------------------------------------------------------------------
+sub configure {
+    my ( $self, $hash ) = @_;
+
     my $lm = $self->{lm};
     my $rm = $self->{rm};
 
-    $lm = delete $hash->{lm} if exists $hash->{lm};
-    $lm = delete $hash->{leftmargin} if exists $hash->{leftmargin};
-    $rm = delete $hash->{rm} if exists $hash->{rm};
+    $lm = delete $hash->{lm}          if exists $hash->{lm};
+    $lm = delete $hash->{leftmargin}  if exists $hash->{leftmargin};
+    $rm = delete $hash->{rm}          if exists $hash->{rm};
     $rm = delete $hash->{rightmargin} if exists $hash->{rightmargin};
 
     my $width = $rm - $lm;
-    if ($width < 1) {
-	warn "Bad margins, ignored" if $^W;
-	return;
+    if ( $width < 1 ) {
+        warn "Bad margins, ignored" if $^W;
+        return;
     }
-    if ($width < 20) {
-	warn "Page probably too narrow" if $^W;
+    if ( $width < 20 ) {
+        warn "Page probably too narrow" if $^W;
     }
 
-    for (keys %$hash) {
-	warn "Unknown configure option '$_'" if $^W;
+    for ( keys %$hash ) {
+        warn "Unknown configure option '$_'" if $^W;
     }
 
     $self->{lm} = $lm;
@@ -101,103 +54,114 @@ sub configure
     $self;
 }
 
-
-sub begin
-{
+# ------------------------------------------------------------------------
+sub begin {
     my $self = shift;
-    $self->HTML::Formatter::begin;
-    $self->{curpos} = 0;  # current output position.
-    $self->{maxpos} = 0;  # highest value of $pos (used by header underliner)
-    $self->{hspace} = 0;  # horizontal space pending flag
-}
 
+    $self->SUPER::begin;
+    $self->{curpos} = 0;    # current output position.
+    $self->{maxpos} = 0;    # highest value of $pos (used by header underliner)
+    $self->{hspace} = 0;    # horizontal space pending flag
+}
 
-sub end
-{
+# ------------------------------------------------------------------------
+sub end {
     shift->collect("\n");
 }
 
+# ------------------------------------------------------------------------
+sub header_start {
+    my ( $self, $level ) = @_;
 
-sub header_start
-{
-    my($self, $level, $node) = @_;
-    $self->vspace(1 + (6-$level) * 0.4);
+    $self->vspace( 1 + ( 6 - $level ) * 0.4 );
     $self->{maxpos} = 0;
     1;
 }
 
-sub header_end
-{
-    my($self, $level, $node) = @_;
-    if ($level <= 2) {
-	my $line;
-	$line = '=' if $level == 1;
-	$line = '-' if $level == 2;
-	$self->vspace(0);
-	$self->out($line x ($self->{maxpos} - $self->{lm}));
+# ------------------------------------------------------------------------
+sub header_end {
+    my ( $self, $level ) = @_;
+
+    if ( $level <= 2 ) {
+        my $line;
+        $line = '=' if $level == 1;
+        $line = '-' if $level == 2;
+        $self->vspace(0);
+        $self->out( $line x ( $self->{maxpos} - $self->{lm} ) );
     }
     $self->vspace(1);
     1;
 }
 
+# ------------------------------------------------------------------------
+sub bullet {
+    my ( $self, $marker ) = @_;
+
+    return $self->SUPER::bullet("$marker ");    # the marker plus one trailing space
+}
 
-sub hr_start
-{
+# ------------------------------------------------------------------------
+sub hr_start {
     my $self = shift;
+
     $self->vspace(1);
-    $self->out('-' x ($self->{rm} - $self->{lm}));
+    $self->out( '-' x ( $self->{rm} - $self->{lm} ) );
     $self->vspace(1);
 }
 
-
-sub pre_out
-{
+# ------------------------------------------------------------------------
+sub pre_out {
     my $self = shift;
+
     # should really handle bold/italic etc.
-    if (defined $self->{vspace}) {
-	if ($self->{out}) {
-	    $self->nl() while $self->{vspace}-- >= 0;
-	    $self->{vspace} = undef;
-	}
+    if ( defined $self->{vspace} ) {
+        if ( $self->{out} ) {
+            $self->nl() while $self->{vspace}-- >= 0;
+            $self->{vspace} = undef;
+        }
     }
     my $indent = ' ' x $self->{lm};
-    my $pre = shift;
+    my $pre    = shift;
     $pre =~ s/^/$indent/mg;
     $self->collect($pre);
     $self->{out}++;
 }
 
-
-sub out
-{
+# ------------------------------------------------------------------------
+sub out {
     my $self = shift;
     my $text = shift;
 
-    if ($text =~ /^\s*$/) {
-	$self->{hspace} = 1;
-	return;
+    $text =~ tr/\xA0\xAD/ /d;
+
+    if ( $text =~ /^\s*$/ ) {
+        $self->{hspace} = 1;
+        return;
     }
 
-    if (defined $self->{vspace}) {
-	if ($self->{out}) {
-	    $self->nl while $self->{vspace}-- >= 0;
+    if ( defined $self->{vspace} ) {
+        if ( $self->{out} ) {
+            $self->nl while $self->{vspace}-- >= 0;
         }
-	$self->goto_lm;
-	$self->{vspace} = undef;
-	$self->{hspace} = 0;
+        $self->goto_lm;
+        $self->{vspace} = undef;
+        $self->{hspace} = 0;
     }
 
-    if ($self->{hspace}) {
-	if ($self->{curpos} + length($text) > $self->{rm}) {
-	    # word will not fit on line; do a line break
-	    $self->nl;
-	    $self->goto_lm;
-	} else {
-	    # word fits on line; use a space
-	    $self->collect(' ');
-	    ++$self->{curpos};
-	}
-	$self->{hspace} = 0;
+    if ( $self->{hspace} ) {
+        if ( $self->{curpos} + length($text) > $self->{rm} ) {
+
+            # word will not fit on line; do a line break
+            $self->nl;
+            $self->goto_lm;
+        }
+        else {
+
+            # word fits on line; use a space
+            $self->collect(' ');
+            ++$self->{curpos};
+        }
+        $self->{hspace} = 0;
     }
 
     $self->collect($text);
@@ -206,39 +170,145 @@ sub out
     $self->{'out'}++;
 }
 
-
-sub goto_lm
-{
+# ------------------------------------------------------------------------
+sub goto_lm {
     my $self = shift;
+
     my $pos = $self->{curpos};
     my $lm  = $self->{lm};
-    if ($pos < $lm) {
-	$self->{curpos} = $lm;
-	$self->collect(" " x ($lm - $pos));
+    if ( $pos < $lm ) {
+        $self->{curpos} = $lm;
+        $self->collect( " " x ( $lm - $pos ) );
     }
 }
 
-
-sub nl
-{
+# ------------------------------------------------------------------------
+sub nl {
     my $self = shift;
+
     $self->{'out'}++;
     $self->{curpos} = 0;
     $self->collect("\n");
 }
 
-
-sub adjust_lm
-{
+# ------------------------------------------------------------------------
+sub adjust_lm {
     my $self = shift;
+
     $self->{lm} += $_[0];
     $self->goto_lm;
 }
 
-
-sub adjust_rm
-{
+# ------------------------------------------------------------------------
+sub adjust_rm {
     shift->{rm} += $_[0];
 }
 
+
 1;
+
+__END__
+
+=pod
+
+=for test_synopsis 1;
+__END__
+
+=for stopwords latin1 leftmargin lm plaintext rightmargin rm CPAN homepage
+
+=head1 NAME
+
+HTML::FormatText - Format HTML as plaintext
+
+=head1 VERSION
+
+version 2.14
+
+=head1 SYNOPSIS
+
+    use HTML::TreeBuilder;
+    $tree = HTML::TreeBuilder->new->parse_file("test.html");
+
+    use HTML::FormatText;
+    $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
+    print $formatter->format($tree);
+
+or, more simply:
+
+    use HTML::FormatText;
+    my $string = HTML::FormatText->format_file(
+        'test.html',
+        leftmargin => 0, rightmargin => 50
+        );
+
+=head1 DESCRIPTION
+
+HTML::FormatText is a formatter that outputs plain latin1 text. All character
+attributes (bold/italic/underline) are ignored. Formatting of HTML tables and
+forms is not implemented.
+
+HTML::FormatText is built on L<HTML::Formatter> and documentation for that
+module applies to this - especially L<HTML::Formatter/new>,
+L<HTML::Formatter/format_file> and L<HTML::Formatter/format_string>.
+
+You might specify the following parameters when constructing the formatter:
+
+=over 4
+
+=item I<leftmargin> (alias I<lm>)
+
+The column of the left margin. The default is 3.
+
+=item I<rightmargin> (alias I<rm>)
+
+The column of the right margin. The default is 72.
+
+=back
+
+=head1 SEE ALSO
+
+L<HTML::Formatter>
+
+=head1 INSTALLATION
+
+See perlmodinstall for information and options on installing Perl modules.
+
+=head1 BUGS AND LIMITATIONS
+
+You can make new bug reports, and view existing ones, through the
+web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Formatter>.
+
+=head1 AVAILABILITY
+
+The project homepage is L<https://metacpan.org/release/HTML-Formatter>.
+
+The latest version of this module is available from the Comprehensive Perl
+Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
+site near you, or see L<https://metacpan.org/module/HTML::Formatter/>.
+
+=head1 AUTHORS
+
+=over 4
+
+=item *
+
+Nigel Metheringham <nigelm@cpan.org>
+
+=item *
+
+Sean M Burke <sburke@cpan.org>
+
+=item *
+
+Gisle Aas <gisle@ActiveState.com>
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/lib/site/HTML/Formatter.pm b/lib/site/HTML/Formatter.pm
index fba28ecdf..1a9381eb1 100644
--- a/lib/site/HTML/Formatter.pm
+++ b/lib/site/HTML/Formatter.pm
@@ -1,107 +1,179 @@
+package HTML::Formatter;
 
-# 11/29/98 bwinter ... modified to allow for tables.  Other HTML code is in perl directory
-#  - make sure this replaces the HTML/Formatter.pm file, so tables will be parsed
+# ABSTRACT: Base class for HTML formatters
 
-package HTML::Formatter;
 
-# $Id$
+use 5.006_001;
+use strict;
+use warnings;
 
-=head1 NAME
+use Carp;
+use HTML::Element 3.15 ();
 
-HTML::Formatter - Base class for HTML formatters
+# We now use Smart::Comments in place of the old DEBUG framework.
+# this should be commented out in release versions....
+##use Smart::Comments;
 
-=head1 SYNOPSIS
+our $VERSION = '2.14'; # VERSION
+our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
 
- package HTML::FormatXX;
- require HTML::Formatter;
- @ISA=qw(HTML::Formatter);
+#
+# A typical formatter will not use all of the features of this
+# class.  But it will use some, as best fits the mapping
+# of HTML to the particular output format.
+#
 
-=head1 DESCRIPTION
+# ------------------------------------------------------------------------
 
-HTML formatters are able to format a HTML syntax tree into various
-printable formats.  Different formatters produce output for different
-output media.  Common for all formatters are that they will return the
-formatted output when the format() method is called.  Format() takes a
-HTML::Element as parameter.
 
-=head1 SEE ALSO
+sub new {
+    my ( $class, %arg ) = @_;
 
-L<HTML::FormatText>, L<HTML::FormatPS>, L<HTML::Element>
+    my $self = bless { $class->default_values }, $class;
+    $self->configure( \%arg ) if keys %arg;
 
-=head1 COPYRIGHT
+    return $self;
+}
 
-Copyright (c) 1995-1998 Gisle Aas. All rights reserved.
+# ------------------------------------------------------------------------
+sub default_values {
+    ();
+}
 
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
+# ------------------------------------------------------------------------
+sub configure {
+    my ( $self, $arg ) = @_;
 
-=head1 AUTHOR
+    for ( keys %$arg ) {
+        warn "Unknown configure argument '$_'" if $^W;
+    }
 
-Gisle Aas <aas@sn.no>
+    return $self;
+}
 
-=cut
+# ------------------------------------------------------------------------
+sub massage_tree {
+    my ( $self, $html ) = @_;
 
+    return if $html->tag eq 'p';    # sanity
 
-require HTML::Element;
+    ### Before massaging: $html->dump()
 
-use strict;
-use Carp;
-use UNIVERSAL qw(can);
+    $html->simplify_pres();
 
-use vars qw($VERSION);
-($VERSION) = q$Revision$ =~ /: (\d+)/;
+    # Does anything else need doing?
+    ### After massaging: $html->dump()
 
-sub new
-{
-    my($class,%arg) = @_;
-    my $self = bless { $class->default_values }, $class;
-    $self->configure(\%arg) if scalar(%arg);
-    $self;
+    return;
 }
 
-sub default_values
-{
-    ();
+# ------------------------------------------------------------------------
+
+
+sub format_from_file { my $self = shift; return $self->format_file(@_); }    # alias for format_file
+
+sub format_file {
+    my ( $self, $filename, @params ) = @_;
+
+    # Called on a class name: build a formatter from @params first.
+    ref $self or $self = $self->new(@params);
+
+    croak "What filename to format from?"
+        if !defined($filename) || !length($filename);
+
+    my $tree = $self->_default_tree();
+    $tree->parse_file($filename);
+    my $out = $self->format($tree);
+    $tree->delete;    # discard the parse tree once it has been formatted
+
+    return $out;
+}
 
-sub configure
-{
-    my($self, $arg) = @_;
-    for (keys %$arg) {
-	warn "Unknown configure argument '$_'" if $^W;
-    }
-    $self;
+# ------------------------------------------------------------------------
+
+
+# ------------------------------------------------------------------------
+sub format_from_string { my $self = shift; $self->format_string(@_) }    # alias for format_string
+
+sub format_string {
+    my ( $self, $content, @params ) = @_;
+
+    # Called on a class name: build a formatter from @params first.
+    ref $self or $self = $self->new(@params);
+    croak "What string to format?" if !defined $content;
+
+    my $tree = $self->_default_tree();
+    $tree->parse($content);
+    $tree->eof();
+    undef $content;    # drop our copy of the text before formatting
+
+    my $out = $self->format($tree);
+    $tree->delete;
+
+    return $out;
+}
 
-sub format
-{
-    my($self, $html) = @_;
-    $self->begin();
+# ------------------------------------------------------------------------
+sub _default_tree {
+    require HTML::TreeBuilder;    # loaded at run time, on first use
+    my $builder = HTML::TreeBuilder->new;
+
+    # If nothing else works, try using these parser options:
+    #$builder->implicit_body_p_tag(1);
+    #$builder->p_strict(1);
+
+    return $builder;
+}
+
+# ------------------------------------------------------------------------
+
+
+sub format {
+    my ( $self, $html ) = @_;
+
+    croak "Usage: \$formatter->format(\$tree)" unless ( defined($html) and ref($html) and $html->can('tag') );
+
+    #### Tree to format: $html->dump
+
+    $self->set_version_tag($html);
+    $self->massage_tree($html);
+    $self->begin($html);
+    $html->number_lists();
+
+    # Per-iteration scratch:
+    my ( $node, $start, $depth, $tag, $func );
     $html->traverse(
-	sub {
-	    my($node, $start, $depth) = @_;
-	    if (ref $node) {
-		my $tag = $node->tag;
-		my $func = $tag . '_' . ($start ? "start" : "end");
-		# Use UNIVERSAL::can so that we can recover if
-		# a handler is not defined for the tag.
-		if (can($self, $func)) {
-		    return $self->$func($node);
-		} else {
-		    return 1;
-		}
-	    } else {
-		$self->textflow($node);
-	    }
-	    1;
-	}
-     );
-    $self->end();
-    join('', @{$self->{output}});
-}
-
-sub begin
-{
+        sub {
+            ( $node, $start, $depth ) = @_;
+            if ( ref $node ) {
+                $tag = $node->tag;
+                $func = $tag . '_' . ( $start ? "start" : "end" );
+
+                # Use ->can so that we can recover if
+                # a handler is not defined for the tag.
+                if ( $self->can($func) ) {
+                    ### Calling : ('  ' x $depth) . $func
+                    return $self->$func($node);
+                }
+                else {
+                    ### Skipping: ('  ' x $depth) . $func
+                    return 1;
+                }
+            }
+            else {
+                $self->textflow($node);
+            }
+            1;
+        }
+    );
+
+    $self->end($html);
+
+    return join( '', @{ $self->{output} } );
+}
+
+# ------------------------------------------------------------------------
+sub begin {
     my $self = shift;
 
     # Flags
@@ -110,483 +182,790 @@ sub begin
     $self->{bold}      = 0;
     $self->{italic}    = 0;
     $self->{center}    = 0;
-    $self->{nobr}      = 0;
 
-    $self->{font_size}     = [3];   # last element is current size
+    $self->{superscript}   = 0;
+    $self->{subscript}     = 0;
+    $self->{strikethrough} = 0;
+
+    $self->{center_stack} = [];    # push and pop 'center' states to it
+    $self->{nobr}         = 0;
+
+    $self->{'font_size'} = [3];     # last element is current size
     $self->{basefont_size} = [3];
 
-    $self->{markers} = [];          # last element is current marker
     $self->{vspace} = undef;        # vertical space (dimension)
 
     $self->{output} = [];
 }
 
-sub end
-{
-}
+# ------------------------------------------------------------------------
+sub end { }
 
-sub html_start { 1; }  sub html_end {}
-sub head_start { 0; }
-sub body_start { 1; }  sub body_end {}
+# ------------------------------------------------------------------------
+sub set_version_tag {
+    my ( $self, $html ) = @_;
 
-sub header_start
-{
-    my($self, $level, $node) = @_;
-    my $align = $node->attr('align');
-    if (defined($align) && lc($align) eq 'center') {
-	$self->{center}++;
+    if ($html) {
+        $self->{'version_tag'} = sprintf(
+            "%s (v%s, using %s v%s%s)",
+            ref($self), $self->VERSION || '?',
+            ref($html),
+            $html->VERSION || '?',
+            $HTML::Parser::VERSION ? ", and HTML::Parser v$HTML::Parser::VERSION" : ''
+        );
     }
-    1,
-}
-
-sub header_end
-{
-    my($self, $level, $node) = @_;
-    my $align = $node->attr('align');
-    if (defined($align) && lc($align) eq 'center') {
-	$self->{center}--;
+    elsif ($HTML::Parser::VERSION) {
+        $self->{'version_tag'} =
+            sprintf( "%s (v%s, using %s)", ref($self), $self->VERSION || "?", "HTML::Parser v$HTML::Parser::VERSION", );
+    }
+    else {
+        $self->{'version_tag'} = sprintf( "%s (v%s)", ref($self), $self->VERSION || '?', );
     }
 }
 
-sub h1_start { shift->header_start(1, @_) }
-sub h2_start { shift->header_start(2, @_) }
-sub h3_start { shift->header_start(3, @_) }
-sub h4_start { shift->header_start(4, @_) }
-sub h5_start { shift->header_start(5, @_) }
-sub h6_start { shift->header_start(6, @_) }
-
-sub h1_end   { shift->header_end(1, @_) }
-sub h2_end   { shift->header_end(2, @_) }
-sub h3_end   { shift->header_end(3, @_) }
-sub h4_end   { shift->header_end(4, @_) }
-sub h5_end   { shift->header_end(5, @_) }
-sub h6_end   { shift->header_end(6, @_) }
-
-sub br_start
-{
-    my $self = shift;
-    $self->vspace(0, 1);
-}
+# ------------------------------------------------------------------------
+sub version_tag { shift->{'version_tag'} }
 
-sub hr_start
-{
-    my $self = shift;
-    $self->vspace(1);
-}
+# ------------------------------------------------------------------------
+sub html_start     { 1; }
+sub html_end       { }
+sub body_start     { 1; }
+sub body_end       { }
+sub head_start     { 0; }
+sub script_start   { 0; }
+sub style_start    { 0; }
+sub frameset_start { 0; }
 
-sub img_start
-{
-    shift->out(shift->attr('alt') || "[IMAGE]");
-}
+# ------------------------------------------------------------------------
+sub header_start {
+    my ( $self, undef, $node ) = @_;
 
-sub a_start
-{
-    shift->{anchor}++;
+    my $align = $node->attr('align');
+    if ( defined($align) && lc($align) eq 'center' ) {
+        $self->{center}++;
+    }
     1;
 }
 
-sub a_end
-{
-    shift->{anchor}--;
-}
+# ------------------------------------------------------------------------
+sub header_end {
+    my ( $self, undef, $node ) = @_;
 
-sub u_start
-{
-    shift->{underline}++;
-    1;
+    my $align = $node->attr('align');
+    if ( defined($align) && lc($align) eq 'center' ) {
+        $self->{center}--;
+    }
 }
 
-sub u_end
-{
-    shift->{underline}--;
-}
+# ------------------------------------------------------------------------
+sub h1_start { shift->header_start( 1, @_ ) }
+sub h2_start { shift->header_start( 2, @_ ) }
+sub h3_start { shift->header_start( 3, @_ ) }
+sub h4_start { shift->header_start( 4, @_ ) }
+sub h5_start { shift->header_start( 5, @_ ) }
+sub h6_start { shift->header_start( 6, @_ ) }
+
+# ------------------------------------------------------------------------
+sub h1_end { shift->header_end( 1, @_ ) }
+sub h2_end { shift->header_end( 2, @_ ) }
+sub h3_end { shift->header_end( 3, @_ ) }
+sub h4_end { shift->header_end( 4, @_ ) }
+sub h5_end { shift->header_end( 5, @_ ) }
+sub h6_end { shift->header_end( 6, @_ ) }
+
+sub br_start { my $self = shift; $self->vspace( 0, 1 ); }
+sub hr_start { my $self = shift; $self->vspace(1); 1; }
+
+# ------------------------------------------------------------------------
+sub img_start {    # img element: output its alt text, or the literal [IMAGE] when attr(alt) is undefined
+    my ( $self, $node ) = @_;
+
+    my $alt = $node->attr('alt');
+    $self->out( defined($alt) ? $alt : "[IMAGE]" );    # defined() test, so an empty or zero alt value is output as given
+}
+
+# ------------------------------------------------------------------------
+sub a_start      { shift->{anchor}++;    1; }
+sub a_end        { shift->{anchor}--; }
+sub u_start      { shift->{underline}++; 1; }
+sub u_end        { shift->{underline}--; }
+sub b_start      { shift->{bold}++;      1; }
+sub b_end        { shift->{bold}--; }
+sub tt_start     { shift->{teletype}++;  1; }
+sub tt_end       { shift->{teletype}--; }
+sub i_start      { shift->{italic}++;    1; }
+sub i_end        { shift->{italic}--; }
+sub center_start { shift->{center}++;    1; }
+sub center_end   { shift->{center}--; }
+
+# ------------------------------------------------------------------------
+sub div_start {    # interesting only for its 'align' attribute
+    my ( $self, $node ) = @_;
 
-sub b_start
-{
-    shift->{bold}++;
+    my $align = $node->attr('align');
+    if ( defined($align) && lc($align) eq 'center' ) {
+        return $self->center_start;
+    }
     1;
 }
 
-sub b_end
-{
-    shift->{bold}--;
-}
+# ------------------------------------------------------------------------
+sub div_end {
+    my ( $self, $node ) = @_;
 
-sub tt_start
-{
-    shift->{teletype}++;
-    1;
+    my $align = $node->attr('align');
+    if ( defined($align) && lc($align) eq 'center' ) {
+        return $self->center_end;
+    }
 }
 
-sub tt_end
-{
-    shift->{teletype}--;
-}
+# ------------------------------------------------------------------------
+sub nobr_start { shift->{nobr}++; 1; }
+sub nobr_end   { shift->{nobr}--; }
+sub wbr_start  { 1; }
 
-sub i_start
-{
-    shift->{italic}++;
-    1;
-}
+# ------------------------------------------------------------------------
+sub font_start {
+    my ( $self, $elem ) = @_;
 
-sub i_end
-{
-    shift->{italic}--;
-}
+    my $size = $elem->attr('size');
+    return 1 unless ( defined($size) );
+    if ( $size =~ /^\s*[+\-]/ ) {
+        my $base = $self->{basefont_size}[-1];
 
-sub center_start
-{
-    shift->{center}++;
+        # yes, base it on the most recent one
+        $size = $base + $size;
+    }
+    push @{ $self->{'font_size'} }, $size;
+    $self->new_font_size($size);
     1;
 }
 
-sub center_end
-{
-    shift->{center}--;
+# ------------------------------------------------------------------------
+sub font_end {
+    my ( $self, $elem ) = @_;
+    my $size = $elem->attr('size');
+    return unless defined $size;
+    pop @{ $self->{'font_size'} };
+    $self->restore_font_size( $self->{'font_size'}[-1] );
 }
 
-sub nobr_start
-{
-    shift->{nobr}++;
+# ------------------------------------------------------------------------
+sub big_start {
+    my $self = $_[0];
+    push @{ $self->{'font_size'} }, $self->{basefont_size}[-1] + 1;    # same as font size="+1"
+    $self->new_font_size( $self->{'font_size'}[-1] );
     1;
 }
 
-sub nobr_end
-{
-    shift->{nobr}--;
-}
-
-sub wbr_start
-{
+# ------------------------------------------------------------------------
+sub small_start {
+    my $self = $_[0];
+    push @{ $self->{'font_size'} }, $self->{basefont_size}[-1] - 1,    # same as font size="-1"
+        ;
+    $self->new_font_size( $self->{'font_size'}[-1] );
     1;
 }
 
-sub font_start
-{
-    my($self, $elem) = @_;
-    my $size = $elem->attr('size');
-    return 1 unless defined $size;
-    if ($size =~ /^\s*[+\-]/) {
-	my $base = $self->{basefont_size}[-1];
-	$size = $base + $size;
-    }
-    push(@{$self->{font_size}}, $size);
+# ------------------------------------------------------------------------
+sub big_end {
+    my $self = $_[0];
+    pop @{ $self->{'font_size'} };
+    $self->restore_font_size( $self->{'font_size'}[-1] );
     1;
 }
 
-sub font_end
-{
-    my($self, $elem) = @_;
-    my $size = $elem->attr('size');
-    return unless defined $size;
-    pop(@{$self->{font_size}});
+# ------------------------------------------------------------------------
+sub small_end {
+    my $self = $_[0];
+    pop @{ $self->{'font_size'} };
+    $self->restore_font_size( $self->{'font_size'}[-1] );
+    1;
 }
 
-sub basefont_start
-{
-    my($self, $elem) = @_;
+# ------------------------------------------------------------------------
+sub basefont_start {
+    my ( $self, $elem ) = @_;
     my $size = $elem->attr('size');
     return unless defined $size;
-    push(@{$self->{basefont_size}}, $size);
+    push( @{ $self->{basefont_size} }, $size );
     1;
 }
 
-sub basefont_end
-{
-    my($self, $elem) = @_;
+# ------------------------------------------------------------------------
+sub basefont_end {
+    my ( $self, $elem ) = @_;
     my $size = $elem->attr('size');
     return unless defined $size;
-    pop(@{$self->{basefont_size}});
-}
-
-# Aliases for logical markup
-BEGIN {
-    *cite_start   = \&i_start;
-    *cite_end     = \&i_end;
-    *code_start   = \&tt_start;
-    *code_end     = \&tt_end;
-    *em_start     = \&i_start;
-    *em_end       = \&i_end;
-    *kbd_start    = \&tt_start;
-    *kbd_end      = \&tt_end;
-    *samp_start   = \&tt_start;
-    *samp_end     = \&tt_end;
-    *strong_start = \&b_start;
-    *strong_end   = \&b_end;
-    *var_start    = \&tt_start;
-    *var_end      = \&tt_end;
-}
-
-sub p_start
-{
+    pop( @{ $self->{basefont_size} } );
+}
+
+# ------------------------------------------------------------------------
+#
+# Override in subclasses, if you like.
+#
+sub new_font_size     { }    #my( $self, $font_size_number ) = @_;
+sub restore_font_size { }    #my( $self, $font_size_number ) = @_;
+
+# ------------------------------------------------------------------------
+sub q_start      { shift->out(q<">);         1; }
+sub q_end        { shift->out(q<">);         1; }
+sub sup_start    { shift->{superscript}++;   1; }
+sub sup_end      { shift->{superscript}--;   1; }
+sub sub_start    { shift->{subscript}++;     1; }
+sub sub_end      { shift->{subscript}--;     1; }
+sub strike_start { shift->{strikethrough}++; 1; }
+sub strike_end   { shift->{strikethrough}--; 1; }
+sub s_start      { shift->strike_start(@_); }
+sub s_end        { shift->strike_end(@_); }
+sub dfn_start    { 1; }
+sub dfn_end      { 1; }
+sub abbr_start   { 1; }
+sub abbr_end     { 1; }
+sub acronym_start { 1; }
+sub acronym_end   { 1; }
+sub span_start    { 1; }
+sub span_end      { 1; }
+sub ins_start     { 1; }
+sub ins_end       { 1; }
+sub del_start     { 0; }    # Don't render the del'd bits
+sub del_end       { 0; }
+
+# ------------------------------------------------------------------------
+my @Size_magic_numbers = (
+    0.60, 0.75, 0.89, 1, 1.20, 1.50, 2.00, 3.00
+
+        # #0    #1    #2   #3     #4     #5     #6     #7
+        #________________ - | + _________________________
+        # -3    -2    -1    0     +1     +2     +3     +4
+);
+
+# ------------------------------------------------------------------------
+sub scale_font_for {    # Scale $reference_size by the factor for a font size number; returns the int() of the scaled value
+    my ( $self, $reference_size ) = @_;
+
+    # Mozilla's source, at
+    # http://lxr.mozilla.org/seamonkey/source/content/html/style/src/nsStyleUtil.cpp#299
+    # says:
+    #  static PRInt32 sFontSizeFactors[8] = { 60,75,89,100,120,150,200,300 };
+    #
+    # For comparison, Gisle's earlier HTML::FormatPS has:
+    #    |           # size   0   1   2   3   4   5   6   7
+    #    | @FontSizes = ( 5,  6,  8, 10, 12, 14, 18, 24, 32);
+    # ...and gets different sizing via just a scaling factor.
+
+    my $size_number = int( defined( $_[2] ) ? $_[2] : $self->{'font_size'}[-1] );    # optional third argument overrides the current (last pushed) font size
+
+    # force the size_number into range:
+    $size_number =
+          ( $size_number < 0 )                    ? 0
+        : ( $size_number > $#Size_magic_numbers ) ? $#Size_magic_numbers
+        :                                           int($size_number);
+
+    my $result = int( .5 + $reference_size * $Size_magic_numbers[$size_number] );    # adding .5 before int() is meant to round to nearest rather than truncate
+
+    ### Scale Font: sprintf("reference %s, size %s => %s",  $reference_size, $size_number, $result);
+
+    return $result;
+}
+
+# ------------------------------------------------------------------------
+# Aliases for logical markup:
+sub strong_start { shift->b_start(@_) }
+sub strong_end   { shift->b_end(@_) }
+sub cite_start   { shift->i_start(@_) }
+sub cite_end     { shift->i_end(@_) }
+sub em_start     { shift->i_start(@_) }
+sub em_end       { shift->i_end(@_) }
+sub code_start   { shift->tt_start(@_) }
+sub code_end     { shift->tt_end(@_) }
+sub kbd_start    { shift->tt_start(@_) }
+sub kbd_end      { shift->tt_end(@_) }
+sub samp_start   { shift->tt_start(@_) }
+sub samp_end     { shift->tt_end(@_) }
+sub var_start    { shift->tt_start(@_) }
+sub var_end      { shift->tt_end(@_) }
+
+# ------------------------------------------------------------------------
+sub p_start {
     my $self = shift;
+
+    #$self->adjust_lm(0); # assert new paragraph
     $self->vspace(1);
+
+    # assert one line's worth of vertical space at para-start
+    $self->out('');
     1;
 }
 
-sub p_end
-{
-    shift->vspace(1);
+# ------------------------------------------------------------------------
+sub p_end {
+    shift->vspace(1);    # assert one line's worth of vertical space at para-end
 }
 
-sub pre_start
-{
+# ------------------------------------------------------------------------
+sub pre_start {
     my $self = shift;
+
     $self->{pre}++;
-    $self->vspace(1);
+    $self->vspace(1);    # assert one line's worth of vertical space at pre-start
     1;
 }
 
-sub pre_end
-{
+# ------------------------------------------------------------------------
+sub pre_end {    # end of a pre element (listing_end and xmp_end delegate here)
     my $self = shift;
-    $self->{pre}--;
+
+    $self->{pre}--;      # leave preformatted mode; the vspace(1) that follows asserts one line of vertical space at pre-end
     $self->vspace(1);
 }
 
-BEGIN {
-    *listing_start = \&pre_start;
-    *listing_end   = \&pre_end;
-    *xmp_start     = \&pre_start;
-    *xmp_end       = \&pre_end;
-}
+# ------------------------------------------------------------------------
+sub listing_start { shift->pre_start(@_) }
+sub listing_end   { shift->pre_end(@_) }
+sub xmp_start     { shift->pre_start(@_) }
+sub xmp_end       { shift->pre_end(@_) }
 
-sub blockquote_start
-{
+# ------------------------------------------------------------------------
+sub blockquote_start {
     my $self = shift;
-    $self->vspace(1);
-    $self->adjust_lm( +2 );
-    $self->adjust_rm( -2 );
+
+    $self->vspace(1);    # assert one line's worth of vertical space at blockquote-start
+    $self->adjust_lm(+2);
+    $self->adjust_rm(-2);
     1;
 }
 
-sub blockquote_end
-{
+# ------------------------------------------------------------------------
+sub blockquote_end {
     my $self = shift;
-    $self->vspace(1);
-    $self->adjust_lm( -2 );
-    $self->adjust_rm( +2 );
+
+    $self->vspace(1);    # assert one line's worth of vertical space at blockquote-end
+    $self->adjust_lm(-2);
+    $self->adjust_rm(+2);
 }
 
-sub address_start
-{
+# ------------------------------------------------------------------------
+sub address_start {
     my $self = shift;
-    $self->vspace(1);
+
+    $self->vspace(1);    # assert one line's worth of vertical space at address-para-start
     $self->i_start(@_);
     1;
 }
 
-sub address_end
-{
+# ------------------------------------------------------------------------
+sub address_end {    # end of an address element
     my $self = shift;
-    $self->i_end(@_);
+
+    $self->i_end(@_);    # close the italics opened by address_start; the vspace(1) that follows asserts the vertical space at address-para-end
     $self->vspace(1);
 }
 
+# ------------------------------------------------------------------------
 # Handling of list elements
-
-sub ul_start
-{
+sub ul_start {
     my $self = shift;
-    $self->vspace(1);
-    push(@{$self->{markers}}, "*");
-    $self->adjust_lm( +2 );
+
+    $self->vspace(1);    # assert one line's worth of vertical space at ul-start
+    $self->adjust_lm(+2);
     1;
 }
 
-sub ul_end
-{
+# ------------------------------------------------------------------------
+sub ul_end {    # end of a ul element (menu_end and dir_end delegate here)
     my $self = shift;
-    pop(@{$self->{markers}});
-    $self->adjust_lm( -2 );
+
+    $self->adjust_lm(-2);    # undo the +2 left margin from ul_start; the vspace(1) that follows asserts the vertical space at ul-end
     $self->vspace(1);
 }
 
-sub li_start
-{
+# ------------------------------------------------------------------------
+sub li_start {    # start of a list item: output its bullet, then indent the item text
     my $self = shift;
-    $self->bullet($self->{markers}[-1]);
+
+    $self->bullet( shift->attr('_bullet') || '' );    # second argument is the li node; _bullet is presumably set by number_lists() called from format() - TODO confirm
     $self->adjust_lm(+2);
     1;
 }
 
-sub bullet
-{
-    shift->out(@_);
-}
+# ------------------------------------------------------------------------
+sub bullet { shift->out(@_); }
 
-sub li_end
-{
+# ------------------------------------------------------------------------
+sub li_end {
     my $self = shift;
+
     $self->vspace(1);
-    $self->adjust_lm( -2);
-    my $markers = $self->{markers};
-    if ($markers->[-1] =~ /^\d+/) {
-	# increment ordered markers
-	$markers->[-1]++;
-    }
+    $self->adjust_lm(-2);
 }
 
-BEGIN {
-    *menu_start = \&ul_start;
-    *menu_end   = \&ul_end;
-    *dir_start  = \&ul_start;
-    *dir_end    = \&ul_end;
-}
+# ------------------------------------------------------------------------
+sub menu_start { shift->ul_start(@_) }
+sub menu_end   { shift->ul_end(@_) }
+sub dir_start  { shift->ul_start(@_) }
+sub dir_end    { shift->ul_end(@_) }
 
-sub ol_start
-{
+# ------------------------------------------------------------------------
+sub ol_start {
     my $self = shift;
 
     $self->vspace(1);
-    push(@{$self->{markers}}, 1);
     $self->adjust_lm(+2);
     1;
 }
 
-sub ol_end
-{
+# ------------------------------------------------------------------------
+sub ol_end {
     my $self = shift;
+
     $self->adjust_lm(-2);
-    pop(@{$self->{markers}});
     $self->vspace(1);
 }
 
-
-sub dl_start
-{
+# ------------------------------------------------------------------------
+sub dl_start {
     my $self = shift;
-    $self->adjust_lm(+2);
-    $self->vspace(1);
+
+    # $self->adjust_lm(+2);
+    $self->vspace(1);    # assert one line's worth of vertical space at dl-start
     1;
 }
 
-sub dl_end
-{
+# ------------------------------------------------------------------------
+sub dl_end {
     my $self = shift;
-    $self->adjust_lm(-2);
-    $self->vspace(1);
+
+    # $self->adjust_lm(-2);
+    $self->vspace(1);    # assert one line's worth of vertical space at dl-end
 }
 
-sub dt_start
-{
+# ------------------------------------------------------------------------
+sub dt_start {
     my $self = shift;
-    $self->vspace(1);
+
+    $self->vspace(1);    # assert one line's worth of vertical space at dt-start
     1;
 }
 
-sub dt_end
-{
-}
+# ------------------------------------------------------------------------
+sub dt_end { }
 
-sub dd_start
-{
+# ------------------------------------------------------------------------
+sub dd_start {    # start of a dd element: indent by 6 (dd_end undoes it)
     my $self = shift;
+
     $self->adjust_lm(+6);
-    $self->vspace(0);
+    $self->vspace(0);    # minimum of 0: makes vspace defined, i.e. start a new line without asking for blank lines (see vspace)
     1;
 }
 
-sub dd_end
-{
-    shift->adjust_lm(-6);
+# ------------------------------------------------------------------------
+sub dd_end {
+    my $self = shift;
+
+    $self->vspace(1);    # assert one line's worth of vertical space at dd-end
+    $self->adjust_lm(-6);
 }
 
+# ------------------------------------------------------------------------
 
-#-----------------------------
-# 11/29/98 bwinter Updated the following to allow for table text data
-#sub table_start { shift->out('[TABLE NOT SHOWN]'); 0; }
-sub table_start { 
-    shift->vspace(1);
-    1;
-}
-sub table_end {
-    shift->vspace(1);
-}
+# And now some things that are basically sane fall-throughs for classes
+#  that don't really handle tables or forms specially...
 
-sub tr_start { 
-    shift->vspace(1);
-    1;
+# Things not formatted at all
+sub input_start    { 0; }
+sub textarea_start { 0; }
+sub select_start   { 0; }
+sub option_start   { 0; }
+
+# ------------------------------------------------------------------------
+sub td_start {    # table cell start: handled like a paragraph, with centering switched off
+    my $self = shift;
+
+    push @{ $self->{'center_stack'} }, $self->{'center'};    # remember the enclosing center count; td_end pops it back
+    $self->{center} = 0;
+
+    $self->p_start(@_);    # last statement, so the return value is that of p_start
 }
-sub tr_end {}
 
-# 09/12/01 bwinter Add space to table elements ... somehow perl 5.6 did not have them??
-sub td_start { 
-    shift->hspace(1);
-    1;
+# ------------------------------------------------------------------------
+sub td_end {    # table cell end: counterpart of td_start
+    my $self = shift;
+
+    $self->{'center'} = pop @{ $self->{'center_stack'} };    # restore the center count saved by td_start
+    $self->p_end(@_);
 }
-sub td_end {}
-#-----------------------------
 
+# ------------------------------------------------------------------------
+sub th_start {    # table header cell start: like td_start, plus bold
+    my $self = shift;
+
+    push @{ $self->{'center_stack'} }, $self->{'center'};    # remember the enclosing center count; th_end pops it back
+    $self->{center} = 0;
+
+    $self->p_start(@_);
+    $self->b_start(@_);    # last statement, so the return value is that of b_start
+}
 
-# Things not formated at all
-sub form_start  { shift->out('[FORM NOT SHOWN]');  0; }
+# ------------------------------------------------------------------------
+sub th_end {    # table header cell end: undoes th_start in reverse order
+    my $self = shift;
+
+    $self->b_end(@_);
+    $self->{'center'} = pop @{ $self->{'center_stack'} };    # restore the center count saved by th_start
+    $self->p_end(@_);
+}
 
+# But if you wanted to just SKIP tables and forms, you'd do this:
+#  sub table_start { shift->out('[TABLE NOT SHOWN]'); 0; }
+#  sub form_start  { shift->out('[FORM NOT SHOWN]');  0; }
 
-sub textflow
-{
+# ------------------------------------------------------------------------
+sub textflow {    # Pass one text segment (the argument after $self) to pre_out, blockquote_out or out
     my $self = shift;
-    if ($self->{pre}) {
-	# strip leading and trailing newlines so that the <pre> tags 
-	# may be placed on lines of their own without causing extra
-	# vertical space as part of the preformatted text
-	$_[0] =~ s/\n$//;
-	$_[0] =~ s/^\n//;
-	$self->pre_out($_[0]);
-    } else {
-	for (split(/(\s+)/, $_[0])) {
-	    next unless length $_;
-	    $self->out($_);
-	}
+
+    if ( $self->{pre} ) {
+
+        # Strip one leading and one trailing newline so that a <pre>
+        #  tag can be placed on a line of its own without causing extra
+        #  vertical space as part of the preformatted text.
+        $_[0] =~ s/\n$//;    # NOTE: substitutions act on $_[0] itself, so the aliased caller value is edited in place
+        $_[0] =~ s/^\n//;
+        $self->pre_out( $_[0] );
+    }
+    elsif ( $self->{blockquote} ) {    # NOTE(review): no blockquote_out stub is visible next to out and pre_out; a subclass that sets this flag must provide it
+        $_[0] =~ s/\A\s//;
+        $self->blockquote_out( $_[0] );
+    }
+    else {
+        for ( split( /(\s+)/, $_[0] ) ) {    # the capture group keeps the whitespace runs, so out() receives words and whitespace alike
+            next unless length $_;
+            $self->out($_);
+        }
     }
 }
 
+# ------------------------------------------------------------------------
+sub vspace {
+    my ( $self, $min, $add ) = @_;
 
+    # This method sets the vspace attribute.  When vspace is
+    # defined, then a new line should be started.  If vspace
+    # is a nonzero value, then that should be taken as the
+    # number of lines to be skipped before following text
+    # is written out.
+    #
+    # You may think it odd to conflate the two concepts of
+    # ending this paragraph, and asserting how much space should
+    # follow; but it happens to work out pretty well.
 
-sub vspace
-{
-    my($self, $min, $add) = @_;
     my $old = $self->{vspace};
-    if (defined $old) {
-	my $new = $old;
-	$new += $add || 0;
-	$new = $min if $new < $min;
-	$self->{vspace} = $new;
-    } else {
-	$self->{vspace} = $min;
+    if ( defined $old ) {
+        my $new = $old;
+        $new += $add || 0;
+        $new = $min if $new < $min;
+        $self->{vspace} = $new;
     }
-    $old;
-}
-
-sub hspace
-{
-    my($self, $min, $add) = @_;
-    my $old = $self->{hspace};
-    if (defined $old) {
-	my $new = $old;
-	$new += $add || 0;
-	$new = $min if $new < $min;
-	$self->{hspace} = $new;
-    } else {
-	$self->{hspace} = $min;
+    else {
+        $self->{vspace} = $min;
     }
+    ### vspace: $self->{vspace}
     $old;
 }
 
-sub collect
-{
-    push(@{shift->{output}}, @_);
-}
+# ------------------------------------------------------------------------
+sub collect { push( @{ shift->{output} }, @_ ); }
 
-sub out
-{
-    confess "Must be overridden my subclass";
-}
+# ------------------------------------------------------------------------
+sub out       { confess "Must be overridden by subclass"; }    # Output a word
+sub pre_out   { confess "Must be overridden by subclass"; }
+sub adjust_lm { confess "Must be overridden by subclass"; }
+sub adjust_rm { confess "Must be overridden by subclass"; }
+
+# ------------------------------------------------------------------------
 
-sub pre_out
-{
-    confess "Must be overridden my subclass";
-}
 
 1;
+
+__END__
+
+=pod
+
+=for test_synopsis 1;
+__END__
+
+=for stopwords formatters CPAN homepage
+
+=for HTML <a href="https://travis-ci.org/nigelm/html-formatter"><img src="https://travis-ci.org/nigelm/html-formatter.svg?branch=master"></a>
+
+=head1 NAME
+
+HTML::Formatter - Base class for HTML formatters
+
+=head1 VERSION
+
+version 2.14
+
+=head1 SYNOPSIS
+
+  use HTML::FormatSomething;
+  my $infile  = "whatever.html";
+  my $outfile = "whatever.file";
+  open OUT, ">$outfile"
+   or die "Can't write-open $outfile: $!\n";
+
+  print OUT HTML::FormatSomething->format_file(
+    $infile,
+      'option1' => 'value1',
+      'option2' => 'value2',
+      ...
+  );
+  close(OUT);
+
+=head1 DESCRIPTION
+
+HTML::Formatter is a base class for classes that take HTML and format it to
+some output format.  When you take an object of a class derived from it and call
+C<< $formatter->format( $tree ) >> with an L<HTML::TreeBuilder> (or
+L<HTML::Element>) object, they return the appropriately formatted string for
+the input HTML.
+
+HTML formatters are able to format an HTML syntax tree into various printable
+formats.  Different formatters produce output for different output media.
+Common to all formatters is that they return the formatted output when
+the format() method is called.  The format() method takes an HTML::Element
+object (usually the HTML::TreeBuilder root object) as parameter.
+
+The distribution name has been changed to C<HTML-Formatter> as detailed in
+L</DISTRIBUTION NAME>.
+
+=head1 METHODS
+
+=head2 new
+
+    my $formatter = FormatterClass->new(
+        option1 => value1, option2 => value2, ...
+    );
+
+This creates a new formatter object with the given options.
+
+=head2 format_file
+
+=head2 format_from_file
+
+    $string = FormatterClass->format_file(
+        $html_source,
+        option1 => value1, option2 => value2, ...
+        );
+
+Return a string consisting of the result of using the given class to format the
+given HTML file according to the given (optional) options. Internally it calls
+C<< SomeClass->new( ... )->format( ... ) >> on a new HTML::TreeBuilder object
+based on the given HTML file.
+
+=head2 format_string
+
+=head2 format_from_string
+
+    $string = FormatterClass->format_string(
+        $html_source,
+        option1 => value1, option2 => value2, ...
+        );
+
+Return a string consisting of the result of using the given class to format the
+given HTML source according to the given (optional) options. Internally it
+calls C<< SomeClass->new( ... )->format( ... ) >> on a new HTML::TreeBuilder
+object based on the given source.
+
+=head2 format
+
+    my $render_string = $formatter->format( $html_tree_object );
+
+This renders the given HTML object according to the options set for $formatter.
+
+After you've used a particular formatter object to format a particular HTML
+tree object, you probably should not use either again.
+
+=head1 DISTRIBUTION NAME
+
+This module was originally named C<HTML-Format> despite not containing a
+C<HTML::Format> module within it.  As rules on naming have been taken more
+seriously, and the L<PAUSE|https://pause.perl.org/> toolchain adapted so that
+getting the distribution indexed was more difficult, it became obvious that I
+should rename the distribution to C<HTML-Formatter> matching the base
+L<HTML::Formatter> module.
+
+As of release 2.13 this is released as the C<HTML-Formatter> distribution with
+corresponding changes to the git repository name and associated items.
+
+Due to the way that the module is put together this should have no effect on
+code using the module.  The only issues will be where the distribution name was
+used within dependencies.
+
+=head1 SEE ALSO
+
+The three specific formatters:-
+
+=over
+
+=item L<HTML::FormatText>
+
+Format HTML into plain text
+
+=item L<HTML::FormatPS>
+
+Format HTML into postscript
+
+=item L<HTML::FormatRTF>
+
+Format HTML into Rich Text Format
+
+=back
+
+Also the HTML manipulation libraries used - L<HTML::TreeBuilder>,
+L<HTML::Element> and L<HTML::Tree>
+
+=head1 INSTALLATION
+
+See perlmodinstall for information and options on installing Perl modules.
+
+=head1 BUGS AND LIMITATIONS
+
+You can make new bug reports, and view existing ones, through the
+web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Formatter>.
+
+=head1 AVAILABILITY
+
+The project homepage is L<https://metacpan.org/release/HTML-Formatter>.
+
+The latest version of this module is available from the Comprehensive Perl
+Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
+site near you, or see L<https://metacpan.org/module/HTML::Formatter/>.
+
+=head1 AUTHORS
+
+=over 4
+
+=item *
+
+Nigel Metheringham <nigelm@cpan.org>
+
+=item *
+
+Sean M Burke <sburke@cpan.org>
+
+=item *
+
+Gisle Aas <gisle@ActiveState.com>
+
+=back
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut