/
strip_html.filter
34 lines (27 loc) · 1.09 KB
/
strip_html.filter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Copyright 2009 Interchange Development Group and others
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version. See the LICENSE file for details.
CodeDef strip_html Filter
CodeDef strip_html Description Strip HTML
CodeDef strip_html Routine <<EOR
sub {
    # Filter routine: reduce an HTML fragment to plain text.
    #
    # Takes the value to filter as its only argument and returns it with
    # HTML comments and tags removed and whitespace normalised (no leading
    # or trailing whitespace, every internal run collapsed to one space).
    # An undefined value is returned unchanged.
    my $val = shift;

    # Nothing to strip; hand undef straight back instead of running the
    # substitutions against it and triggering uninitialized-value warnings.
    return $val unless defined $val;

    # Get rid of HTML comments. /g is required so that *every* comment is
    # removed: the generic tag stripper further down only matches tags that
    # start with a word character, so any comment left behind here would
    # otherwise survive into the output.
    $val =~ s/<!(?:--(?:[^-]*|-[^-]+)*--\s*)>//sg;

    # Replace these container tags (opening or closing) with a space so
    # that the text of adjacent blocks does not run together.
    $val =~ s{</?(?:p|ol|ul|li|div|h[123456]|pre|dl|dd|dt|form|option|textarea|blockquote)(?:\s[^>]*)?>}{ }ig;

    # Replace these self-closing tags (<br>, <hr>, with or without
    # attributes or a trailing slash) with a space.
    $val =~ s{<[bh]r(?:\s*/|\s[^>]*)?>}{ }ig;

    # Remove all remaining tags and leave no space, so inline markup such
    # as <b>...</b> does not split words.
    $val =~ s{</?\w[^>]*>}{}g;

    # Collapse all whitespace, as HTML does when rendering anyway,
    # to facilitate truncating at a certain number of characters.
    $val =~ s{\A\s+}{};
    $val =~ s{\s+\z}{};
    $val =~ s{\s+}{ }g;

    return $val;
}
EOR