Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

re-ported from version 1.5.0 of readability.js

  • Loading branch information...
commit 8a0612f8d6eab47c1bc40a253dd67cf14be4f1a2 1 parent 30d22cf
@cantino cantino authored
View
4 README
@@ -1,5 +1,9 @@
+This code is under the Apache License 2.0. http://www.apache.org/licenses/LICENSE-2.0
+
This is a Ruby port of arc90's Readability project:
http://lab.arc90.com/experiments/readability/
Given an HTML document, it pulls out the main body text and cleans it up.
+
+Ruby port by starrhorne and iterationlabs
View
264 lib/readability.rb
@@ -3,48 +3,264 @@
module Readability
class Document
+ TEXT_LENGTH_THRESHOLD = 25
+ RETRY_LENGTH = 250
+
+ attr_accessor :options, :html
+ # Keeps the raw input and the options hash, then parses the input via make_html.
+ # The raw input is retained because content re-parses it (through make_html)
+ # before its retry pass.
def initialize(input, options = {})
+ @input = input
@options = options
- @html = Nokogiri::HTML(input, nil, 'UTF-8')
+ make_html
+ end
+
+ def make_html
+ @html = Nokogiri::HTML(@input, nil, 'UTF-8')
end
+ # Pattern table; the camelCase keys follow readability.js, which this file was ported from.
+ # In the code visible here, :unlikelyCandidatesRe and :okMaybeItsACandidateRe are
+ # matched against an element's concatenated class+id, :positiveRe / :negativeRe
+ # against class and id separately, and :divToPElementsRe against a div's inner HTML.
+ # NOTE(review): the remaining entries are not referenced in the visible part of
+ # this file - confirm before removing them.
+ REGEXES = {
+ :unlikelyCandidatesRe => /combx|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor/i,
+ :okMaybeItsACandidateRe => /and|article|body|column|main/i,
+ :positiveRe => /article|body|content|entry|hentry|page|pagination|post|text/i,
+ :negativeRe => /combx|comment|contact|foot|footer|footnote|link|media|meta|promo|related|scroll|shoutbox|sponsor|tags|widget/i,
+ :divToPElementsRe => /<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i,
+ :replaceBrsRe => /(<br[^>]*>[ \n\r\t]*){2,}/i,
+ :replaceFontsRe => /<(\/?)font[^>]*>/i,
+ :trimRe => /^\s+|\s+$/,
+ :normalizeRe => /\s{2,}/,
+ :killBreaksRe => /(<br\s*\/?>(\s|&nbsp;?)*){1,}/,
+ :videoRe => /http:\/\/(www\.)?(youtube|vimeo)\.com/i
+ }
+
+ def content(remove_unlikely_candidates = true)
+ @html.css("script, style").each { |i| i.remove }
+
+ remove_unlikely_candidates! if remove_unlikely_candidates
+ transform_misused_divs_into_paragraphs!
+ candidates = score_paragraphs(options[:min_text_length] || TEXT_LENGTH_THRESHOLD)
+ best_candidate = select_best_candidate(candidates)
+ article = get_article(candidates, best_candidate)
+
+ cleaned_article = sanitize(article, candidates, options)
+ if remove_unlikely_candidates && article.text.strip.length < (options[:retry_length] || RETRY_LENGTH)
+ make_html
+ content(false)
+ else
+ cleaned_article
+ end
+ end
+
+ def get_article(candidates, best_candidate)
+ # Now that we have the top candidate, look through its siblings for content that might also be related.
+ # Things like preambles, content split by ads that we removed, etc.
+
+ sibling_score_threshold = [10, best_candidate[:content_score] * 0.2].max
+ output = Nokogiri::XML::Node.new('div', @html)
+ best_candidate[:elem].parent.children.each do |sibling|
+ append = false
+ append = true if sibling == best_candidate[:elem]
+ append = true if candidates[sibling] && candidates[sibling][:content_score] >= sibling_score_threshold
+
+ if sibling.name.downcase == "p"
+ link_density = get_link_density(sibling)
+ node_content = sibling.text
+ node_length = node_content.length
+
+ if node_length > 80 && link_density < 0.25
+ append = true
+ elsif node_length < 80 && link_density == 0 && node_content =~ /\.( |$)/
+ append = true
+ end
+ end
+
+ if append
+ sibling.name = "div" unless %w[div p].include?(sibling.name.downcase)
+ output << sibling
+ end
+ end
+
+ output
+ end
+
+ def select_best_candidate(candidates)
+ sorted_candidates = candidates.values.sort { |a, b| b[:content_score] <=> a[:content_score] }
+
+ debug("Top 5 canidates:")
+ sorted_candidates[0...5].each do |candidate|
+ debug("Candidate #{candidate[:elem].name}##{candidate[:elem][:id]}.#{candidate[:elem][:class]} with score #{candidate[:content_score]}")
+ end
+
+ best_candidate = sorted_candidates.first || { :elem => @html.css("body").first, :content_score => 0 }
+ debug("Best candidate #{best_candidate[:elem].name}##{best_candidate[:elem][:id]}.#{best_candidate[:elem][:class]} with score #{best_candidate[:content_score]}")
+
+ best_candidate
+ end
+
+ def get_link_density(elem)
+ link_length = elem.css("a").map {|i| i.text}.join("").length
+ text_length = elem.text.length
+ link_length / text_length.to_f
+ end
+
+ # Builds the candidate table. For every <p>/<td> whose text is at least
+ # min_text_length characters long, its parent (and grandparent, when present)
+ # gets an entry from score_node; the paragraph's own score is then added to the
+ # parent in full and to the grandparent at half weight.
+ # Returns a Hash of node => { :content_score, :elem }.
+ def score_paragraphs(min_text_length)
+ candidates = {}
+ @html.css("p,td").each do |elem|
+ parent_node = elem.parent
+ grand_parent_node = parent_node.respond_to?(:parent) ? parent_node.parent : nil
+ inner_text = elem.text
+
+ # If this paragraph is shorter than min_text_length characters, don't even count it.
+ next if inner_text.length < min_text_length
+
+ candidates[parent_node] ||= score_node(parent_node)
+ candidates[grand_parent_node] ||= score_node(grand_parent_node) if grand_parent_node
- def content
+ # One base point, plus the number of comma-separated pieces, plus one point
+ # per 100 characters of text (capped at 3).
+ content_score = 1
+ content_score += inner_text.split(',').length
+ content_score += [(inner_text.length / 100).to_i, 3].min
- # Get all parent elements containing a <p> tag
- @parents = @html.css("p").map { |p| p.parent }.compact.uniq
+ candidates[parent_node][:content_score] += content_score
+ candidates[grand_parent_node][:content_score] += content_score / 2.0 if grand_parent_node
+ end
- sanitize(@parents.map { |p| [p, score(p)] }.max { |a, b| a[1] <=> b[1] }[0])
+ # Scale the final candidates score based on link density. Good content should have a
+ # relatively small link density (5% or less) and be mostly unaffected by this operation.
+ candidates.each do |elem, candidate|
+ candidate[:content_score] = candidate[:content_score] * (1 - get_link_density(elem))
+ end
+ candidates
end
- def score(parent)
- s = 0
+ # Returns an Integer weight for element e derived from its class and id attributes.
+ # Each non-empty attribute loses 25 when it matches REGEXES[:negativeRe] and gains
+ # 25 when it matches REGEXES[:positiveRe]; an attribute matching both nets to zero.
+ # The result therefore lies between -50 and 50.
+ def class_weight(e)
+ weight = 0
+ if e[:class] && e[:class] != ""
+ if e[:class] =~ REGEXES[:negativeRe]
+ weight -= 25
+ end
- # Adjust score based on parent's "class" attribute
- s -= 50 if parent[:class] =~ /(comment|meta|footer|footnote)/i
- s += 25 if parent[:class] =~ /((^|\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\s|$))/i
+ if e[:class] =~ REGEXES[:positiveRe]
+ weight += 25
+ end
+ end
- # Adjust score based on parent id
- s -= 50 if parent[:id] =~ /(comment|meta|footer|footnote)/i
- s += 25 if parent[:id] =~ /^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$/i
+ if e[:id] && e[:id] != ""
+ if e[:id] =~ REGEXES[:negativeRe]
+ weight -= 25
+ end
- # Adjust score based on # of <p> elements inside parent
- s += parent.css("p").size
+ if e[:id] =~ REGEXES[:positiveRe]
+ weight += 25
+ end
+ end
- # Adjust score based on # of commas inside parent
- s += parent.text.count ","
+ weight
+ end
+
+ def score_node(elem)
+ content_score = class_weight(elem)
+ case elem.name.downcase
+ when "div":
+ content_score += 5
+ when "blockquote":
+ content_score += 3
+ when "form":
+ content_score -= 3
+ when "th":
+ content_score -= 5
+ end
+ { :content_score => content_score, :elem => elem }
+ end
- s
+ def debug(str)
+ puts str if options[:debug]
end
- def sanitize(node)
+ def remove_unlikely_candidates!
+ @html.css("*").each do |elem|
+ str = "#{elem[:class]}#{elem[:id]}"
+ if str =~ REGEXES[:unlikelyCandidatesRe] && str !~ REGEXES[:okMaybeItsACandidateRe] && elem.name.downcase != 'body'
+ debug("Removing unlikely candidate - #{str}")
+ elem.remove
+ end
+ end
+ end
- # Get rid of divs full of non-text items
- node.css("div").each do |el|
- counts = %w[p img li a embed].inject({}) { |m, kind| m[kind] = el.css(kind).length; m }
- el.remove if (el.text.count(",") < 10) && (counts["p"] == 0 || counts["embed"] > 0 || counts["a"] > counts["p"] || counts["li"] > counts["p"] || counts["img"] > counts["p"])
+ def transform_misused_divs_into_paragraphs!
+ @html.css("*").each do |elem|
+ if elem.name.downcase == "div"
+ # transform <div>s that do not contain other block elements into <p>s
+ if elem.inner_html !~ REGEXES[:divToPElementsRe]
+ debug("Altering div(##{elem[:id]}.#{elem[:class]}) to p");
+ elem.name = "p"
+ end
+ else
+ # wrap text nodes in p tags
+# elem.children.each do |child|
+# if child.text?
+## debug("wrapping text node with a p")
+# child.swap("<p>#{child.text}</p>")
+# end
+# end
+ end
+ end
+ end
+
+ def sanitize(node, candidates, options = {})
+ node.css("h1, h2, h3, h4, h5, h6").each do |header|
+ header.remove if class_weight(header) < 0 || get_link_density(header) > 0.33
+ end
+
+ node.css("form, object, iframe, embed").each do |elem|
+ elem.remove
+ end
+
+ # Conditionally clean <table>s, <ul>s, and <div>s
+ node.css("table, ul, div").each do |el|
+ weight = class_weight(el)
+ content_score = candidates[el] ? candidates[el][:content_score] : 0
+ name = el.name.downcase
+
+ if weight + content_score < 0
+ el.remove
+ debug("Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because score + content score was less than zero.")
+ elsif el.text.count(",") < 10
+ counts = %w[p img li a embed input].inject({}) { |m, kind| m[kind] = el.css(kind).length; m }
+ counts["li"] -= 100
+
+ content_length = el.text.length
+ link_density = get_link_density(el)
+ to_remove = false
+ reason = ""
+
+ if counts["img"] > counts["p"]
+ reason = "too many images"
+ to_remove = true
+ elsif counts["li"] > counts["p"] && name != "ul" && name != "ol"
+ reason = "more <li>s than <p>s"
+ to_remove = true
+ elsif counts["input"] > (counts["p"] / 3).to_i
+ reason = "less than 3x <p>s than <input>s"
+ to_remove = true
+ elsif content_length < (options[:min_text_length] || TEXT_LENGTH_THRESHOLD) && (counts["img"] == 0 || counts["img"] > 2)
+ reason = "too short a content length without a single image"
+ to_remove = true
+ elsif weight < 25 && link_density > 0.2
+ reason = "too many links for its weight (#{weight})"
+ to_remove = true
+ elsif weight >= 25 && link_density > 0.5
+ reason = "too many links for its weight (#{weight})"
+ to_remove = true
+ elsif (counts["embed"] == 1 && content_length < 75) || counts["embed"] > 1
+ reason = "<embed>s with too short a content length, or too many <embed>s"
+ to_remove = true
+ end
+
+ if to_remove
+ debug("Conditionally cleaned #{name}##{el[:id]}.#{el[:class]} with weight #{weight} and content score #{content_score} because it has #{reason}.")
+ el.remove
+ end
+ end
end
# We'll sanitize all elements using a whitelist
@@ -59,7 +275,7 @@ def sanitize(node)
if whitelist[el.node_name]
el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
- # Otherwise, replace the element with its contents
+ # Otherwise, replace the element with its contents
else
el.swap(el.text)
end
View
74 lib/readability_old.rb
@@ -0,0 +1,74 @@
+require 'rubygems'
+require 'nokogiri'
+
+module Readability
+ class Document
+
+ def initialize(input, options = {})
+ @options = options
+ @html = Nokogiri::HTML(input, nil, 'UTF-8')
+ end
+
+
+ def content
+
+ # Get all parent elements containing a <p> tag
+ @parents = @html.css("p").map { |p| p.parent }.compact.uniq
+
+ sanitize(@parents.map { |p| [p, score(p)] }.max { |a, b| a[1] <=> b[1] }[0])
+
+ end
+
+ def score(parent)
+ s = 0
+
+ # Adjust score based on parent's "class" attribute
+ s -= 50 if parent[:class] =~ /(comment|meta|footer|footnote)/i
+ s += 25 if parent[:class] =~ /((^|\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\s|$))/i
+
+ # Adjust score based on parent id
+ s -= 50 if parent[:id] =~ /(comment|meta|footer|footnote)/i
+ s += 25 if parent[:id] =~ /^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$/i
+
+ # Adjust score based on # of <p> elements inside parent
+ s += parent.css("p").size
+
+ # Adjust score based on # of commas inside parent
+ s += parent.text.count ","
+
+ s
+ end
+
+ def sanitize(node)
+
+ # Get rid of divs full of non-text items
+ node.css("div").each do |el|
+ counts = %w[p img li a embed].inject({}) { |m, kind| m[kind] = el.css(kind).length; m }
+ el.remove if (el.text.count(",") < 10) && (counts["p"] == 0 || counts["embed"] > 0 || counts["a"] > counts["p"] || counts["li"] > counts["p"] || counts["img"] > counts["p"])
+ end
+
+ # We'll sanitize all elements using a whitelist
+ whitelist = @options[:tags] || %w[div p]
+
+ # Use a hash for speed (don't want to make a million calls to include?)
+ whitelist = Hash[ whitelist.zip([true] * whitelist.size) ]
+
+ ([node] + node.css("*")).each do |el|
+
+ # If element is in whitelist, delete all its attributes
+ if whitelist[el.node_name]
+ el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
+
+ # Otherwise, replace the element with its contents
+ else
+ el.swap(el.text)
+ end
+
+ end
+
+ # Get rid of duplicate whitespace
+ node.to_html.gsub(/[\r\n\f]+/, "\n" ).gsub(/[\t ]+/, " ").gsub(/&nbsp;/, " ")
+ end
+
+ end
+end
View
426 spec/fixtures/cant_read.html
@@ -0,0 +1,426 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html;charset=ISO-8859-1">
+
+<meta HTTP-EQUIV="Pragma" CONTENT="no-cache"></meta>
+<title>BERKELEY BREATHED - Vice Magazine</title>
+<meta name="description" content="BERKELEY BREATHED">
+<meta name="keywords" content="weird kids, Berkeley Breathed, Monty Python, Mad magazine, seminal, comic strip, Bloom County, goofiness, talking penguins, drug addict, nuclear anxiety, the evils of consumerism, Opus, Ronco, infomercials, Pulitzer Prize, Billy & the Boingers flexi-disc, Bella Abzug, Outland, children’s books, Flawed Dogs: The Shocking Raid on Westminster, nightmarish descriptions, dogfighting, animal testing, ghost of our childhoods, Bill the Cat, president, Doonesbury, cartoonists, Jules Feiffer, Garry Trudeau, To Kill a Mockingbird, Maycomb, Alabama, therapist, Scout, Antarctica, Galapagos, Milo, Binkley, Jeane Kirkpatrick, Jesse Helms, Giant Purple Snorklewacker, Jim Davis, Garfield, parody, comic-book stores, Harrison Ford, nut jobs, drunk, enchilada, mental cases, flies to horseshit, stripped, Cyndi Lauper, Highway 101, Santa Barbara, Amy Kellner, Wikipedia, nostalgic, pain of childbirth, giving oral sex, Burglars, stalkers, childless couples, Bloom County Babylon collection, Basselope, basset-hound, Captain Nemo’s organ room in the Nautilus, Cava restaurant in Montecito, California, gigolo, sexual attraction, Help!, Sgt. Pepper’s, traumatized, vice, viceland">
+<meta name="permalink" content="http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php">
+<link rel="canonical" href="http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php" />
+
+
+<script type="text/javascript" src="http://scs.viceland.com/js/scripts/jquery.min.js"></script>
+<script type="text/javascript" src="http://scs.viceland.com/js/scripts/viceland.js"></script>
+<link rel="stylesheet" href="http://scs.viceland.com/css/ros.css" TYPE="text/css" MEDIA="screen">
+<style type="text/css" media="all"><!--
+.dsR1067 { height: 12px; }
+--></style>
+
+
+<script language="JavaScript" type="text/JavaScript">
+<!--
+function MM_openBrWindow(theURL,winName,features) {
+window.open(theURL,winName,features);
+}
+//-->
+</script>
+
+<link rel="stylesheet" type="text/css" href="http://scs.viceland.com/js/shadowbox-build-3.0b/shadowbox.css">
+<script type="text/javascript" src="http://scs.viceland.com/js/shadowbox-build-3.0b/shadowbox.js"></script>
+<script type="text/javascript">
+
+Shadowbox.init({
+ resizeLgImages: true,
+ displayNav: true,
+ keysClose: ['c', 27], // c or esc
+ autoplayMovies: false
+});
+</script>
+<script type="text/javascript" src="http://scs.viceland.com/includes/swfobject.js"></script>
+
+
+</head>
+<body bgcolor="white" leftmargin=0 topmargin=0 marginwidth=0 marginheight=0>
+
+
+
+<div id="wrapper"><div align="center">
+
+
+
+
+
+
+
+
+
+<div id="header">
+<a href="http://www.viceland.com/"><img src="http://scs.viceland.com/img/logo.png" border="0" class="vice_logo" alt="VICE"></a>
+<div class="ad_leader_top">
+
+<!-- begin ad tag (tile=1) -->
+<script language="JavaScript" type="text/javascript">
+if (typeof ord=='undefined') {ord=Math.random()*10000000000000000;}
+document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/viceland/news;tile=1;sz=728x90;ord=' + ord + '?" type="text/javascript"><\/script>');
+</script><noscript><a href="http://ad.doubleclick.net/jump/viceland/news;tile=1;sz=728x90;ord=123456789?" target="_blank"><img src="http://ad.doubleclick.net/ad/viceland/news;tile=1;sz=728x90;ord=123456789?" width="728" height="90" border="0" alt=""></a></noscript>
+<!-- End ad tag -->
+</div>
+</div>
+
+
+
+
+<div align="right">
+<style type="text/css" media="screen">
+#social_float { position: relative; top: 0; width: 1px; height: 1px; overflow: visible; z-index: 1; }
+#social_layer { position: absolute; visibility: visible; left: -163px; top: 30px; z-index: 2; width: 147px; height: 21px; overflow: visible; }
+</style>
+<div id="social_float">
+<div id="social_layer">
+<img src="http://scs.viceland.com/img/social.gif" alt="" width="147" height="21" usemap="#social" border="0" /><map name="social" id="social"><area shape="rect" coords="130,0,148,22" href="http://www.viceland.com/blogs/en/feed/" alt="" target="_blank" /><area shape="rect" coords="115,0,133,17" href="http://www.twitter.com/vicemag" alt="" target="_blank" /><area shape="rect" coords="97,0,115,22" href="http://www.facebook.com/vicemagazineus" alt="" target="_blank" /><area shape="rect" coords="76,0,99,22" href="http://builder.campaigner.com/app/campaigner/services/optinlist/processoptinrequest.jsp?oilb=54727348&builderType=classic" alt="" target="_blank" /></map>
+</div>
+</div>
+</div>
+
+
+<ul id="menu">
+ <li><a href="http://www.viceland.com/" class="first" rel="home">HOME</a></li>
+ <li><a href="http://www.viceland.com/int/dos.php" rel="dd">DOs & DON'Ts</a></li>
+ <li><a href="http://www.viceland.com/int/v17n2/htdocs/" rel="magazine_us">In The Magazine</a></li>
+ <li><a href="http://www.viceland.com/int/category.php?category=photos" rel="photos_us">PHOTOS</a></li>
+ <li><a href="http://www.viceland.com/int/category.php?category=music" rel="music_us">MUSIC</a></li>
+ <li><a href="http://www.viceland.com/int/category.php?category=fashion" rel="fashion">FASHION</a></li>
+ <li><a href="http://www.viceland.com/int/store.php" rel="store">SHOP</a></li>
+ <li><a href="http://www.vbs.tv/" target="_blank" rel="vbs">VBS</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=us_es" rel="espanol">ESPA&#209;OL</a></li>
+
+ <li class="search">
+ <form id="search_input" action="http://www.viceland.com/int/search.php" method="get">
+ <input type="hidden" name="cx" value="014486740869999609936:ydniktebni4" />
+ <input type="hidden" value="FORID:11" name="cof"/>
+ <input type="hidden" name="ie" value="UTF-8" />
+
+ <input type="text" name="q" onfocus="value=''" value="Search" class="search_box">
+ <input type="submit" name="sa" value="Go" class="submit">
+ </form>
+
+
+
+ </li>
+ </ul>
+ <div class="submenu" >
+ <ul id="magazine_us" class="sub" align='left'>
+ <li><a href="http://www.viceland.com/int/v17n2/htdocs/">Current issue</a></li>
+ <li><a href="http://www.viceland.com/int/archives.php">Archive</a></li>
+ <li><a href="http://viceland.stores.yahoo.net/subscriptions.html">Subscribe</a></li>
+ <li><a href="http://builder.campaigner.com/app/campaigner/services/optinlist/processoptinrequest.jsp?oilb=54727348&builderType=classic" onclick="window.open(this.href, '', 'width=380, height=320, left=200, top=200, resizable=no, scrollbars=no, status=no'); return false" target="newsletter">Newsletter</a></li>
+ </ul>
+
+ <ul id="music_us" class="sub" align='left'>
+ <li><a href="http://www.viceland.com/int/v17n1/htdocs/records-309.php">Reviews</a></li>
+ <li><a href="http://www.viceland.com/vicerecords/">Vice Records</a></li>
+ </ul>
+
+ <ul id="photos_us" class="sub" align='left'>
+ <li><a href="http://www.viceland.com/int/category.php?category=photos">Vice Photos</a></li>
+ <li><a href="http://www.viceland.com/blogs/photos/">Photo Blog</a></li>
+ </ul>
+
+
+ </div>
+ <div class="clear"><!-- --></div>
+
+<!-- begin ad tag (tile=3) -->
+<script language="JavaScript" type="text/javascript">
+if (typeof ord=='undefined') {ord=Math.random()*10000000000000000;}
+document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/viceland/news;tile=3;sz=974x50;ord=' + ord + '?" type="text/javascript"><\/script>');
+</script><noscript><a href="http://ad.doubleclick.net/jump/viceland/news;tile=3;sz=974x50;ord=123456789?" target="_blank"><img src="http://ad.doubleclick.net/ad/viceland/news;tile=3;sz=974x50;ord=123456789?" width="974" height="50" border="0" alt=""></a></noscript>
+<!-- End ad tag -->
+
+
+ <table border="0" cellspacing="0" cellpadding="0" bgcolor="white">
+
+ <tr>
+ <td width="670" height="1" valign="top" bgcolor="white">
+ <img src="http://www.viceland.com/transparent.gif" width="670" height="1" alt="" border="0">
+
+ </td>
+ <td width="20" height="1" bgcolor="white"><img src="http://www.viceland.com/transparent.gif" width="20" height="1" alt="" border="0"></td>
+ <td rowspan="2" width="300" valign="top" align="left"><img src="http://www.viceland.com/transparent.gif" width="20" height="12" alt="" border="0"><div align="left">
+
+<div align=left><form class="dsR1067" action='http://cmpgnr.com/app/campaigner/services/optinlist/processoptinrequest.jsp?oilb=54727348' method='post' id="optin" name="optin" target='_blank'><table bgcolor='#f2f2f2' width=300><tr><td><h3>NEWSLETTER</h3></td><td><img width=10 height=6 src='http://www.viceland.com/transparent.gif' alt='' border='0' /></td><td><input type='HIDDEN' name='commitcontact' value='true'><input class="dsR257005" type='TEXT' name='OILB_EMAIL' size='12' onfocus="value=''" value='Enter your email'>
+</td><td><img width=1 height=6 src='http://www.viceland.com/transparent.gif' alt='' border='0' /></td><td><input type='submit' name='submitButtonName' value='Sign up' border='0'></td></tr></table></form><br>
+<img width=6 height=6 src='http://www.viceland.com/transparent.gif' alt='' border='0' /><br><div align=left><h3>DOS &amp; DON'TS</h3>
+
+<table width=300 border='0' cellspacing='0' cellpadding='0'><tr><td width=52 valign=top>
+<a href='http://www.viceland.com/int/dd.php?id=1340' target='_top'><img width=110 height=171 src='http://scs.viceland.com/img/dos_donts/1340/icon.jpg' alt='' border='0'></a>
+</td>
+<td width=5></td><td valign=top><span class='ddtext'>He likes to hide behind a mask of anonymity, but we have it on good faith that this right here is the man behind painfully boringaustrian laptopmusicguy. blogspot.com.<a href='http://www.viceland.com/int/dd.php?id=1340' target='_top'> <b>Comments/Enlarge</b></a> |
+<a href='http://www.viceland.com/int/dos.php' target='_top'><b>See all</b></a>
+
+</span></td></tr>
+
+</table><img width=12 height=12 src='http://www.viceland.com/transparent.gif' alt='' border='0' /><br><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' alt='' width='300' height='3' border='0'><br><img width=12 height=12 src='http://www.viceland.com/transparent.gif' alt='' border='0' /></div><div align=left>
+
+<table width=300 border='0' cellspacing='0' cellpadding='0'><tr><td width=52 valign=top>
+<a href='http://www.viceland.com/int/dd.php?id=854' target='_top'><img width=110 height=171 src='http://scs.viceland.com/img/dos_donts/854/icon.jpg' alt='' border='0'></a>
+</td>
+<td width=5></td><td valign=top><span class='ddtext'>The winter hat indoors is only a little worse than sunglasses but BAPE? When did models start dressing like suburban wiggers who use Wii nunchucks and say &ldquo;Get crunked&rdquo;? <br>
+
+
+
+<a href='http://www.viceland.com/int/dd.php?id=854' target='_top'> <b>Comments/Enlarge</b></a> |
+<a href='http://www.viceland.com/int/dos.php' target='_top'><b>See all</b></a>
+
+</span></td></tr>
+
+</table><img width=12 height=12 src='http://www.viceland.com/transparent.gif' alt='' border='0' /><br><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' alt='' width='300' height='3' border='0'><br><img width=12 height=12 src='http://www.viceland.com/transparent.gif' alt='' border='0' /></div>
+
+
+<!-- begin ad tag (tile=2) -->
+<script language="JavaScript" type="text/javascript">
+if (typeof ord=='undefined') {ord=Math.random()*10000000000000000;}
+document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/viceland/;tile=2;sz=300x250;ord=' + ord + '?" type="text/javascript"><\/script>');
+</script><noscript><a href="http://ad.doubleclick.net/jump/viceland/;tile=2;sz=300x250;ord=123456789?" target="_blank"><img src="http://ad.doubleclick.net/ad/viceland/;tile=2;sz=300x250;ord=123456789?" width="300" height="250" border="0" alt=""></a></noscript>
+<!-- End ad tag --><br>
+
+
+
+
+
+
+<img width=12 height=12 src='http://www.viceland.com/transparent.gif' alt='' border='0' /><br><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' alt='' width='300' height='3' border='0'><br>
+<img width=12 height=12 src='http://www.viceland.com/transparent.gif' alt='' border='0' /><br><div align=left><h3>ALSO BY JESSE PEARSON</h3>
+<table width=300 border='0' cellspacing='0' cellpadding='0'><tr><td width=52 valign=top><a href='http://www.viceland.com/int/v11n10/htdocs/hello.php?source=db'><img width=52 src='http://www.viceland.com/int/v11n10/htdocs/hello/toc.jpg' alt='' border='0' /></a></td>
+<td width=5></td><td valign=top><span class='ddtext'><a href='http://www.viceland.com/int/v11n10/htdocs/hello.php?source=db'>HELLO, WHITE PEOPLE!</a><br>Prussian Blue Look to the Future</span></td></tr><tr><td height=4 colspan=3></td></tr><tr><td width=52 valign=top><a href='http://www.viceland.com/int/v9n7/htdocs/the_anti.php?source=db'><img width=52 src='http://www.viceland.com/int/v9n7/htdocs/the_anti/toc.jpg' alt='' border='0' /></a></td>
+<td width=5></td><td valign=top><span class='ddtext'><a href='http://www.viceland.com/int/v9n7/htdocs/the_anti.php?source=db'>THE ANTI-GOTH</a><br>Quix*o*tic Make Gloom Lovely Again</span></td></tr><tr><td height=4 colspan=3></td></tr><tr><td width=52 valign=top><a href='http://www.viceland.com/int/v8n1/htdocs/turkeywood.php?source=db'><img width=52 src='http://www.viceland.com/int/v8n1/htdocs/turkeywood/toc.jpg' alt='' border='0' /></a></td>
+<td width=5></td><td valign=top><span class='ddtext'><a href='http://www.viceland.com/int/v8n1/htdocs/turkeywood.php?source=db'>THE <i>VICE</i> GUIDE TO TURKISH...</a><br>Turkey has had a pretty good track record...</span></td></tr><tr><td height=4 colspan=3></td></tr><tr><td width=52 valign=top><a href='http://www.viceland.com/int/v9n9/htdocs/sid.php?source=db'><img width=52 src='http://www.viceland.com/int/v9n9/htdocs/sid/toc.jpg' alt='' border='0' /></a></td>
+<td width=5></td><td valign=top><span class='ddtext'><a href='http://www.viceland.com/int/v9n9/htdocs/sid.php?source=db'>SID SINGS</a><br>And the Commodore 64 Massive Represent</span></td></tr><tr><td height=4 colspan=3></td></tr></table><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' alt='' width='300' height='3' border='0'><br></div>
+<center><a href='http://www.viceland.com/int/search_author.php?search=Jesse Pearson'><span class='ddtext'>See all articles by this contributor</a></span></center><br><br>
+
+
+
+
+ <br>
+ <table width=287 height=654 border="0" cellspacing="0" cellpadding="0">
+ <tr>
+ <td>
+ <div align="center">
+ <!-- begin ad tag (tile=3) -->
+<script language="JavaScript" type="text/javascript">
+document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/viceland/;tile=3;sz=160x600;ord=' + ord + '?" type="text/javascript"></scr' + 'ipt>');
+</script><noscript><a href="http://ad.doubleclick.net/jump/viceland/;tile=3;sz=160x600;ord=123456789?" target="_blank"><img src="http://ad.doubleclick.net/ad/viceland/;tile=3;sz=160x600;ord=123456789?" width="160" height="600" border="0" alt=""></a></noscript>
+<!-- End ad tag --></div>
+ </td>
+ </tr>
+ </table>
+ <br>
+
+ </div>
+ </td>
+ </tr>
+ <tr>
+ <td width="670" valign="top" align="left" bgcolor="white">
+ <div align="left">
+
+
+
+<table border='0' cellspacing='0' cellpadding='0'>
+<tr><td width='300'><div align='left'><a href='http://www.viceland.com/int/v16n12/htdocs/'><span class='section_header'>IN THE MAGAZINE</span></a></div></td>
+
+<td width='340'><div align='right'><table width=180 border="0" cellspacing="0" cellpadding="0"><tr><td width=74>
+<script type="text/javascript">
+digg_url = 'http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php';
+digg_title = "0";
+digg_bgcolor = '#fff';
+digg_skin = 'compact';
+digg_window = 'new';
+</script>
+<script src="http://digg.com/tools/diggthis.js" type="text/javascript"></script>
+</td><td >
+<div align="left">
+<script>function fbs_click() {u=location.href;t=document.title;window.open('http://www.facebook.com/sharer.php?u='+encodeURIComponent(u)+'&t='+encodeURIComponent(t),'sharer','toolbar=0,status=0,width=626,height=436');return false;}</script>
+<img width="49" height="17" src="http://scs.viceland.com/img/share.gif" alt="" usemap="#share" border="0" /><map name="share" id="share"><area shape="rect" coords="1,0,14,13" href="http://www.facebook.com/share.php?u=<url>" onclick="return fbs_click()" alt="" target="_blank" /><area shape="rect" coords="15,0,32,13" href="http://www.stumbleupon.com/submit?url=http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php" alt="" target="_blank" /><area shape="rect" coords="34,0,48,14" href="http://twitter.com/home?status=0 - http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php (via @VICEMAG)" alt="" target="_blank" /></map></div>
+</td></tr></table></div></td></tr></table>
+
+
+
+<h1></h1><i>Published December, 2009 <br></i><h1>BERKELEY BREATHED</h1><br>INTERVIEW BY JESSE PEARSON, PHOTO BY JODY BOYMAN<br>
+<br><img src="http://scs.viceland.com/int/v16n12/htdocs/berkeley-breathed-273/berkeley-breathed.jpg" alt="" width="670" height="936" border="0" vspace="4"><br><br><i>For those of us who grew up as weird kids in the 1980s, the work of Berkeley Breathed was as important as those twin eternal pillars of weird-kid-dom: Monty Python and </i>Mad<i> magazine. In a word: seminal. In two words: </i>fucking<i> seminal.<br>
+ <br>
+ Breathed&rsquo;s comic strip </i>Bloom County<i> ran from 1980 until 1988. It crossed the goofiness of talking penguins and drug addict cats with the topicality of stuff like nuclear anxiety and the evils of consumerism. (Remember when Opus would compulsively buy Ronco products because of infomercials?) It even won a Pulitzer Prize in 1987, though at the time we were more impressed with the Billy &amp; the Boingers flexi-disc than some boring grown-up award.</i><br>
+<br><br>A five-volume compilation of every <i>Bloom County</i> strip is being released now, and it&rsquo;s a trip to go back and reread all the stuff that we practically had memorized over 20 years ago. It&rsquo;s still as smart and hilarious as we remember it being, though it&rsquo;s amazing to realize how much of the satire went over our heads. Did 12-year-old us even know who Bella Abzug was? (Actually we kinda still don&rsquo;t know who that was.)<br><br>After<i> Bloom County</i>, Breathed did two more strips, <i>Outland </i>and<i> Opus</i>, and wrote a bunch of acclaimed children&rsquo;s books, the most recent of which is called <i>Flawed Dogs: The Shocking Raid on Westminster</i>. It&rsquo;s about a dog who goes through a series of pretty severe trials and tribulations and it made us cry. Breathed is an outspoken animal-rights activist and that often comes through in his work, especially in this novel, with its nightmarish descriptions of dogfighting and animal testing. But it&rsquo;s not all glum, chum. The humor in it is as distinctive as ever and feels to us like the voice of an old pal&#151;or maybe it&rsquo;s the ghost of our childhoods. Scary!<br><br>
+Anyway, Bill the Cat for president! Ack!<br><br><b>Vice: I&rsquo;m curious as to how the writing process for <i>Bloom County</i> worked. Did you always know where you were heading, or was there an element of discovery as you wrote? Did you think in terms of seasons, sort of like television writing?<br>
+ Berkeley Breathed: </b>Your question presumes a reality so distant from the experience that any questions about process are meaningless&#151;but perfectly reasonable. The problem is that you&rsquo;re asking a guy who didn&rsquo;t think of any individual strip or story line longer than it takes to read this sentence. I drew in a manic, sweat-flinging state of deadline panic EVERY week. Not most weeks. EVERY week. For ten years. I drew what occurred to me as I stared at the same blank strips I&rsquo;d been watching for six days, and only because the plane that would deliver them to my syndicate editor was due to take off at 5:30 AM, about seven hours from that moment.<br><br><b>Ouch.<br>
+</b>This is not how a comic strip should be drawn. This is not how ANY deadline should be handled by any reasonable, conscientious, grown-up professional. But as I wasn&rsquo;t, they weren&rsquo;t. The flip side of that confessional coin is that<i> Bloom County </i>would not have been what it was&#151;whatever it was&#151;if I&rsquo;d been that thing I just described. It was art and writing born of chaos. It was the poison the madness needed. The new book&#151;with all the chaos intact and not edited out, as it was in books before&#151;shows that rather intriguingly.<br><br><b>Can you talk a little about how you developed the looks for the main characters? What were some of the inspirations in terms of the art of the <i>Bloom County</i> universe&#151;not just for the characters but also for the settings?<br>
+</b>As many have read and few have doubted, <i>Doonesbury</i> was the stylistic key that all of us turned to in those days&#151;college cartoonists, I mean. Jules Feiffer played a similar role for Garry Trudeau. I doubt Garry would have left the word balloons behind if it hadn&rsquo;t been for Jules. I virtually didn&rsquo;t have any other artistic influences, as I wasn&rsquo;t familiar with other comic strips. I&rsquo;m still not today. They simply were never in my sights.<br><br><b>Why not?<br>
+</b>Comic strips didn&rsquo;t tell stories well, as slow and chopped as they are. And stories&#151;narrative, plot, character&#151;are what still make me sweaty with creative passion.<br><br>You want to know why <i>Bloom County</i> was set in a rural, small-town environment? <i>To Kill a Mockingbird</i>. Maycomb, Alabama, was where I naturally dropped all of my imagination when it needed a setting. A therapist might help explain why, but there it is. I <i>will</i> say that Opus is really Scout from <i>Mockingbird</i> in many ways. He&rsquo;s a motherless innocent, adrift and wandering about in an adult world of confusion, betrayal, and incivility. We experience it through both their eyes.<br><br>
+But don&rsquo;t think for a second that this occurred to me when I sketched a penguin for a throwaway gag in 1982. I show it in the new collection: Opus was meant to be dispensed with after his initial appearance. Go figger.<br><br><b>And when was the last time you met a penguin in real life?<br>
+</b>I walked with them in Antarctica in the early 80s. Swam with them in the Galapagos in 1989. I sensed&#151;and I might be projecting here&#151;that they knew who I was.<br><br><b>Were you more of a Milo or a Binkley when you were a kid? I definitely felt very Binkley most of the time.<br>
+</b>Absolutely split the difference. I think most cartoonists with an ensemble set of characters split their personality up in contrasting elements and then apply it to their characters. Not always, but to a large extent you can&rsquo;t help it.<br><br>
+So I was filled with self-doubt as Binkley was. But the little scheming media manipulator of Milo? Well, guilty.<br><br><b>As a ten-year-old kid reading your comics, a lot of the political humor went right over my head. I remember having to ask a grown-up who Jeane Kirkpatrick was because she kept popping up in <i>Bloom County</i>. It&rsquo;s interesting that a comic could encompass a range of characters and references that has Jesse Helms on one side and the Giant Purple Snorklewacker on the other.<br>
+</b>I drew what seemed amusing to me. That was the extent of my thoughtfulness when it came to designing the <i>Bloom County</i> world. As with most cartoonists, a comic strip is an unsavory peek into the head of its maker. Having said that, I have no inkling as to the inside of Jim Davis&rsquo;s head from a reading of <i>Garfield</i>. It was the classic corporate invention&#151;drawn by a staff&#151;which made it fun to skewer. It was there to sell shit.<br><br><b>Speaking of that, did you ever hear about any reaction from Jim Davis regarding your statement that Bill the Cat started as a parody of Garfield?<br>
+</b>Trust me, Davis could care less about being mocked. It wasn&rsquo;t respect that he worked hard for.<br><br><b>I think that a lot of kids in the 80s sort of started with <i>Garfield</i> when they were really young and then graduated to <i>Bloom County</i>. Do you have many memories of encounters with fans of <i>Bloom County</i>?<br>
+</b>In the heyday, I would do signings at comic-book stores, which I&rsquo;d never seen before&#151;nor the fans of such. It was a bit of a shocker. This is pre-Comic Con. I was stunned because I could never have been in one of those crowds myself. It wasn&rsquo;t in my DNA. So I had to adapt to a fan base of people that I had yet to understand. I simply didn&rsquo;t come from their world. <br><br>The influence I was having on the younger kids was rather sobering. Anyone who produces stories and popular art remembers when they suddenly realized that there were actual faces to the readers of one&rsquo;s work and that they, in many cases, took it far more seriously than I did. I remember hearing about Harrison Ford out and out dismissing his movies&rsquo; fans as being nut jobs. He&rsquo;s in the wrong business. You can sense it in his performances now. He&rsquo;d rather be drunk and somewhere else. A pity. We who are lucky enough to provoke the imaginations of the public owe it to ourselves and to them to embrace the whole enchilada. It took me some years to appreciate this.<br><br><b>Have you ever had a crazy fan?<br>
+</b>Yes indeedy, I&rsquo;ve had crazy fans. Rabid. Committable. <i>Bloom County</i> seemed to attract mental cases like flies to horseshit. One poor adult woman kept sending me hours of videotapes of herself talking to me, but calling me by a different name. Her family finally contacted me and apologized after she stripped in one of them. It&rsquo;s both sad and deeply scary that in these days, folks can find your address in two clicks. It wasn&rsquo;t like this before. We live behind gates now.<br><br><br><br><br><div align="right"><a href="http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php?page=1">1</a> | <a href="http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php?page=2">2</a> | <a href="http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php?page=2">NEXT PAGE &#x00bb;</a></div><hr noshade="noshade" /><br><br><a href='http://www.viceland.com/int/search_author.php?search=Jesse Pearson'>See all articles by this contributor</a><br><br><div align='left'>
+
+<table border='0' cellspacing='2' cellpadding='2'>
+<tr>
+<td width='50'>
+<a href='http://www.viceland.com/int/v16n12/htdocs/eileen-myles-jonathan-galassi-on-poetry-272.php'><b><span class='ddtext'><font size='1'>&lt;&nbsp;PREV </font></span></b></a>
+
+
+ </td>
+ <td width='150'>
+ <div align='center'>
+ <a href='http://www.viceland.com/int/v16n12/htdocs/'><b><span class='ddtext'><font size='1'>MORE&nbsp;FROM&nbsp;THIS&nbsp;ISSUE</font></span></b></a></div>
+ </td>
+
+
+<td width='50'>
+<div align='right'><a href='http://www.viceland.com/int/v16n12/htdocs/iain-banks-274.php'><b><span class='ddtext'><font size='1'> NEXT&nbsp;&gt;</font></span></b></a></div></td><td width='144'><div align='center'><a href='mailto:?subject=Link from Vice Magazine&body=http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php'><img src='http://scs.viceland.com/img/sendtoafriend.jpg' border='0' width='125' height='16'></a></div>
+ </td><td width='140'><div align='right'><table width=180 border="0" cellspacing="0" cellpadding="0"><tr><td width=74><script type="text/javascript">
+digg_url = 'http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php';
+digg_title = "0";
+digg_bgcolor = '#fff';
+digg_skin = 'compact';
+digg_window = 'new';
+</script>
+<script src="http://digg.com/tools/diggthis.js" type="text/javascript"></script>
+</td><td ><div align="left"><script>function fbs_click() {u=location.href;t=document.title;window.open('http://www.facebook.com/sharer.php?u='+encodeURIComponent(u)+'&t='+encodeURIComponent(t),'sharer','toolbar=0,status=0,width=626,height=436');return false;}</script><img width="49" height="17" src="http://www.viceland.com/design_rev6/share.gif" alt="" usemap="#share" border="0" /><map name="share" id="share"><area shape="rect" coords="1,0,14,13" href="http://www.facebook.com/share.php?u=<url>" onclick="return fbs_click()" alt="" target="_blank" /><area shape="rect" coords="15,0,32,13" href="http://www.stumbleupon.com/submit?url=http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php" alt="" target="_blank" /><area shape="rect" coords="34,0,48,14" href="http://twitter.com/home?status=0 - http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php (via @VICEMAG)" alt="" target="_blank" /></map></div>
+</td></tr></table></div></td></tr></table></div><br><a id="comments" name="comments"></a><table bgcolor='#bfbfbf' height=32 width=630 border="0" cellspacing="2" cellpadding="2"><tr><td><div align="left"><h3>Comments</h3></td></tr></table><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'><table width=640 border="0" cellspacing="2" cellpadding="2"><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Feb 1, 2010 <i>wrote:</i><br> Someone needs a dandelion break</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Jan 8, 2010 <i>wrote:</i><br> I&rsquo;m not camping on his lawn anytime soon, but would consider letting Berkely sleep on my lawn if was eve in town for the Kentucky Derby. And as much as Bloom County means to me, his animal rights stuff (starting with the original &quot;Flawed Dogs&quot; book and even the Greenpeace &rsquo;toons) is what&rsquo;s getting him into heaven.</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 26, 2009 <i>wrote:</i><br> comics suck.</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 24, 2009 <i>wrote:</i><br> I disagree, I think he chose the right medium. 
His stuff was cool, and is still regarded as such 30 years after it was first published.</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 24, 2009 <i>wrote:</i><br> Dear 12-year-old budding artist,<br />
+<br />
+Piss off.</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td width=60 valign='top'><center>
+<img src='http://www.viceland.com/account/icons/702762631.jpg' width=60 border='0'>
+</center></td><td width=570 valign='top'><div align='left'><span class='ddtext'><b><a href='http://www.viceland.com/account/account_user.php?user=lukehavergal' target='_blank'>lukehavergal</a></b>, on Dec 24, 2009 <i>wrote:</i><br> curtis is the radiohead of comics.<br />
+<br />
+<br />
+noirfair.wordpress.com</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 24, 2009 <i>wrote:</i><br> Trudeau owns you.</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 24, 2009 <i>wrote:</i><br> ahhh....now I realize why even as a 12-year-old budding artist I thought his strips sucked, graphically speaking. my folks were fans but I could never really engage due to the general crappiness and last-minute shoddiness of the art.<br />
+<br />
+Given that visual appeal is - let&rsquo;s say conservatively - half the battle of selling comic art, pooping out on that aspect of a serial publication ( evinced by his comments re deadlines) seems like a clear indication that one might have picked the wrong medium. <br />
+<br />
+As contemporaries like Bill Griffith or Crumb were producing drug-hazed brilliance, this guy was making feeble attempts to join words and visuals fluidly in a commercial art context like a soggy reverse image of Hunter S. Thompson without the brilliance of Steadman to back him up. <br />
+<br />
+Give me some allegorical-yet-competently-drawn pablum like Walt Kelly&rsquo;s Pogo, Lil&rsquo; Abner ( yeah, that shit was seminal and topical) or even Calvin and Hobbes, and keep the praise for your pitbulls, penguin guy. </span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 21, 2009 <i>wrote:</i><br> what the fuck is wrong with you people? THIS is the longest interview you&rsquo;ve read in a long time? what is it like 2,000 words? go read the david simon interview if you&rsquo;ve got a year or two to kill. christ. </span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 18, 2009 <i>wrote:</i><br> i bought &quot;billy and the boingers&quot; for my dad, for father&rsquo;s day. he never read it and i stole it back when i was 14. flexi disc intact</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 17, 2009 <i>wrote:</i><br> That was the shit. I wish Bill the Cat was real.</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 16, 2009 <i>wrote:</i><br> long interview but really interesting! 
i&rsquo;m a big berkeley fan!</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td width=60 valign='top'><center>
+<img src='http://www.viceland.com/account/icons/515252283.jpg' width=60 border='0'>
+</center></td><td width=570 valign='top'><div align='left'><span class='ddtext'><b><a href='http://www.viceland.com/account/account_user.php?user=komodo' target='_blank'>komodo</a></b>, on Dec 14, 2009 <i>wrote:</i><br> fuck garfield. breathed for the win!</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2 valign='top'><div align='left'><span class='ddtext'><b>Anonymous</b>, on Dec 14, 2009 <i>wrote:</i><br> Longest interview I&rsquo;ve read entirely in a long time. Nice work!</span></div></td></tr><tr><td colspan=2><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'></td></tr><tr><td colspan=2></table><img src='http://www.viceland.com/design_rev5/horizontal_divider_gray2.gif' width=630 height=3 border='0'><div align="center"><br>
+<form action="http://www.viceland.com/account/adder.php" method="get" name="Commentary" target="_top">
+<input type='hidden' name='id' value='2575'>
+<input type='hidden' name='country' value='us'>
+<input type='hidden' name='permalink' value='http://www.viceland.com/int/v16n12/htdocs/berkeley-breathed-273.php'>
+<input type='hidden' name='type' value='viceland'><div align="center">
+<table border="0" cellspacing="2" cellpadding="0"><tr><td >
+<div align="center">
+<b><span class="ds1"><font face="Verdana, Arial, Helvetica, sans-serif">POST A COMMENT [<a href='http://www.viceland.com/account/account.php' target='_blank'>SIGN IN</a>]<br /></font></span></b><span class='ddtext'><i>Hi, in case you haven't heard, you can now sign up to become a "member" of Viceland.com, which entitles you to all sorts of amazing benefits like pictures and a nickname. <a href='http://www.viceland.com/account/account_registration.php' target='_blank'>Click here</a> to make your own profile. You can still comment if you don't, but you gotta do it all 'nonymously.</i></span><br><br></div></td></tr></table>
+
+ <table width=350 border="0" cellspacing="2" cellpadding="0">
+ <tr>
+ <td width=70 valign=top><b><span class='vice_comment_box'>Name: </span></b></td><td width=280><input class="dsR1812" readonly="readonly" value="Anonymous" type="text" name="name" size="30" maxlength="40" /></td></tr><tr>
+ <td valign="top"><b><span class='vice_comment_box'>Comment:</span></b></td><td><textarea name="comment" rows="3" cols="40" wrap="soft"></textarea></td>
+ </tr>
+ <tr>
+ <td colspan=2>
+ <div align="center">
+ <input type="submit" name="" value="Submit Comment" /></div>
+ </td>
+ </tr>
+ </table>
+ </div>
+ </form>
+ <br />
+ </div>
+
+ <br></div></td><td class="dsR106779"></td></tr></table></div><br>
+
+
+
+
+<div id="mega_footer"><div class='mega_footer_breaker'><!-- --></div>
+<div class="mega_footer_column">
+ <img src="http://scs.viceland.com/img/logo_vice_small.gif" alt="Vice">
+ </div>
+ <ul class="mega_footer_column">
+ <li><a href="http://www.viceland.com/account/account.php" target='_blank'>ACCOUNT</a></li><li><a href="http://www.viceland.com/int/about.php">About us</a></li>
+ <li><a href="http://viceland.stores.yahoo.net/subscriptions.html">Subscribe</a></li>
+ <li><a href="http://www.viceland.com/int/about.php">Find Vice</a></li>
+ <li><a href="http://www.viceland.com/int/about.php">Media Kit</a></li>
+ <li><a href="http://www.viceland.com/int/jobs.php">Jobs</a></li>
+ <li><a href="http://www.vbs.tv" target="_blank">VBS.TV</a></li>
+ <li><a href="http://www.motherboard.tv" target="_blank">MOTHERBOARD.TV</a></li>
+ </ul>
+ <ul class="mega_footer_column">
+ <li><a href="http://www.viceland.com/choose/decide.php?country=ar">ARGENTINA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=au">AUSTRALIA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=at">AUSTRIA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=be">BELGIUM</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=br">BRASIL</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=bg">BULGARIA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=cs">CZECH REPUBLIC &amp; SLOVAKIA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=ca">CANADA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=de">DEUTSCHLAND</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=es">ESPA&Ntilde;A</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=fr">FRANCE</a></li>
+
+ </ul>
+ <ul class="mega_footer_column">
+ <li><a href="http://www.viceland.com/choose/decide.php?country=it">ITALY</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=jp">JAPAN</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=mx">MEXICO</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=nl">NETHERLANDS</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=nz">NEW ZEALAND</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=pt">PORTUGAL</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=se">SCANDINAVIA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=ch">SCHWEIZ</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=za">SOUTH AFRICA</a></li>
+ <li><a href="http://www.viceland.com/choose/decide.php?country=uk">UK</a></li>
+ <li>US: <a href="http://www.viceland.com/choose/decide.php?country=us">ENGLISH</a>/<a href="http://www.viceland.com/choose/decide.php?country=us_es">SPANISH</a></li>
+ </ul>
+ <ul class="mega_footer_column">
+ <li><a href="mailto:VICE@VICELAND.COM">VICE@VICELAND.COM</a></li>
+ <li><a href="http://www.viceland.com/privacy_statement.html">PRIVACY STATEMENT</a></li>
+ <li><a href="http://www.viceland.com/terms_of_use.html">TERMS OF USE</a></li>
+ <li>&copy; 2000-2010, VICE MAGAZINE </li>
+ <li><br>Development: <a href="http://www.solidsender.com/">Solid Sender</a></li>
+ <li><br><br><!-- Site Meter -->
+<script type="text/javascript" src="http://s51.sitemeter.com/js/counter.js?site=s51viceattack">
+</script>
+<noscript>
+<a href="http://s51.sitemeter.com/stats.asp?site=s51viceattack" target="_top">
+<img src="http://s51.sitemeter.com/meter.asp?site=s51viceattack" alt="Site Meter" border="0"/></a>
+</noscript>
+<!-- Copyright (c)2006 Site Meter --></li>
+ </ul>
+ <div class="clear"><!-- --></div>
+ </div>
+<script type="text/javascript">
+var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+</script>
+<script type="text/javascript">
+var pageTracker = _gat._getTracker("UA-599058-1");
+pageTracker._initData();
+pageTracker._trackPageview();
+</script><!-- Start Quantcast tag -->
+<script type="text/javascript">
+_qoptions={
+qacct:"p-3c8ay_v-NNA9Y"
+};
+</script>
+<script type="text/javascript" src="http://edge.quantserve.com/quant.js"></script>
+<noscript>
+<img src="http://pixel.quantserve.com/pixel/p-3c8ay_v-NNA9Y.gif" style="display: none;" border="0" height="1" width="1" alt="Quantcast"/>
+</noscript>
+<!-- End Quantcast tag -->
+
+<a title="Web Analytics" href="http://getclicky.com/182667"><img alt="Web Analytics" src="http://static.getclicky.com/media/links/badge.gif" border="0" /></a>
+<script src="http://static.getclicky.com/js" type="text/javascript"></script>
+<script type="text/javascript">clicky.init(182667);</script>
+<noscript><p><img alt="Clicky" width="1" height="1" src="http://static.getclicky.com/182667ns.gif" /></p></noscript>
+
+
+</div>
+</body>
+</html>
View
148 spec/readability_spec.rb
@@ -1,4 +1,150 @@
require File.expand_path(File.join(File.dirname(__FILE__), "spec_helper"))
describe Readability do
-end
+ before do # Builds the HTML string reused by the transformMisusedDivsIntoParagraphs and removeUnlikelyCandidates groups below.
+ @simple_html_fixture = <<-HTML
+ <html>
+ <head>
+ <title>title!</title>
+ </head>
+ <body class='comment'>
+ <div>
+ <p class='comment'>a comment</p>
+ <div class='comment' id='body'>real content</div>
+ <div id="contains_blockquote"><blockquote>something in a table</blockquote></div>
+ </div>
+ </body>
+ </html>
+ HTML
+ end
+
+ describe "transformMisusedDivsIntoParagraphs" do # Specs for Document#transform_misused_divs_into_paragraphs!, run on @simple_html_fixture.
+ before do
+ @doc = Readability::Document.new(@simple_html_fixture)
+ @doc.transform_misused_divs_into_paragraphs!
+ end
+
+ it "should transform divs containing no block elements into <p>s" do
+ @doc.html.css("#body").first.name.should == "p" # in the fixture, #body is a div holding only the text "real content"
+ end
+
+ it "should not transform divs that contain block elements" do
+ @doc.html.css("#contains_blockquote").first.name.should == "div" # in the fixture, this div wraps a <blockquote>
+ end
+ end
+
+ describe "score_node" do # Specs for Document#score_node; its result is read here through the :content_score key.
+ before do
+ @doc = Readability::Document.new(<<-HTML) # NOTE(review): <th> sits directly under <body>, outside any <table> - confirm the parser keeps it as a th element
+ <html>
+ <body>
+ <div id='elem1'>
+ <p>some content</p>
+ </div>
+ <th id='elem2'>
+ <p>some other content</p>
+ </th>
+ </body>
+ </html>
+ HTML
+ @elem1 = @doc.html.css("#elem1").first # first node matching each id in the parsed document
+ @elem2 = @doc.html.css("#elem2").first
+ end
+
+ it "should like <div>s more than <th>s" do
+ @doc.score_node(@elem1)[:content_score].should > @doc.score_node(@elem2)[:content_score]
+ end
+
+ it "should like classes like text more than classes like comment" do
+ @elem2.name = "div" # give both nodes the same tag name so only the class differs afterwards
+ @doc.score_node(@elem1)[:content_score].should == @doc.score_node(@elem2)[:content_score] # baseline: same tag, no class assigned yet
+ @elem1['class'] = "text" # "text" is listed in REGEXES[:positiveRe]; presumably score_node consults it - verify
+ @elem2['class'] = "comment" # "comment" is listed in REGEXES[:negativeRe]; presumably score_node consults it - verify
+ @doc.score_node(@elem1)[:content_score].should > @doc.score_node(@elem2)[:content_score]
+ end
+ end
+
+ describe "removeUnlikelyCandidates" do # Specs for Document#remove_unlikely_candidates!, run on @simple_html_fixture.
+ before do
+ @doc = Readability::Document.new(@simple_html_fixture)
+ @doc.remove_unlikely_candidates!
+ end
+
+ it "should remove things that have class comment" do
+ @doc.html.inner_html.should_not =~ /a comment/ # "a comment" is the text of the fixture's <p class='comment'>
+ end
+
+ it "should not remove body tags" do
+ @doc.html.inner_html.should =~ /<\/body>/ # the fixture's <body> itself carries class='comment'
+ end
+
+ it "should not remove things with class comment and id body" do
+ @doc.html.inner_html.should =~ /real content/ # "real content" sits in the div with class='comment' and id='body'
+ end
+ end
+
+ describe "score_paragraphs" do # Specs for Document#score_paragraphs; the result is used as a hash whose values carry :content_score and :elem.
+ before(:each) do
+ @doc = Readability::Document.new(<<-HTML) # NOTE(review): id="div2> below lacks its closing quote; the expected count of 3 may depend on how the parser recovers - confirm before correcting the markup
+ <html>
+ <head>
+ <title>title!</title>
+ </head>
+ <body id="body">
+ <div id="div1">
+ <div id="div2>
+ <p id="some_comment">a comment</p>
+ </div>
+ <p id="some_text">some text</p>
+ </div>
+ <div id="div3">
+ <p id="some_text2">some more text</p>
+ </div>
+ </body>
+ </html>
+ HTML
+ @candidates = @doc.score_paragraphs(0) # 0 is passed where Document#content passes options[:min_text_length] || TEXT_LENGTH_THRESHOLD
+ end
+
+ it "should score elements in the document" do
+ @candidates.values.length.should == 3
+ end
+
+ it "should prefer the body in this particular example" do
+ @candidates.values.sort { |a, b| # descending by :content_score, so the highest-scoring candidate is first
+ b[:content_score] <=> a[:content_score]
+ }.first[:elem][:id].should == "body"
+ end
+ end
+
+ describe "the cant_read.html fixture" do # End-to-end check: runs Document#content on a saved page read from the fixtures directory next to this spec.
+ it "should work on the cant_read.html fixture with some allowed tags" do
+ allowed_tags = %w[div span table tr td p i strong u h1 h2 h3 h4 pre code br a]
+ allowed_attributes = %w[href]
+ html = File.read(File.dirname(__FILE__) + "/fixtures/cant_read.html")
+ Readability::Document.new(html, :tags => allowed_tags, :attributes => allowed_attributes).content.should match(/Can you talk a little about how you developed the looks for the/) # passes if this sentence appears in the extracted content
+ end
+ end
+
+ describe "general functionality" do # Smoke test of Document#content on a minimal one-paragraph page.
+ before do
+ @doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div></body>",
+ :min_text_length => 0, :retry_length => 1) # :min_text_length replaces the TEXT_LENGTH_THRESHOLD (25) fallback in Document#content; :retry_length presumably overrides RETRY_LENGTH (250) - verify
+ end
+
+ it "should return the main page content" do
+ @doc.content.should match("Some content")
+ end
+ end
+
+ describe "ignoring sidebars" do # Checks that the text of a div with class 'sidebar' is absent from Document#content.
+ before do # "sidebar" is listed in REGEXES[:unlikelyCandidatesRe], and content calls remove_unlikely_candidates! by default; presumably that is what drops the div - verify
+ @doc = Readability::Document.new("<html><head><title>title!</title></head><body><div><p>Some content</p></div><div class='sidebar'><p>sidebar<p></div></body>",
+ :min_text_length => 0, :retry_length => 1) # NOTE(review): the markup above has a second opening <p> after "sidebar" rather than </p> - confirm intended
+ end
+
+ it "should not return the sidebar" do
+ @doc.content.should_not match("sidebar")
+ end
+ end
+end
Please sign in to comment.
Something went wrong with that request. Please try again.