diff --git a/README.markdown b/README.markdown index fedd4e4..290e513 100644 --- a/README.markdown +++ b/README.markdown @@ -23,7 +23,7 @@ the jQuery API. PHPricot is motivated by Rubys HPricot but only a distantly related port. -## Installation +## Installation of Required Libraries This currently a linux only installation guide (please contribute Windows or MacOS if you find the time). diff --git a/lib/PHPricot/Document.php b/lib/PHPricot/Document.php index 1950a7d..eb6b8e3 100644 --- a/lib/PHPricot/Document.php +++ b/lib/PHPricot/Document.php @@ -35,6 +35,7 @@ public function toText() foreach ($this->childNodes AS $node) { $txt .= $node->toText(); } - return $txt; + $txt = preg_replace('(([\s]{3,}))', ' ', $txt); + return rtrim($txt); } } \ No newline at end of file diff --git a/lib/PHPricot/Nodes/Element.php b/lib/PHPricot/Nodes/Element.php index 0cb628c..8a8a25d 100644 --- a/lib/PHPricot/Nodes/Element.php +++ b/lib/PHPricot/Nodes/Element.php @@ -142,6 +142,15 @@ public function val() public function toText() { $out = ''; + + if ($this->name == 'h1') { + $out .= "\n" . str_repeat('*', 65) . "\n"; + } else if ($this->name == 'h2') { + $out .= "\n" . str_repeat('-', 65) . "\n"; + } else if ($this->name == 'li') { + $out .= " * "; + } + foreach ($this->childNodes AS $child) { $out .= $child->toText(); } @@ -150,10 +159,20 @@ public function toText() $out .= "\n"; } else if ($this->name == 'p') { $out .= "\n\n"; - } else if ($this->name == 'a' && isset($this->attributes['href'])) { - $out .= "[#" . $this->attributes['href'] . "]"; + } else if ($this->name == 'a' && isset($this->attributes['href']) && strpos($this->attributes['href'], "#") !== 0) { + $out .= "(" . $this->attributes['href'] . ")"; } else if ($this->name == 'img' && isset($this->attributes['src'])) { $out .= "[image:" . $this->attributes['src'] . "]"; + } else if ($this->name == 'h1') { + $out .= "\n" . str_repeat('*', 65) . "\n\n"; + } else if (in_array($this->name, array('h2', 'h3', 'h4', 'h5', 'h6'))) { + $out .= "\n" . str_repeat('-', 65) . "\n\n"; + } else if ($this->name == 'td') { + $out .= "\t"; + } else if ($this->name == 'li') { + $out .= "\n"; + } else if ($this->name == 'ul') { + $out .= "\n"; } return $out; diff --git a/lib/PHPricot/Nodes/Text.php b/lib/PHPricot/Nodes/Text.php index e733760..631f705 100644 --- a/lib/PHPricot/Nodes/Text.php +++ b/lib/PHPricot/Nodes/Text.php @@ -28,6 +28,6 @@ public function toHtml() public function toText() { - return $this->text; + return str_replace(" ", " ", $this->text); } } \ No newline at end of file diff --git a/tests/PHPricot/ToTextTest.php b/tests/PHPricot/ToTextTest.php new file mode 100644 index 0000000..aab5450 --- /dev/null +++ b/tests/PHPricot/ToTextTest.php @@ -0,0 +1,55 @@ +Foo

bar baz

'); + + $expected = <<assertEquals($expected, $query->getDocument()->toText()); + } + + public function testHeader2() + { + $query = new PHPricot_Query('

Foo

'); + + $expected = <<assertEquals($expected, $query->getDocument()->toText()); + } + + + public function testHeader3() + { + $query = new PHPricot_Query('

Foo

'); + + $expected = <<assertEquals($expected, $query->getDocument()->toText()); + } + + public function testList() + { + $query = new PHPricot_Query('
  • Foo
  • Bar
'); + + $expected = <<assertEquals($expected, $query->getDocument()->toText()); + } +} \ No newline at end of file