Skip to content

Commit

Permalink
Parse HTML for a product and read the title.
Browse files Browse the repository at this point in the history
  • Loading branch information
gma committed Apr 24, 2010
1 parent 614a9ff commit 38aa9d7
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 19 deletions.
27 changes: 27 additions & 0 deletions fixtures/product.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>Wordtracker Keywords</title>
<link rel="shortcut icon" href="/favicon.gif" type="image/gif" />
</head>
<body>


<div class="hproduct">
<a href="http://api.getsatisfaction.com/companies/wordtracker" class="brand">Wordtracker</a>
<a href="http://api.getsatisfaction.com/products/45632" class="name uri">Wordtracker Keywords</a>


<img alt="Product_default_small" class="image thumb" src="http://assets1.getsatisfaction.com/images/product_default_small.png" />
<p class="description">
Wordtracker's keyword research tool.
</p>

<a href="http://api.getsatisfaction.com/products/45632/topics" rel="topics">Topics associated with Wordtracker Keywords</a>
<a href="http://api.getsatisfaction.com/products/45632/companies" rel="companies">Companies associated with Wordtracker Keywords</a>

</div>
</body>
</html>
63 changes: 48 additions & 15 deletions satisfaction.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,69 @@
import urllib

import feedparser
import lxml.html


class ResourceNotFound(RuntimeError):
    """Raised when a requested resource cannot be found (a 404 response)."""


class Resource(object):
    """Base class for a remote resource identified by ``resource_id``.

    Subclasses supply a ``URL`` format string containing one ``%s``
    placeholder for the resource id, and a ``load_document()`` method that
    fetches the resource and stores the parsed result in ``self._document``.
    """

    def __init__(self, resource_id):
        self.resource_id = resource_id
        # Parsed document; filled in on first access to ``document``.
        self._document = None

    @classmethod
    def url(cls, resource_id, page=None):
        """Return the URL for ``resource_id``.

        When ``page`` is truthy a ``?page=<page>`` query string is appended.
        """
        url = cls.URL % resource_id
        if page:
            url += '?page=%s' % page
        return url

    def resource_not_found(self):
        """Raise ResourceNotFound naming this class and the resource id."""
        name = self.__class__.__name__
        raise ResourceNotFound('%s not found: %s' % (name, self.resource_id))

    @property
    def document(self):
        """Return the parsed document, loading it on first access."""
        if self._document is None:
            self.load_document()
        return self._document


class AtomParser:
    """Mixin whose ``load_document`` parses the resource URL with feedparser."""

    def load_document(self):
        """Parse the current page of the resource and store the result.

        Calls ``resource_not_found()`` when the parsed result reports a
        status of 404; otherwise stores the result in ``self._document``.
        """
        feed_url = self.url(self.resource_id, self._page)
        parsed = feedparser.parse(feed_url)
        status = parsed.get('status', None)
        if status == 404:
            self.resource_not_found()
        self._document = parsed


class Topic(Resource):
class HtmlParser:
    """Mixin whose ``load_document`` fetches the resource URL and parses HTML."""

    def load_document(self):
        """Fetch the resource page and store it as an lxml HTML document.

        Calls ``resource_not_found()`` when the response is a 404; otherwise
        the response body is parsed and stored in ``self._document``.
        """
        response = urllib.urlopen(self.url(self.resource_id))
        try:
            # The HTTP status code comes from getcode(); it is None for
            # non-HTTP sources such as local files. A "Status" header, when
            # the server sends one, carries the code followed by a reason
            # phrase, so compare by prefix rather than for equality.
            status_header = response.headers.getheader('status') or ''
            if response.getcode() == 404 or status_header.startswith('404'):
                self.resource_not_found()
            html = response.read()
        finally:
            response.close()
        self._document = lxml.html.document_fromstring(html)


class Product(Resource, HtmlParser):
    """A product, read from the HTML page served at ``URL``."""

    URL = 'http://api.getsatisfaction.com/products/%s'

    @property
    def title(self):
        """Return the text of the first ``a.name`` element in the document.

        Raises IndexError if the document contains no such element.
        """
        # cssselect() returns a list of matching elements, so pick the first
        # match before asking for its text.
        matches = self.document.cssselect('a.name')
        return matches[0].text_content()


class Topic(Resource, AtomParser):

URL = 'http://api.getsatisfaction.com/topics/%s'

def __init__(self, resource_id):
    """Initialise the resource and start paging from page 1."""
    # Delegate the shared attribute setup to the base class instead of
    # repeating it here.
    Resource.__init__(self, resource_id)
    self._page = 1

def __iter__(self):
Expand All @@ -44,16 +87,6 @@ def load_next_page(self):
self._document = None
self._page += 1

@property
def document(self):
    """Return the parsed feed for the current page, fetching it if needed.

    Raises ResourceNotFound when the freshly parsed feed reports status 404.
    """
    if self._document is not None:
        return self._document
    feed_url = self.url(self.resource_id, self._page)
    # The parsed result is stored before the status check, as in the
    # straight-line version of this logic.
    self._document = feedparser.parse(feed_url)
    status = self._document.get('status', None)
    if status == 404:
        name = self.__class__.__name__
        raise ResourceNotFound('%s not found: %s' % (name, self.resource_id))
    return self._document

@property
def title(self):
    """Return the title of the parsed document's feed."""
    feed = self.document.feed
    return feed.title
Expand Down
20 changes: 16 additions & 4 deletions satisfaction_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,29 @@ def tearDown(self):
for cls, func in self.original_functions:
cls.url = func

def useFixture(self, cls, name=None, extension='xml'):
    """Replace ``cls.url`` with a stub that returns a local fixture path.

    The fixture file name is built from the lower-cased class name, then
    ``name`` (if given), then ``page-<page>`` (if the stub is called with a
    page), joined by ``-`` and followed by ``.<extension>``. The path is
    rooted at ``<cwd>/fixtures``.

    ``extension`` defaults to ``'xml'``; pass ``'html'`` for HTML fixtures
    such as ``fixtures/product.html``.

    The previous ``cls.url`` is recorded in ``self.original_functions`` so
    that it can be restored later.
    """
    def stubbed_url(self, topic_id, page=None):
        parts = [cls.__name__.lower()]
        if name:
            parts.append('%s' % name)
        if page:
            parts.append('page-%s' % page)
        filename = '%s.%s' % ('-'.join(parts), extension)
        return os.path.join(os.getcwd(), 'fixtures', filename)

    self.original_functions.append((cls, cls.url))
    cls.url = stubbed_url

def topic(self):
    """Return a ``satisfaction.Topic`` built with a fixed topic id."""
    # TODO: topic id is ignored in tests - does it work?
    topic_id = '1234'
    return satisfaction.Topic(topic_id)


class MissingProductTest(TestHelper):
    """Reading the title of an unknown product raises ResourceNotFound."""

    def test_product_not_found(self):
        with self.assertRaises(satisfaction.ResourceNotFound):
            missing = satisfaction.Product('bad_product_name')
            missing.title


class MissingTopicTest(TestHelper):

Expand All @@ -35,7 +47,7 @@ def test_topic_not_found(self):
self.topic().title


class TopicWithNoRepliesTest(TestHelper):
class TopicWithoutRepliesTest(TestHelper):

def withFixtures(self):
self.useFixture(satisfaction.Topic, 'without-replies')
Expand Down

0 comments on commit 38aa9d7

Please sign in to comment.