Skip to content

Commit

Permalink
Removed obsolete SGMLParser
Browse files Browse the repository at this point in the history
SGMLParser has been removed from Python 3
  • Loading branch information
claudep committed Jun 1, 2015
1 parent c7cc6f8 commit 495faf8
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 25 deletions.
48 changes: 26 additions & 22 deletions linkcheck/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from sgmllib import SGMLParser
from HTMLParser import HTMLParser

from django.contrib.contenttypes.models import ContentType


class Lister(SGMLParser):
class Lister(HTMLParser):
def reset(self):
SGMLParser.reset(self)
HTMLParser.reset(self)
self.urls = []


@@ -16,42 +15,47 @@ def __init__(self):
#self.in_img = False
self.text = ''
self.url = ''
SGMLParser.__init__(self)
def start_a(self, attrs):
self.in_a = True
href = [v for k, v in attrs if k=='href']
if href:
self.url = href[0]
def start_img(self, attrs):
if self.in_a:
HTMLParser.__init__(self)

def handle_starttag(self, tag, attrs):
if tag == 'a':
href = [v for k, v in attrs if k == 'href']
if href:
self.in_a = True
self.url = href[0]
elif tag == 'img' and self.in_a:
src = [v for k, v in attrs if k=='src']
if src:
self.text += ' [image:%s] ' % src[0]
def handle_data(self, data):
if self.in_a:
self.text += data
def end_a(self):
if self.url:

def handle_endtag(self, tag):
if tag == 'a' and self.in_a:
self.urls.append((self.text[:256], self.url))
self.in_a = False
self.text = ''
self.url = ''

def handle_data(self, data):
if self.in_a:
self.text += data


class ImageLister(Lister):
def start_img(self, attrs):
src = [v for k, v in attrs if k=='src']
if src:
self.urls.append(('', src[0]))
def handle_starttag(self, tag, attrs):
if tag == 'img':
src = [v for k, v in attrs if k=='src']
if src:
self.urls.append(('', src[0]))


class AnchorLister(HTMLParser):
def __init__(self):
self.names = []
HTMLParser.__init__(self)

def reset(self):
HTMLParser.reset(self)
self.names = []

def handle_starttag(self, tag, attributes):
name = [v for k, v in attributes if k=='id']
if name:
@@ -166,4 +170,4 @@ def get_linklist(self, extra_filter={}):

@classmethod
def content_type(cls):
return ContentType.objects.get_for_model(cls.model)
return ContentType.objects.get_for_model(cls.model)
12 changes: 9 additions & 3 deletions linkcheck/tests/test_linkcheck.py
Original file line number Diff line number Diff line change
@@ -166,9 +166,15 @@ def test_external_check_404(self):
class FindingLinksTestCase(TestCase):
def test_found_links(self):
self.assertEqual(Url.objects.all().count(), 0)
Book.objects.create(title='My Title', description="""Here's a link: <a href="http://www.example.org">Example</a>""")
self.assertEqual(Url.objects.all().count(), 1)
self.assertEqual(Url.objects.all()[0].url, "http://www.example.org")
Book.objects.create(title='My Title', description="""
Here's a link: <a href="http://www.example.org">Example</a>,
and an image: <img src="http://www.example.org/logo.png" alt="logo">""")
self.assertEqual(Url.objects.all().count(), 2)
self.assertQuerysetEqual(
Url.objects.all().order_by('url'),
["<Url: http://www.example.org>", "<Url: http://www.example.org/logo.png>"]
)


class ReportViewTestCase(TestCase):
def setUp(self):
Expand Down

0 comments on commit 495faf8

Please sign in to comment.