Skip to content

Commit

Permalink
Correctly downloading, storing, retrieving, and showing favicons and …
Browse files Browse the repository at this point in the history
…gradients for favicons. This commit includes numerous fixes for the Feed view, too. Loaded.
  • Loading branch information
samuelclay committed Jan 30, 2011
1 parent 696d0d6 commit b938e95
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 93 deletions.
13 changes: 1 addition & 12 deletions apps/reader/views.py
Expand Up @@ -367,18 +367,7 @@ def load_feed_page(request):
if feed_id == 0:
raise Http404

feed_page = MFeedPage.objects.filter(feed_id=feed_id)
data = None

if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)
else:
dupe_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
if dupe_feed:
feed = dupe_feed[0].feed
feed_page = MFeedPage.objects.filter(feed_id=feed.pk)
if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)
data = MFeedPage.get_data(feed_id=feed_id)

if not data:
data = "Fetching feed..."
Expand Down
149 changes: 85 additions & 64 deletions apps/rss_feeds/icon_importer.py
Expand Up @@ -3,12 +3,18 @@
import scipy
import scipy.misc
import scipy.cluster
import Image
import urlparse
import operator
import struct
from StringIO import StringIO
from PIL import ImageFile
import ImageChops, Image
from django.conf import settings
from apps.rss_feeds.models import MFeedPage
from PIL import BmpImagePlugin, PngImagePlugin, ImageFile

# Raised when fetched bytes cannot be parsed into a valid image.
class BadImage(Exception): pass
# Sent with every favicon/page request so sites can identify the fetcher;
# 'Connection: close' avoids keeping sockets alive for these one-shot fetches.
HEADERS = {
    'User-Agent': 'NewsBlur Favicon Fetcher - http://www.newsblur.com',
    'Connection': 'close',
}

class IconImporter(object):

Expand All @@ -20,110 +26,126 @@ def save(self):
if not self.force and self.feed.icon.not_found:
print 'Not found, skipping...'
return
image, icon_url = self.fetch(force=self.force)
image, icon_url = self.fetch_image_from_page_data()
if not image:
image, icon_url = self.fetch(force=self.force)

if image:
image = self.normalize_image(image)
color = self.determine_dominant_color_in_image(image)
image_str = self.string_from_image(image)

self.feed.icon.save()
self.feed.icon.data = image_str
self.feed.icon.icon_url = icon_url
self.feed.icon.color = color
self.feed.icon.not_found = False
else:
self.feed.icon.save()
self.feed.icon.not_found = True

self.feed.icon.save()
return not self.feed.icon.not_found


def fetch_image_from_page_data(self):
image = None
content = MFeedPage.get_data(feed_id=self.feed.pk)
url = self._url_from_html(content)
if url:
image = self.get_image_from_url(url)
return image, url

def fetch(self, path='favicon.ico', force=False):
HEADERS = {
'User-Agent': 'NewsBlur Favicon Fetcher - http://www.newsblur.com',
'Connection': 'close',
}
image = None
url = None

if not force:
url = self.feed.icon.icon_url
if not url:
url = self.feed.feed_link
url = urlparse.urljoin(self.feed.feed_link, 'favicon.ico')

if not url.endswith('/') and not url.endswith('favicon.ico'):
url += '/favicon.ico'
if url.endswith('/'):
url += 'favicon.ico'

def request_image(url):
print 'Requesting: %s' % url
request = urllib2.Request(url, headers=HEADERS)
icon = urllib2.urlopen(request)
parser = ImageFile.Parser()
s = icon.read()
if s:
parser.feed(s)
try:
image = parser.close()
return image
except IOError:
raise BadImage

try:
image = request_image(url)
except (urllib2.HTTPError, urllib2.URLError, BadImage):
request = urllib2.Request(self.feed.feed_link, headers=HEADERS)
try:
# 2048 bytes should be enough for most of websites
content = urllib2.urlopen(request).read(2048)
except(urllib2.HTTPError, urllib2.URLError):
return None, None
icon_path = lxml.html.fromstring(content).xpath(
'//link[@rel="icon" or @rel="shortcut icon"]/@href'
)
if icon_path:
if str(icon_path[0]).startswith('http'):
url = icon_path[0]
else:
url = self.feed.feed_link + icon_path[0]
image = self.get_image_from_url(url)
if not image:
url = urlparse.urljoin(self.feed.feed_link, '/favicon.ico')
image = self.get_image_from_url(url)
if not image:
request = urllib2.Request(self.feed.feed_link, headers=HEADERS)
try:
image = request_image(url)
except(urllib2.HTTPError, urllib2.URLError, BadImage):
# 2048 bytes should be enough for most of websites
content = urllib2.urlopen(request).read(2048)
except(urllib2.HTTPError, urllib2.URLError):
return None, None
url = self._url_from_html(content)
if url:
try:
image = self.get_image_from_url(url)
except(urllib2.HTTPError, urllib2.URLError):
return None, None
print 'Found: %s - %s' % (url, image)
return image, url

def get_image_from_url(self, url):
print 'Requesting: %s' % url
try:
request = urllib2.Request(url, headers=HEADERS)
icon = urllib2.urlopen(request)
except (urllib2.HTTPError, urllib2.URLError), e:
return None
parser = ImageFile.Parser()
s = icon.read()
if s:
parser.feed(s)
try:
image = parser.close()
return image
except IOError, e:
return None

def _url_from_html(self, content):
url = None
icon_path = lxml.html.fromstring(content).xpath(
'//link[@rel="icon" or @rel="shortcut icon"]/@href'
)
if icon_path:
if str(icon_path[0]).startswith('http'):
url = icon_path[0]
else:
url = urlparse.urljoin(self.feed.feed_link, icon_path[0])
return url

def normalize_image(self, image):
image = image.resize((16, 16), Image.ANTIALIAS)
print image.size
# if image.size != (16, 16):
# image = image.resize((16, 16), Image.BICUBIC)
print image
if image.mode != 'RGBA':
image = image.convert('RGBA')
# mask = Image.open(settings.IMAGE_MASK)
print image
print image.mode
print image.size
# mask = mask.convert('L')
# print mask
# image.paste(Image.new('RGBA', image.size, '#FFFFFF'), (0, 0), ImageChops.invert(mask))
# image.putalpha(mask)

return image

def determine_dominant_color_in_image(self, image):
NUM_CLUSTERS = 5

# if image.mode == 'P':
# image.putalpha(0)

ar = scipy.misc.fromimage(image)
shape = ar.shape
if len(shape) > 2:
ar = ar.reshape(scipy.product(shape[:2]), shape[2])

codes, dist = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
codes, _ = scipy.cluster.vq.kmeans(ar, NUM_CLUSTERS)
print "Before: %s" % codes
original_codes = codes
for low, hi in [(60, 200), (35, 230), (10, 250)]:
codes = scipy.array([code for code in codes
if not ((code[0] < low and code[1] < low and code[2] < low) or
(code[0] > hi and code[1] > hi and code[2] > hi))])
if not len(codes): codes = original_codes
else: break
print "After: %s" % codes
colors = [''.join(chr(c) for c in code).encode('hex') for code in codes]

vecs, dist = scipy.cluster.vq.vq(ar, codes) # assign codes
vecs, _ = scipy.cluster.vq.vq(ar, codes) # assign codes
counts, bins = scipy.histogram(vecs, len(codes)) # count occurrences
print counts
total = scipy.sum(counts)
print dict(zip(colors, [count/float(total) for count in counts]))
index_max = scipy.argmax(counts) # find most frequent
Expand All @@ -138,6 +160,5 @@ def string_from_image(self, image):
image.save(output, 'png', quality=95)
contents = output.getvalue()
output.close()
print contents.encode('base64')
return contents.encode('base64')

24 changes: 21 additions & 3 deletions apps/rss_feeds/models.py
Expand Up @@ -726,7 +726,7 @@ def save(self, *args, **kwargs):


class FeedIcon(models.Model):
feed = AutoOneToOneField(Feed, related_name='icon')
feed = AutoOneToOneField(Feed, primary_key=True, related_name='icon')
color = models.CharField(max_length=6, default="000000")
data = models.TextField()
icon_url = models.CharField(max_length=2000, blank=True, null=True)
Expand All @@ -735,7 +735,8 @@ class FeedIcon(models.Model):
def save(self, *args, **kwargs):
try:
super(FeedIcon, self).save(*args, **kwargs)
except (IntegrityError, OperationError):
except (IntegrityError, OperationError), e:
print "Error on Icon: %s" % e
if self.id: self.delete()


Expand All @@ -752,8 +753,25 @@ def save(self, *args, **kwargs):
if self.page_data:
self.page_data = zlib.compress(self.page_data)
super(MFeedPage, self).save(*args, **kwargs)


@classmethod
def get_data(cls, feed_id):
data = None
feed_page = cls.objects(feed_id=feed_id)

if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)

if not data:
dupe_feed = DuplicateFeed.objects.filter(duplicate_feed_id=feed_id)
if dupe_feed:
feed = dupe_feed[0].feed
feed_page = MFeedPage.objects.filter(feed_id=feed.pk)
if feed_page:
data = feed_page[0].page_data and zlib.decompress(feed_page[0].page_data)

return data

class MStory(mongo.Document):
'''A feed item'''
story_feed_id = mongo.IntField()
Expand Down
9 changes: 7 additions & 2 deletions media/css/reader.css
Expand Up @@ -912,6 +912,8 @@ background: transparent;
margin: 2px 4px 0 2px;
vertical-align: top;
float: left;
width: 16px;
height: 16px;
}

#story_titles .NB-feedbar .feed .feed_title {
Expand Down Expand Up @@ -1247,6 +1249,8 @@ background: transparent;
left: 0;
vertical-align: text-bottom;
opacity: .6;
height: 16px;
width: 16px;
}
#story_titles .story .NB-story-feed .feed_title {
display: block;
Expand Down Expand Up @@ -1488,22 +1492,23 @@ background: transparent;
}

#story_pane .NB-feed-story-header-feed {
  /* Image fallback must come first: the `background` shorthand resets
     background-image, so placing it after the gradient declarations
     would clobber them in every browser. */
  background: #404040 url('../img/reader/feed_view_feed_background.png') repeat-x 0 0;
  /* Match the -moz gradient's grays; the previous -webkit values
     (rgba(268, 241, 125, 250)) were invalid CSS — channels max out at
     255 and rgba alpha runs 0–1. */
  background-image: -webkit-gradient(
    linear,
    left bottom,
    left top,
    color-stop(0.36, rgb(76, 76, 76)),
    color-stop(0.84, rgb(55, 55, 55))
  );
  background-image: -moz-linear-gradient(
    center bottom,
    rgb(76,76,76) 36%,
    rgb(55,55,55) 84%
  );
  padding: 2px 200px 2px 28px;
  position: relative;
  border-bottom: 1px solid #000;
  border-top: 1px solid #707070;
  z-index: 2;
}
#story_pane .NB-feed-story-header-feed.NB-feed-story-river-same-feed {
Expand Down
Binary file modified media/img/icons/mini/icon_world.gif 100755 → 100644
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit b938e95

Please sign in to comment.