Skip to content

Commit

Permalink
Merge pull request #440 from bookieio/qa-suggested-tags-update
Browse files Browse the repository at this point in the history
- Make sure that you get the title/url suggestions as well as the content
suggestions when saving a bookmark via the webui.

Landing on behalf of infinitum. Will try this out to test the CI automated lander.
  • Loading branch information
bookiebot committed Apr 27, 2014
2 parents ba46585 + 894fd6e commit cc7f00a
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 81 deletions.
101 changes: 36 additions & 65 deletions bookie/models/__init__.py
Expand Up @@ -41,7 +41,6 @@
Base = declarative_base()

LOG = logging.getLogger(__name__)
RECENT = 24 # number of hours to consider a bookmark as recent


def initialize_sql(settings):
Expand Down Expand Up @@ -223,61 +222,50 @@ def complete(prefix, current=None, limit=5, username=None):
return DBSession.execute(query)

@staticmethod
def suggestions(bmark=None, recent=True, url=None, username=None,
new=False):
"""Find suggestions for tags for a bookmark
def suggestions(bmark=None, url=None, username=None):
"""Find suggestions for tags for an existing bookmark
The plan:
Suggest recent tags if there's a recent bookmark to pull tags from
Suggest related tags if there are other tags in bookmarks related
somehow (tbd)
Suggest other tags based on other people bookmarking this url
Suggest tags based on the readable content of the Bookmark
that the user is editing. New Bookmarks won't end up here.
"""
tag_suggest = []
tag_list = []
MAX_TAGS = 5
# Suggested tags feature only supported for edits.
if not new:
# If url is None return empty tag list.
if url is None:

# If url is None return empty tags
if url is None:
return tag_list
else:
bmark = BmarkMgr.get_by_url(url)
# If bmark is not parsed return empty tag list
if bmark.readable is None:
return tag_list
# Sometimes parsing fails and the webpage cannot be parsed;
# in that case status_code will be set to 900
elif bmark.readable.status_code == '900':
return tag_list
else:
bmark = BmarkMgr.get_by_url(url)
if bmark.readable is None:
return tag_list
# Sometimes parsing fails and the webpage cannot be parsed;
# in that case status_code will be set to 900
elif bmark.readable.status_code == '900':
return tag_list
else:
content = bmark.readable.content
# Keep only the text nodes and drop any non-ASCII characters
clean_content = (
"".join(
BeautifulSoup(content).findAll(text=True)).encode(
'ascii', 'ignore'))
get_tags = extract.TermExtractor()
tag_suggest = get_tags(clean_content)
tag_suggest = sorted(tag_suggest, key=lambda tag_suggest:
tag_suggest[1], reverse=True)
for result in tag_suggest:
# If it has a space in it, split it.
tags = result[0].split()
for tag in tags:
# Require at least 3 chars long and ignore pure
# numbers.
if tag not in tag_list and tag not in bmark.tags:
if len(tag) > 2 and not tag.isdigit():
tag_list.append(tag.lower())

# return maximum of 5 tags
if len(tag_list) >= MAX_TAGS:
return tag_list[0:MAX_TAGS]
else:
return tag_list
# If not an edit request, return the tag_list
return tag_list
content = bmark.readable.content
# Keep only the text nodes and drop any non-ASCII characters
clean_content = (
"".join(
BeautifulSoup(content).findAll(text=True)).encode(
'ascii', 'ignore'))
get_tags = extract.TermExtractor()
tag_suggest = get_tags(clean_content)
tag_suggest = sorted(tag_suggest, key=lambda tag_suggest:
tag_suggest[1], reverse=True)
for result in tag_suggest:
# If it has a space in it, split it.
tags = result[0].split()
for tag in tags:
# Require at least 3 chars long and ignore pure
# numbers.
if tag not in tag_list and tag not in bmark.tags:
if len(tag) > 2 and not tag.isdigit():
tag_list.append(tag.lower())
return tag_list

@staticmethod
def count():
Expand Down Expand Up @@ -488,23 +476,6 @@ def user_dump(username):
).\
filter(Bmark.username == username).all()

@staticmethod
def recent(limit=50, page=0, with_tags=False):
    """Return one page of bookmarks, most recently stored first.

    ``limit`` is the page size and ``page`` the zero-based page index, so
    the query skips ``limit * page`` rows. When ``with_tags`` is true the
    bookmarks' tags are outer joined and eagerly loaded from that join.

    """
    skip = limit * page
    newest_first = Bmark.query.order_by(Bmark.stored.desc())
    # Wrap the paged query with from_self() before any join is applied.
    paged = newest_first.limit(limit).offset(skip).from_self()

    if not with_tags:
        return paged.all()

    tagged = paged.outerjoin(Bmark.tags)
    return tagged.options(contains_eager(Bmark.tags)).all()

@staticmethod
def popular(limit=50, page=0, with_tags=False):
"""Get the bookmarks by most popular first"""
Expand Down
55 changes: 39 additions & 16 deletions bookie/views/bmarks.py
Expand Up @@ -7,6 +7,7 @@

from bookie.bcelery import tasks
from bookie.lib.access import ReqAuthorize
from bookie.lib.utils import suggest_tags
from bookie.lib.urlhash import generate_hash
from bookie.models import (
Bmark,
Expand Down Expand Up @@ -116,7 +117,12 @@ def edit(request):
"""
rdict = request.matchdict
params = request.params
url = params.get('url', u"")
title = params.get('description', None)
new = False
MAX_TAGS = 10
tag_suggest = []
base_tags = set()

with ReqAuthorize(request, username=rdict['username'].lower()):

Expand All @@ -129,17 +135,19 @@ def edit(request):

if hash_id:
bmark = BmarkMgr.get_by_hash(hash_id, request.user.username)

if bmark is None:
return HTTPNotFound()
else:
title = bmark.description
url = bmark.hashed.url
else:
# hash the url and make sure that it doesn't exist
url = params.get('url', u"")
# Hash the url and make sure that it doesn't exist
if url != u"":
new_url_hash = generate_hash(url)

test_exists = BmarkMgr.get_by_hash(new_url_hash,
request.user.username)
test_exists = BmarkMgr.get_by_hash(
new_url_hash,
request.user.username)

if test_exists:
location = request.route_url(
Expand All @@ -148,22 +156,37 @@ def edit(request):
username=request.user.username)
return HTTPFound(location)

# No url info given so show the form to the user.
new = True
desc = params.get('description', None)
bmark = Bmark(url, request.user.username, desc=desc)

tag_suggest = TagMgr.suggestions(
bmark=bmark,
url=bmark.hashed.url,
username=request.user.username,
new=new
)

# Setup a dummy bookmark so the template can operate
# correctly.
bmark = Bmark(url, request.user.username, desc=title)

# Title and url come from params for a new bookmark and are
# fetched from the database if it is an edit request
if title or url:
suggested_tags = suggest_tags(url)
suggested_tags.update(suggest_tags(title))
base_tags.update(suggested_tags)

# If user is editing a bookmark, suggested tags will include tags
# based on readable content also
if not new:
tag_suggest = TagMgr.suggestions(
bmark=bmark,
url=bmark.hashed.url,
username=request.user.username
)
# tags based on url and title will always be there
# order of tags is important so convert set to list
tag_suggest.extend(list(base_tags))
tag_suggest = (tag_suggest[0:MAX_TAGS],
tag_suggest)[len(tag_suggest) < MAX_TAGS]
return {
'new': new,
'bmark': bmark,
'user': request.user,
'tag_suggest': tag_suggest,
'tag_suggest': list(set(tag_suggest)),
}


Expand Down

0 comments on commit cc7f00a

Please sign in to comment.