diff --git a/bookie/models/__init__.py b/bookie/models/__init__.py index c2d6eab3..af11bcfa 100644 --- a/bookie/models/__init__.py +++ b/bookie/models/__init__.py @@ -41,7 +41,6 @@ Base = declarative_base() LOG = logging.getLogger(__name__) -RECENT = 24 # number of hours to consider a bookmark as recent def initialize_sql(settings): @@ -223,61 +222,50 @@ def complete(prefix, current=None, limit=5, username=None): return DBSession.execute(query) @staticmethod - def suggestions(bmark=None, recent=True, url=None, username=None, - new=False): - """Find suggestions for tags for a bookmark + def suggestions(bmark=None, url=None, username=None): + """Find suggestions for tags for an existing bookmark The plan: - Suggest recent tags if there's a recent bookmark to pull tags from - Suggest related tags if there are other tags in bookmarks related - somehow (tbd) - Suggested other tags based on other people bookmarking this url + Suggest tags based on the readable content of the Bookmark + that the user is editing. New Bookmarks won't end up here. """ tag_suggest = [] tag_list = [] - MAX_TAGS = 5 - # Suggested tags feature only supported for edits. - if not new: - # If url is None return empty tag list. - if url is None: + + # If url is None return empty tags + if url is None: + return tag_list + else: + bmark = BmarkMgr.get_by_url(url) + # If bmark is not parsed return empty tag list + if bmark.readable is None: + return tag_list + # Some times parsing may fail and we cannot parse the webpage + # then satus_code will be set to 900 + elif bmark.readable.status_code == '900': return tag_list else: - bmark = BmarkMgr.get_by_url(url) - if bmark.readable is None: - return tag_list - # Some times parsing may fail and we cannot parse the webpage - # then satus_code will be set to 900 - elif bmark.readable.status_code == '900': - return tag_list - else: - content = bmark.readable.content - # Remove unicode character while printing - clean_content = ( - "".join( - BeautifulSoup(content).findAll(text=True)).encode( - 'ascii', 'ignore')) - get_tags = extract.TermExtractor() - tag_suggest = get_tags(clean_content) - tag_suggest = sorted(tag_suggest, key=lambda tag_suggest: - tag_suggest[1], reverse=True) - for result in tag_suggest: - # If it has a space in it, split it. - tags = result[0].split() - for tag in tags: - # Require at least 3 chars long and ignore pure - # numbers. - if tag not in tag_list and tag not in bmark.tags: - if len(tag) > 2 and not tag.isdigit(): - tag_list.append(tag.lower()) - - # return maximum of 5 tags - if len(tag_list) >= MAX_TAGS: - return tag_list[0:MAX_TAGS] - else: - return tag_list - # If not an edit request, return the tag_list - return tag_list + content = bmark.readable.content + # Remove unicode character while printing + clean_content = ( + "".join( + BeautifulSoup(content).findAll(text=True)).encode( + 'ascii', 'ignore')) + get_tags = extract.TermExtractor() + tag_suggest = get_tags(clean_content) + tag_suggest = sorted(tag_suggest, key=lambda tag_suggest: + tag_suggest[1], reverse=True) + for result in tag_suggest: + # If it has a space in it, split it. + tags = result[0].split() + for tag in tags: + # Require at least 3 chars long and ignore pure + # numbers. + if tag not in tag_list and tag not in bmark.tags: + if len(tag) > 2 and not tag.isdigit(): + tag_list.append(tag.lower()) + return tag_list @staticmethod def count(): @@ -488,23 +476,6 @@ def user_dump(username): ).\ filter(Bmark.username == username).all() - @staticmethod - def recent(limit=50, page=0, with_tags=False): - """Get a recent set of bookmarks""" - qry = Bmark.query - - offset = limit * page - qry = qry.order_by(Bmark.stored.desc()).\ - limit(limit).\ - offset(offset).\ - from_self() - - if with_tags: - qry = qry.outerjoin(Bmark.tags).\ - options(contains_eager(Bmark.tags)) - - return qry.all() - @staticmethod def popular(limit=50, page=0, with_tags=False): """Get the bookmarks by most popular first""" diff --git a/bookie/views/bmarks.py b/bookie/views/bmarks.py index b4417d5e..f8014425 100644 --- a/bookie/views/bmarks.py +++ b/bookie/views/bmarks.py @@ -7,6 +7,7 @@ from bookie.bcelery import tasks from bookie.lib.access import ReqAuthorize +from bookie.lib.utils import suggest_tags from bookie.lib.urlhash import generate_hash from bookie.models import ( Bmark, @@ -116,7 +117,12 @@ def edit(request): """ rdict = request.matchdict params = request.params + url = params.get('url', u"") + title = params.get('description', None) new = False + MAX_TAGS = 10 + tag_suggest = [] + base_tags = set() with ReqAuthorize(request, username=rdict['username'].lower()): @@ -129,17 +135,19 @@ def edit(request): if hash_id: bmark = BmarkMgr.get_by_hash(hash_id, request.user.username) - if bmark is None: return HTTPNotFound() + else: + title = bmark.description + url = bmark.hashed.url else: - # hash the url and make sure that it doesn't exist - url = params.get('url', u"") + # Hash the url and make sure that it doesn't exist if url != u"": new_url_hash = generate_hash(url) - test_exists = BmarkMgr.get_by_hash(new_url_hash, - request.user.username) + test_exists = BmarkMgr.get_by_hash( + new_url_hash, + request.user.username) if test_exists: location = request.route_url( @@ -148,22 +156,37 @@ def edit(request): username=request.user.username) return HTTPFound(location) + # No url info given so shown the form to the user. new = True - desc = params.get('description', None) - bmark = Bmark(url, request.user.username, desc=desc) - - tag_suggest = TagMgr.suggestions( - bmark=bmark, - url=bmark.hashed.url, - username=request.user.username, - new=new - ) - + # Setup a dummy bookmark so the template can operate + # correctly. + bmark = Bmark(url, request.user.username, desc=title) + + # Title and url will be in params for new bookmark and + # fetched from database if it is an edit request + if title or url: + suggested_tags = suggest_tags(url) + suggested_tags.update(suggest_tags(title)) + base_tags.update(suggested_tags) + + # If user is editing a bookmark, suggested tags will include tags + # based on readable content also + if not new: + tag_suggest = TagMgr.suggestions( + bmark=bmark, + url=bmark.hashed.url, + username=request.user.username + ) + # tags based on url and title will always be there + # order of tags is important so convert set to list + tag_suggest.extend(list(base_tags)) + tag_suggest = (tag_suggest[0:MAX_TAGS], + tag_suggest)[len(tag_suggest) < MAX_TAGS] return { 'new': new, 'bmark': bmark, 'user': request.user, - 'tag_suggest': tag_suggest, + 'tag_suggest': list(set(tag_suggest)), }