Skip to content

Commit

Permalink
fix on cluster handling
Browse files Browse the repository at this point in the history
  • Loading branch information
jaesivsm committed Aug 7, 2017
1 parent 797f55f commit 059c4dd
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 17 deletions.
1 change: 1 addition & 0 deletions src/tests/controllers/cluster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def gen_articles(factor):
Mock(valuable_tokens=['Sarkozy', 'garage', 'chanson'])] \
* factor
ccontr = ClusterController()
ccontr.tfidf_min_score = 0.6

acontr_cls.return_value.read.return_value = gen_articles(2)

Expand Down
39 changes: 26 additions & 13 deletions src/web/controllers/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,32 @@ def remove_from_cluster(self, article):
Returns None; deleting the article itself is left to the caller.
"""
from web.controllers import ClusterController
if not article.cluster_id:
return
clu_ctrl = ClusterController(self.user_id)
cluster = clu_ctrl.read(id=article.cluster_id,
main_article_id=article.id).first()
cluster = clu_ctrl.read(id=article.cluster_id).first()
if not cluster:
return True
if len(cluster.articles) == 1: # only on article in cluster, deleting
clu_ctrl.delete(cluster.id)
return False
clu_ctrl._enrich_cluster(cluster, cluster.articles[1],
cluster.read, cluster.liked, True)
return True
return

try:
new_art = next(new_art for new_art in cluster.articles
if new_art.id != article.id)
except StopIteration:
# only one article in cluster, deleting cluster
clu_ctrl.delete(cluster.id, delete_articles=False)
else:
if cluster.main_article_id == article.id:
cluster.main_article_id = None
clu_ctrl._enrich_cluster(cluster, new_art,
cluster.read, cluster.liked,
force_article_as_main=True)
self.update({'id': article.id},
{'cluster_id': None,
'cluster_reason': None,
'cluster_score': None,
'cluster_tfidf_with': None,
'cluster_tfidf_neighbor_size': None})
return

def _delete(self, article, commit):
Tag.query.filter(Tag.article_id == article.id).delete()
Expand All @@ -119,7 +134,5 @@ def _delete(self, article, commit):

def delete(self, obj_id, commit=True):
article = self.get(id=obj_id)
still_delete_article = self.remove_from_cluster(article)
if still_delete_article:
return self._delete(article, commit=commit)
return article
self.remove_from_cluster(article)
return self._delete(article, commit=commit)
15 changes: 11 additions & 4 deletions src/web/controllers/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ class ClusterController(AbstractController):
_db_cls = Cluster
max_day_dist = timedelta(days=7)

def __init__(self, *args, **kwargs):
# Forward every argument untouched to the parent controller's constructor.
super().__init__(*args, **kwargs)
# Keep the TF-IDF threshold on the instance (default: MIN_SIMILARITY_SCORE)
# so it can be overridden per controller instead of being read from the
# module-level constant; it is the value a similarity score must exceed.
self.tfidf_min_score = MIN_SIMILARITY_SCORE

def _get_cluster_by_link(self, article):
cluster = self.read(user_id=article.user_id,
main_date__lt=article.date + self.max_day_dist,
Expand Down Expand Up @@ -73,7 +77,7 @@ def _get_cluster_by_similarity(self, article, min_sample_size=10):
return

best_match, score = get_best_match_and_score(article, neighbors)
if score > MIN_SIMILARITY_SCORE:
if score > self.tfidf_min_score:
article.cluster_reason = ClusterReason.tf_idf
article.cluster_score = int(score * 1000)
article.cluster_tfidf_neighbor_size = len(neighbors)
Expand Down Expand Up @@ -263,12 +267,15 @@ def join_read(self, feed_id=None, **filters):
.order_by(Cluster.main_date.desc()).limit(JR_LENGTH),
filter_on_cat)

def delete(self, obj_id):
def delete(self, obj_id, delete_articles=True):
from web.controllers import ArticleController
self.update({'id': obj_id}, {'main_article_id': None}, commit=False)
actrl = ArticleController(self.user_id)
for art in actrl.read(cluster_id=obj_id):
actrl._delete(art, commit=False)
if delete_articles:
for art in actrl.read(cluster_id=obj_id):
actrl._delete(art, commit=False)
else:
actrl.update({'cluster_id': obj_id}, {'cluster_id': None})
return super().delete(obj_id)

#
Expand Down

0 comments on commit 059c4dd

Please sign in to comment.