Skip to content
Permalink
Browse files
Don't calculate similarities from/to drafts and private posts
  • Loading branch information
ralsina committed Oct 23, 2017
1 parent 35a1a98 commit 7f948e6407e180de6162c57e68f2b16affd7bf75
Showing with 11 additions and 9 deletions.
  1. +1 −1 v7/similarity/similarity.plugin
  2. +10 −8 v7/similarity/similarity.py
@@ -7,6 +7,6 @@ PluginCategory = Task

[Documentation]
Author = Roberto Alsina
-Version = 0.1
+Version = 0.3
Website = http://plugins.getnikola.com/#similarity
Description = Calculate similar posts
@@ -46,6 +46,7 @@ def set_site(self, site):
def gen_tasks(self):
"""Build similarity data for each post."""
self.site.scan_posts()
+timeline = [p for p in self.site.timeline if not (p.is_draft or p.is_private)]

kw = {
"translations": self.site.translations,
@@ -85,7 +86,7 @@ def title_similarity(p1, p2):

def create_idx(indexes, dictionaries, lsis, lang):
texts = []
-for p in self.site.timeline:
+for p in timeline:
texts.append(split_text(p.text(strip_html=True, lang=lang), lang=lang))
dictionary = gensim.corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]
@@ -101,12 +102,13 @@ def write_similar(path, post, lang, indexes=indexes, dictionaries=dictionaries,
vec_bow = dictionaries[lang].doc2bow(doc)
vec_lsi = lsis[lang][vec_bow]
body_sims = indexes[lang][vec_lsi]
-tag_sims = [tags_similarity(post, p) for p in self.site.timeline]
-title_sims = [title_similarity(post, p) for p in self.site.timeline]
-full_sims = [tag_sims[i] + title_sims[i] + body_sims[i] * 1.5 for i in range(len(self.site.timeline))]
+
+tag_sims = [tags_similarity(post, p) for p in timeline]
+title_sims = [title_similarity(post, p) for p in timeline]
+full_sims = [tag_sims[i] + title_sims[i] + body_sims[i] * 1.5 for i in range(len(timeline))]
full_sims = sorted(enumerate(full_sims), key=lambda item: -item[1])
-idx = self.site.timeline.index(post)
-related = [(self.site.timeline[s[0]], s[1], tag_sims[s[0]], title_sims[s[0]], body_sims[s[0]]) for s in
+idx = timeline.index(post)
+related = [(timeline[s[0]], s[1], tag_sims[s[0]], title_sims[s[0]], body_sims[s[0]]) for s in
full_sims[:kw['similar_count'] + 1] if s[0] != idx]
data = []
for p, score, tag, title, body in related:
@@ -120,9 +122,9 @@ def write_similar(path, post, lang, indexes=indexes, dictionaries=dictionaries,
json.dump(data, outf)

for lang in self.site.translations:
-file_dep = [p.translated_source_path(lang) for p in self.site.timeline]
+file_dep = [p.translated_source_path(lang) for p in timeline]
uptodate = utils.config_changed({1: kw}, 'similarity')
-for i, post in enumerate(self.site.timeline):
+for i, post in enumerate(timeline):
out_name = os.path.join(kw['output_folder'], post.destination_path(lang=lang)) + '.related.json'
task = {
'basename': self.name,

0 comments on commit 7f948e6

Please sign in to comment.