Permalink
Browse files

lint

  • Loading branch information...
ralsina committed May 22, 2017
1 parent e7fb38e commit 0516994b2f42c4efaa5500843e2d26d45f6a2be4
Showing with 6 additions and 7 deletions.
  1. +6 −7 v7/similarity/similarity.py
@@ -30,33 +30,32 @@
from nikola.plugin_categories import Task
+
class Similarity(Task):
    """Task plugin that scores how similar each post is to every other post."""

    name = "similarity"

    def set_site(self, site):
        """Store the site object that the rest of the plugin reads from."""
        self.site = site

    def gen_tasks(self):
        """Compute and print similarity rankings for every post in the timeline.

        The posts are scanned, each post's HTML-stripped text is lower-cased
        and split on whitespace, and a two-topic gensim LSI model is built
        over the resulting bag-of-words corpus.  Every post is then queried
        against the similarity index and its ten best matches are printed.
        """
        self.site.scan_posts()

        # One token list per post, in timeline order.
        texts = [
            entry.text(strip_html=True).lower().split()
            for entry in self.site.timeline
        ]

        dictionary = gensim.corpora.Dictionary(texts)
        corpus = [dictionary.doc2bow(tokens) for tokens in texts]
        lsi = gensim.models.LsiModel(
            corpus,
            id2word=dictionary,
            num_topics=2,
        )
        index = gensim.similarities.MatrixSimilarity(lsi[corpus])

        for i, _post in enumerate(self.site.timeline):
            # Project this post into LSI space and query the index with it.
            query = lsi[dictionary.doc2bow(texts[i])]
            scores = index[query]
            # Negating the score sorts the (position, score) pairs from the
            # highest score down to the lowest.
            ranked = sorted(enumerate(scores), key=lambda pair: -pair[1])
            print(i, ranked[:10])
-
-

0 comments on commit 0516994

Please sign in to comment.