Skip to content
Permalink
Browse files

lint

  • Loading branch information
ralsina committed May 22, 2017
1 parent e7fb38e commit 0516994b2f42c4efaa5500843e2d26d45f6a2be4
Showing with 6 additions and 7 deletions.
  1. +6 −7 v7/similarity/similarity.py
@@ -30,33 +30,32 @@

from nikola.plugin_categories import Task


class Similarity(Task):
    """Calculate post similarity.

    Every post in the site timeline is tokenized, projected into a
    2-topic LSI space with gensim, and compared against every other post.
    """

    name = "similarity"

    def set_site(self, site):
        """Remember the site object whose posts will be compared."""
        self.site = site

    def gen_tasks(self):
        """Build similarity data for each post.

        Scans the site's posts, builds a bag-of-words corpus from their
        HTML-stripped, lower-cased, whitespace-split text, trains an LSI
        model on it and, for each post, prints its timeline index together
        with the ten highest-scoring ``(timeline_index, similarity)`` pairs.

        Nothing is returned or yielded; the results are only printed.
        """
        self.site.scan_posts()

        # One token list per post, in timeline order, so that position i in
        # `texts`/`corpus` corresponds to self.site.timeline[i].
        texts = [
            p.text(strip_html=True).lower().split()
            for p in self.site.timeline
        ]

        dictionary = gensim.corpora.Dictionary(texts)
        if len(dictionary) == 0:
            # No posts, or no post contains any token: there is nothing to
            # compare.  NOTE(review): gensim is expected to refuse to build
            # an LSI model over an empty vocabulary -- bail out before that.
            return

        corpus = [dictionary.doc2bow(text) for text in texts]
        lsi = gensim.models.LsiModel(corpus, id2word=dictionary, num_topics=2)
        index = gensim.similarities.MatrixSimilarity(lsi[corpus])

        # `corpus` already holds the bag-of-words vector of every post, so
        # reuse it instead of calling doc2bow a second time per post.
        for i, vec_bow in enumerate(corpus):
            sims = index[lsi[vec_bow]]
            # Highest similarity first; negating the score keeps tied
            # entries in timeline order (sorted() is stable).
            ranked = sorted(enumerate(sims), key=lambda item: -item[1])
            print(i, ranked[:10])


0 comments on commit 0516994

Please sign in to comment.