Permalink
Browse files

Rename levels of refs: primary, secondary, tertiary

  • Loading branch information...
jonathansick committed Dec 10, 2014
1 parent 3b2afbd commit e0ed5e464977d6cc7b11743d041c863e050e6b43
Showing with 31 additions and 23 deletions.
  1. +1 −1 astrorec/latexrec.py
  2. +30 −22 astrorec/mentionsrec.py
@@ -42,7 +42,7 @@ def __init__(self, tex_filepath, ads_cache=None):
continue
n_mentions = len(rich_cites)
mention_recs.append(ref_pub, n_mentions)
mention_recs.add_primary_ref(ref_pub, n_mentions)
mention_recs.analyze_secondary()
# TODO get top *N* recommendations by score
@@ -17,54 +17,58 @@ def __init__(self, ads_cache):
super(MentionsRecs, self).__init__()
self._adsdb = ADSBibDB(cache=ads_cache)
# List of B-level publications
self._cited_pubs = []
self._cited_bibcodes = []
self._cited_mention_counts = []
self._primary_pubs = []
self._primary_bibcodes = []
self._primary_mention_counts = []
def add_cited_pub(self, pub, n_mentions):
    """Record a cited publication along with its mention count.

    The publication, its ``bibcode`` attribute and ``n_mentions`` are
    appended to three parallel lists, so the same index addresses the
    same publication in each of them.
    """
    self._cited_pubs.append(pub)
    bibcode = pub.bibcode
    self._cited_bibcodes.append(bibcode)
    self._cited_mention_counts.append(n_mentions)
def add_primary_ref(self, pub, n_mentions):
    """Record a primary reference along with its mention count.

    The publication, its ``bibcode`` attribute and ``n_mentions`` are
    appended to three parallel lists, so the same index addresses the
    same publication in each of them.
    """
    self._primary_pubs.append(pub)
    bibcode = pub.bibcode
    self._primary_bibcodes.append(bibcode)
    self._primary_mention_counts.append(n_mentions)
def analyze_secondary(self):
    """Build and score the secondary set of references to recommend from.

    Secondary publications are those listed in the ``reference_bibcodes``
    of the primary references, excluding any that are themselves primary
    references. Results are stored in three parallel attributes:
    ``_secondary_bibcodes``, ``_secondary_pubs`` (``SecondaryPub``
    instances) and ``_secondary_scores``. The (bibcode, score) pairs are
    also printed.
    """
    # First build the unique set of secondary-level publications
    # that are not already primary references.
    candidates = set()
    for primary_pub in self._primary_pubs:
        try:
            candidates.update(primary_pub.reference_bibcodes)
        except Exception:
            # Best effort: skip a primary whose reference list cannot be
            # obtained (presumably a failed lookup -- TODO confirm), but
            # let KeyboardInterrupt/SystemExit propagate.
            continue
    candidates -= set(self._primary_bibcodes)
    # Stored on the instance because the final report below pairs these
    # bibcodes with their scores.
    self._secondary_bibcodes = list(candidates)

    primary_mentions = np.array(self._primary_mention_counts)
    self._secondary_pubs = [
        SecondaryPub(bibcode, self._adsdb, self._primary_bibcodes,
                     primary_mentions)
        for bibcode in self._secondary_bibcodes
    ]
    self._secondary_scores = [spub.score for spub in self._secondary_pubs]

    # TODO way to return top *n* publications
    # list() so the pairs are shown on Python 3, where zip() is lazy.
    print(list(zip(self._secondary_bibcodes, self._secondary_scores)))
class SecondaryPub(object):
"""A publication at the secondary level that will be scored for relevance
to the original paper via mentions to the tertiary papers
"""
def __init__(self, bibcode, adsdb, cited_bibcodes, cited_mentions):
def __init__(self, bibcode, adsdb, primary_bibcodes, primary_mentions):
super(SecondaryPub, self).__init__()
self._bibcode = bibcode
self._adsdb = adsdb
self._cited_bibcodes = cited_bibcodes
self._primary_bibcodes = primary_bibcodes
# Mentions vector for primary references
self._cited_mentions = cited_mentions
self._primary_mentions = primary_mentions
# Mentions vector for tertiary reference
self._tertiary_mentions = np.zeros(self._cited_mentions.shape)
self._tertiary_mentions = np.zeros(self._primary_mentions.shape)
# TODO read and build the rich citations for this publication
@@ -74,14 +78,18 @@ def __init__(self, bibcode, adsdb, cited_bibcodes, cited_mentions):
# Analyze only quaternary references that appear in the original
# paper too (and thus are likely to be relevant).
for bibcode in pub.reference_bibcodes:
if bibcode not in self._cited_bibcodes:
if bibcode not in self._primary_bibcodes:
continue
# TODO combine bibcode to number of mentions to fill in
# self._tertiary_mentions
else:
i = self._primary_bibcodes.index(bibcode)
# TODO combine bibcode to number of mentions to fill in
# self._tertiary_mentions
# FIXME this gives unit weight to all cited tertiary papers
self._tertiary_mentions[i] = 1
@property
def score(self):
    """Cosine similarity of the primary and tertiary mention vectors.

    See http://en.wikipedia.org/wiki/Cosine_similarity

    Returns
    -------
    float
        ``dot(p, t) / (|p| * |t|)`` where ``p`` is ``_primary_mentions``
        and ``t`` is ``_tertiary_mentions``. Returns ``0.0`` when either
        vector has zero length, since the similarity is undefined there
        and would otherwise evaluate to NaN.
    """
    primary = np.asarray(self._primary_mentions, dtype=float)
    tertiary = np.asarray(self._tertiary_mentions, dtype=float)
    # np.hypot is an element-wise two-argument ufunc; the Euclidean
    # length of a single vector is np.linalg.norm.
    denominator = np.linalg.norm(primary) * np.linalg.norm(tertiary)
    if denominator == 0:
        return 0.0
    return float(np.sum(primary * tertiary) / denominator)

0 comments on commit e0ed5e4

Please sign in to comment.