Merge 2b4607d into ff71fb3

adidier17 · Oct 7, 2020 · 33cbcbd · 33cbcbd
2 parents ff71fb3 + 2b4607d
commit 33cbcbd
Show file tree

Hide file tree

Showing 10 changed files with 358 additions and 271 deletions.
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 A modification of PageRank to find the most prestigious authors in a scientific collaboration network.
 
 [![Language](https://img.shields.io/badge/python-3.5%20%7C%203.6%20%7C%203.7%20%7C%203.8-blue)](#)
-[![PyPi](https://img.shields.io/badge/pypi-0.1.1-blue.svg)](https://pypi.python.org/pypi/author_rank/0.1.1)
+[![PyPi](https://img.shields.io/badge/pypi-0.1.2-blue.svg)](https://pypi.python.org/pypi/author_rank/0.1.2)
 [![License](https://img.shields.io/github/license/adidier17/AuthorRank)](https://opensource.org/licenses/MIT)
 [![Coverage Status](https://coveralls.io/repos/github/adidier17/AuthorRank/badge.svg?branch=main)](https://coveralls.io/github/adidier17/AuthorRank?branch=main)
 [![Build Status](https://api.travis-ci.org/adidier17/AuthorRank.svg?branch=main)](https://travis-ci.org/adidier17/AuthorRank)

diff --git a/author_rank/__init__.py b/author_rank/__init__.py
@@ -16,7 +16,7 @@
 del sys
 
 __author__ = "Valentino Constantinou, Annie Didier"
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 
 import author_rank.graph
 from author_rank.graph import *

diff --git a/author_rank/graph.py b/author_rank/graph.py
@@ -32,8 +32,8 @@ def _extend_graph(self, authors_by_document: list, doc_index: int, progress_bar:
         """
 
         if len(authors_by_document[doc_index]) > 1:
-            author_ids = [tuple(d.values()) for d in self._author_list]
-            pairs = (list(itertools.permutations(author_ids, 2)))
+            # author_ids = [tuple(d.values()) for d in self._author_list]
+            pairs = (list(itertools.permutations(authors_by_document[doc_index], 2)))
             # calculate g_i_j_k
             exclusivity = 1 / (len(authors_by_document[doc_index]) - 1)
             edge = [{"edge": (x[0], x[1]), "weight": exclusivity} for x in pairs]
@@ -101,6 +101,7 @@ def fit(self, documents: List[dict], authorship_key: str = "authors",
         # gets a list of lists
         doc_authors = [i[authorship_key] for i in documents]
 
+
         # remove keys and values that are not used as part of an author UID
         for doc in doc_authors:
             for author in doc:
@@ -109,21 +110,22 @@ def fit(self, documents: List[dict], authorship_key: str = "authors",
                     del author[unwanted_key]
 
         # create a UID for each author based on the remaining keys
+        doc_authors_tuples = [[tuple(d.values()) for d in doc] for doc in doc_authors]
         # unique combination of key values will serve as keys for each author
-        self._author_list = list(itertools.chain.from_iterable(doc_authors))
-        author_uid_tuples = [tuple(d.values()) for d in self._author_list]
+        # self._author_list = list(itertools.chain.from_iterable(doc_authors))
 
+        # author_uid_tuples = [tuple(d.values()) for d in self._author_list]
         # get overall counts of each author
-        counts = Counter(author_uid_tuples)
-
+        author_list = list(itertools.chain.from_iterable(doc_authors_tuples))
+        counts = Counter(author_list)
         acceptable_author_count = check_author_count(counts)
         if acceptable_author_count is False:
             warnings.warn("Number of authors in document set must be greater than one. "
                           "AuthorRank not fit to the data, please try again.", UserWarning)
         else:
             # process each document, create the edges with the appropriate weights
             for doc in range(0, len(doc_authors)):
-                self._extend_graph(doc_authors, doc, progress_bar)
+                self._extend_graph(doc_authors_tuples, doc, progress_bar)
 
             # sort the edges for processing
             edges_all_sorted = sorted(self._edges_all, key=lambda x: str(x["edge"]))

diff --git a/changelog.md b/changelog.md
@@ -4,8 +4,13 @@ All notable changes to `author_rank` will be documented in this Changelog.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 
 and adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
-## 0.1.1
+## 0.1.2 
+### Fixed
+- An issue that caused disconnected (i.e. non-co-authoring) 
+authors to be connected in the AuthorRank graph. Several code changes 
+were introduced to address this issue. 
 
+## 0.1.1
 ### Changed 
 - The progress bar functionality such that it indicates progress on the 
 `.fit()` function across both the graph creation and scoring of authors 
@@ -17,7 +22,6 @@ and adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 
 ## 0.1.0
-
 ### Changed 
 - The manner in which users interact with the library to more
 closely mirror the conventions of the [scikit-learn](https://scikit-learn.org/) 
@@ -42,7 +46,6 @@ was being processed into the AuthorRank graph.
 
 
 ## 0.0.3
-
 ### Added
 
 - A progress bar as an optional argument for creating the `graph.create` 
@@ -58,14 +61,12 @@ added to the testing suite and a fix developed.
 - Libraries required listed in `setup.py`. 
 
 ## 0.0.2
-
 ### Changed 
 
 - Updates the normalization of scores in `top_authors` to a pure Python 
 approach, removing the `numpy` and `scikit-learn` requirements. 
 
 ## 0.0.1
-
 ### Added
 - An example dataset in the `data` directory.
 - `examples` and `notebooks` directories that contain code that 

diff --git a/dist/author_rank-0.1.2-py3-none-any.whl b/dist/author_rank-0.1.2-py3-none-any.whl
diff --git a/dist/author_rank-0.1.2.tar.gz b/dist/author_rank-0.1.2.tar.gz