From 5bf4ef9f7817698a8beea669f9138c4e0796bfb7 Mon Sep 17 00:00:00 2001 From: Ivana Yovcheva Date: Mon, 24 Aug 2020 21:59:53 +0300 Subject: [PATCH 01/15] Update release worker This updates release worker to insert new values when release_id is not already inserted and update when a row exist. Includes few tiny fixes Signed-off-by: Ivana Yovcheva --- workers/release_worker/release_worker.py | 42 +++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/workers/release_worker/release_worker.py b/workers/release_worker/release_worker.py index 5c70210311..768cd2f7e9 100644 --- a/workers/release_worker/release_worker.py +++ b/workers/release_worker/release_worker.py @@ -10,6 +10,13 @@ #TODO - fully edit to match releases class ReleaseWorker(Worker): + """ + Worker that collects Repository Releases data from the Github API + and stores it in our database. + + :param task: most recent task the broker added to the worker's queue + :param config: holds info like api keys, descriptions, and database connection strings + """ def __init__(self, config={}): worker_type = "release_worker" @@ -30,8 +37,21 @@ def __init__(self, config={}): self.tool_version = '1.0.0' self.data_source = 'GitHub API' - def insert_release(self, repo_id, owner, release): - author = release['author']['name']+'_'+release['author']['company'] + def insert_release(self, task, repo_id, owner, release): + + # Get current table values + release_id_data_sql = s.sql.text(""" + SELECT releases.release_id + FROM releases + WHERE repo_id = :repo_id + """) + self.logger.info(f'Getting release table values with the following PSQL query: \n{release_id_data_sql}\n') + release_id_data = pd.read_sql(release_id_data_sql, self.db, params={'repo_id': repo_id}) + release_id_data = release_id_data.apply(lambda x: x.str.strip()) + + name = "" if release['author']['name'] is None else release['author']['name'] + company = "" if release['author']['company'] is None else release['author']['company'] 
+ author = name+'_'+company # Put all data together in format of the table self.logger.info(f'Inserting release for repo with id:{repo_id}, owner:{owner}, release name:{release["name"]}\n') release_inf = { @@ -39,7 +59,7 @@ def insert_release(self, repo_id, owner, release): 'repo_id': repo_id, 'release_name': release['name'], 'release_description': release['description'], - 'release_author': release['author'], + 'release_author': author, 'release_created_at': release['createdAt'], 'release_published_at': release['publishedAt'], 'release_updated_at': release['updatedAt'], @@ -52,14 +72,20 @@ def insert_release(self, repo_id, owner, release): 'data_source': self.data_source } - result = self.db.execute(self.releases_table.insert().values(release_inf)) - self.logger.info(f"Primary Key inserted into releases table: {result.inserted_primary_key}\n") + if release_id_data.size > 0 and release['id'] in release_id_data.values: + result = self.db.execute(self.releases_table.update().where( + self.releases_table.c.release_id==release['id']).values(release_inf)) + self.logger.info(f"Release {release['id']} updated into releases table\n") + else: + result = self.db.execute(self.releases_table.insert().values(release_inf)) + self.logger.info(f"Release {release['id']} inserted into releases table\n") + self.logger.info(f"Primary Key inserted into releases table: {result.inserted_primary_key}\n") self.results_counter += 1 - self.logger.info(f"Inserted info for {owner}/{repo}/{release['name']}\n") + self.logger.info(f"Inserted info for {owner}/{repo_id}/{release['name']}\n") #Register this task as completed - self.register_task_completion(task, release_id, "releases") + self.register_task_completion(task, repo_id, "releases") return def releases_model(self, task, repo_id): @@ -146,7 +172,7 @@ def releases_model(self, task, repo_id): for n in data['releases']['edges']: if 'node' in n: release = n['node'] - self.insert_release(self, repo_id, owner, release) + 
self.insert_release(task, repo_id, owner, release) else: self.logger.info("There's no release to insert. Current node is not available in releases: {}\n".format(n)) else: From de8df7324c818c1d9b27feec3b47c8a69f55f245 Mon Sep 17 00:00:00 2001 From: Ivana Yovcheva Date: Wed, 26 Aug 2020 21:55:25 +0300 Subject: [PATCH 02/15] Add tag_only releases to the release worker This change checks for and adds tags to the `releases` table when they are used instead of actual GitHub releases. It also adds metrics for tag_only releases Signed-off-by: Ivana Yovcheva --- augur/metrics/release.py | 80 ++++++++- workers/release_worker/release_worker.py | 200 +++++++++++++++++------ 2 files changed, 221 insertions(+), 59 deletions(-) diff --git a/augur/metrics/release.py b/augur/metrics/release.py index e999a53abb..c264421446 100644 --- a/augur/metrics/release.py +++ b/augur/metrics/release.py @@ -9,7 +9,7 @@ @register_metric() def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None): - """ Returns a timeseris of new reviews or pull requests opened + """ Returns a timeseris of new releases created :param repo_group_id: The repository's repo_group_id :param repo_id: The repository's repo_id, defaults to None @@ -24,7 +24,7 @@ def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, e end_date = datetime.datetime.now().strftime('%Y-%m-%d') if not repo_id: - reviews_SQL = s.sql.text(""" + releases_SQL = s.sql.text(""" SELECT res.repo_name, res.release_id, @@ -47,18 +47,19 @@ def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, e releases LEFT JOIN repo ON releases.repo_id = repo.repo_id WHERE repo.repo_id in (SELECT repo_id FROM repo WHERE repo_group_id=:repo_group_id ) + AND releases.tag_only = False ) as res GROUP BY releases.repo_id, releases.release_id ORDER BY releases.release_published_at DESC """) - results = pd.read_sql(reviews_SQL, self.database, + results = pd.read_sql(releases_SQL, 
self.database, params={'period': period, 'repo_group_id': repo_group_id, 'begin_date': begin_date, 'end_date': end_date }) return results else: - reviews_SQL = s.sql.text(""" + releases_SQL = s.sql.text(""" SELECT repo.repo_name, releases.release_id, @@ -75,11 +76,80 @@ def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, e COUNT(releases) FROM releases LEFT JOIN repo ON releases.repo_id = repo.repo_id + WHERE releases.tag_only = False GROUP BY repo.repo_id, releases.release_id ORDER BY releases.release_published_at DESC """) - results = pd.read_sql(reviews_SQL, self.database, + results = pd.read_sql(releases_SQL, self.database, + params={'period': period, 'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) + return results + +@register_metric() +def tag_only_releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None): + """ Returns a timeseris of new tags that are considered releases + without an official release being published + + :param repo_group_id: The repository's repo_group_id + :param repo_id: The repository's repo_id, defaults to None + :param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day' + :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00' + :param end_date: Specifies the end date, defaults to datetime.now() + :return: DataFrame of new releases/period + """ + if not begin_date: + begin_date = '1970-1-1' + if not end_date: + end_date = datetime.datetime.now().strftime('%Y-%m-%d') + + if not repo_id: + releases_SQL = s.sql.text(""" + SELECT + res.repo_name, + res.release_id, + res.release_name, + res.release_author, + res.release_created_at, + res.release_tag_name, + COUNT(res) + FROM ( + SELECT + releases.* + repo.repo_name + FROM + releases LEFT JOIN repo ON releases.repo_id = repo.repo_id + WHERE + repo.repo_id in (SELECT repo_id FROM repo WHERE repo_group_id=:repo_group_id ) + AND releases.tag_only = True + ) as 
res + GROUP BY releases.repo_id, releases.release_id + ORDER BY releases.release_published_at DESC + """) + + results = pd.read_sql(releases_SQL, self.database, + params={'period': period, 'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date }) + return results + + else: + releases_SQL = s.sql.text(""" + SELECT + repo.repo_name, + releases.release_id, + releases.release_name, + releases.release_author, + releases.release_created_at, + releases.release_tag_name, + COUNT(releases) + FROM + releases LEFT JOIN repo ON releases.repo_id = repo.repo_id + WHERE releases.tag_only = True + GROUP BY repo.repo_id, releases.release_id + ORDER BY releases.release_published_at DESC + """) + + results = pd.read_sql(releases_SQL, self.database, params={'period': period, 'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date}) return results diff --git a/workers/release_worker/release_worker.py b/workers/release_worker/release_worker.py index 768cd2f7e9..d7db2de70a 100644 --- a/workers/release_worker/release_worker.py +++ b/workers/release_worker/release_worker.py @@ -37,7 +37,65 @@ def __init__(self, config={}): self.tool_version = '1.0.0' self.data_source = 'GitHub API' - def insert_release(self, task, repo_id, owner, release): + def get_release_inf(self, repo_id, release, tag_only): + if not tag_only: + name = "" if release['author']['name'] is None else release['author']['name'] + company = "" if release['author']['company'] is None else release['author']['company'] + author = name + '_' + company + release_inf = { + 'release_id': release['id'], + 'repo_id': repo_id, + 'release_name': release['name'], + 'release_description': release['description'], + 'release_author': author, + 'release_created_at': release['createdAt'], + 'release_published_at': release['publishedAt'], + 'release_updated_at': release['updatedAt'], + 'release_is_draft': release['isDraft'], + 'release_is_prerelease': release['isPrerelease'], + 'release_tag_name': 
release['tagName'], + 'release_url': release['url'], + 'tag_only': tag_only, + 'tool_source': self.tool_source, + 'tool_version': self.tool_version, + 'data_source': self.data_source + } + else: + if 'tagger' in release['target']: + if 'name' in release['target']['tagger']: + name = release['target']['tagger']['name'] + else: + name = "" + if 'email' in release['target']['tagger']: + email = '_' + release['target']['tagger']['email'] + else: + email = "" + author = name + email + if 'date' in release['target']['tagger']: + date = release['target']['tagger']['date'] + else: + date = "" + else: + author = "" + date = "" + release_inf = { + 'release_id': release['id'], + 'repo_id': repo_id, + 'release_name': release['name'], + 'release_author': author, + 'release_tag_name': release['name'], + 'tag_only': tag_only, + 'tool_source': self.tool_source, + 'tool_version': self.tool_version, + 'data_source': self.data_source + } + if date: + release_inf['release_created_at'] = date + + return release_inf + + + def insert_release(self, task, repo_id, owner, release, tag_only = False): # Get current table values release_id_data_sql = s.sql.text(""" @@ -49,28 +107,9 @@ def insert_release(self, task, repo_id, owner, release): release_id_data = pd.read_sql(release_id_data_sql, self.db, params={'repo_id': repo_id}) release_id_data = release_id_data.apply(lambda x: x.str.strip()) - name = "" if release['author']['name'] is None else release['author']['name'] - company = "" if release['author']['company'] is None else release['author']['company'] - author = name+'_'+company # Put all data together in format of the table self.logger.info(f'Inserting release for repo with id:{repo_id}, owner:{owner}, release name:{release["name"]}\n') - release_inf = { - 'release_id': release['id'], - 'repo_id': repo_id, - 'release_name': release['name'], - 'release_description': release['description'], - 'release_author': author, - 'release_created_at': release['createdAt'], - 'release_published_at': 
release['publishedAt'], - 'release_updated_at': release['updatedAt'], - 'release_is_draft': release['isDraft'], - 'release_is_prerelease': release['isPrerelease'], - 'release_tag_name': release['tagName'], - 'release_url': release['url'], - 'tool_source': self.tool_source, - 'tool_version': self.tool_version, - 'data_source': self.data_source - } + release_inf = self.get_release_inf(repo_id, release, tag_only) if release_id_data.size > 0 and release['id'] in release_id_data.values: result = self.db.execute(self.releases_table.update().where( @@ -88,7 +127,64 @@ def insert_release(self, task, repo_id, owner, release): self.register_task_completion(task, repo_id, "releases") return - def releases_model(self, task, repo_id): + def get_query(self, owner, repo, tag_only): + if not tag_only: + query = """ + { + repository(owner:"%s", name:"%s"){ + id + releases(orderBy: {field: CREATED_AT, direction: ASC}, last: %d) { + edges { + node { + name + publishedAt + createdAt + description + id + isDraft + isPrerelease + tagName + url + updatedAt + author { + name + company + } + } + } + } + } + } + """ % (owner, repo, 10) + else: + query = """ + { + repository(owner:"%s", name:"%s"){ + id + refs(refPrefix: "refs/tags/", last: %d){ + edges { + node { + name + id + target { + ... 
on Tag { + tagger { + name + email + date + } + } + } + } + } + } + } + } + """ % (owner, repo, 10) + + return query + + def fetch_data(self, task, repo_id, tag_only = False): github_url = task['given']['github_url'] @@ -98,33 +194,7 @@ def releases_model(self, task, repo_id): url = 'https://api.github.com/graphql' - query = """ - { - repository(owner:"%s", name:"%s"){ - id - releases(orderBy: {field: CREATED_AT, direction: ASC}, last: %d) { - edges { - node { - name - publishedAt - createdAt - description - id - isDraft - isPrerelease - tagName - url - updatedAt - author { - name - company - } - } - } - } - } - } - """ % (owner, repo, 10) + query = self.get_query(owner, repo, tag_only) # Hit the graphql endpoint and retry 3 times in case of failure num_attempts = 0 @@ -165,19 +235,41 @@ def releases_model(self, task, repo_id): self.register_task_failure(task, repo_id, "Failed to hit endpoint: {}".format(url)) return - self.logger.info("repository value is: {}\n".format(data)) + data['owner'] = owner + + return data + + + def releases_model(self, task, repo_id): + data = self.fetch_data(task, repo_id) + + self.logger.info("repository value is: {}\n".format(data)) if 'releases' in data: - if 'edges' in data['releases']: + if 'edges' in data['releases'] and data['releases']['edges']: for n in data['releases']['edges']: if 'node' in n: release = n['node'] - self.insert_release(task, repo_id, owner, release) + self.insert_release(task, repo_id, data['owner'], release) else: self.logger.info("There's no release to insert. 
Current node is not available in releases: {}\n".format(n)) + elif 'edges' in data['releases'] and not data['releases']['edges']: + self.logger.info("Searching for tags instead of releases...") + data = self.fetch_data(task, repo_id, True) + self.logger.info("refs value is: {}\n".format(data)) + if 'refs' in data: + if 'edges' in data['refs']: + for n in data['refs']['edges']: + if 'node' in n: + release = n['node'] + self.insert_release(task, repo_id, data['owner'], release, True) + else: + self.logger.info("There's no release to insert. Current node is not available in releases: {}\n".format(n)) + else: + self.logger.info("There are no releases to insert for current repository: {}\n".format(data)) + else: + self.logger.info("There are no refs in data: {}\n".format(data)) else: self.logger.info("There are no releases to insert for current repository: {}\n".format(data)) else: self.logger.info("Graphql response does not contain repository: {}\n".format(data)) - - From 45c10a6368b1aacae49f2e7ba317121fa58fd1a5 Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Thu, 27 Aug 2020 10:29:19 -0500 Subject: [PATCH 03/15] Update Travis CI build to install all worker deps Signed-off-by: Carter Landis --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0df9abb1dd..6967a4ebd5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,8 @@ env: before_install: - docker run -d -p 5432:5432 --name augur_test_database augurlabs/augur:test_data@sha256:fd2d9a178a9fee7cd548bd40a16e08d4611be22892491e817aafd53502f74cd0 install: - - pip install .[dev] + - ./scripts/install/backend.sh + - ./scripts/install/workers.sh - augur configure generate script: From 560a0c7d3fe501e58e4f734c1fbb72595c4056fb Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Thu, 27 Aug 2020 10:32:07 -0500 Subject: [PATCH 04/15] Remove duplicated and outdated doc pages Signed-off-by: Carter Landis --- docs/README.md | 11 ---- docs/apidoc.json | 7 --- 
.../workers/writing-docs.rst | 53 ----------------- docs/source/getting-started/dev-start.md | 20 ------- schema/generate/draft.md | 57 ------------------- 5 files changed, 148 deletions(-) delete mode 100644 docs/README.md delete mode 100644 docs/apidoc.json delete mode 100644 docs/source/development-guide/workers/writing-docs.rst delete mode 100644 docs/source/getting-started/dev-start.md delete mode 100644 schema/generate/draft.md diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 5393b7b680..0000000000 --- a/docs/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Getting Started with Docs - -1. Directory for source files: `augur/docs/source` -2. Each branch has itself published on readthedocs.io - - Master: https://oss-augur.readthedocs.io/en/master/ - - Dev: https://oss-augur.readthedocs.io/en/dev/ -3. Syntax reference for restructred text, which is what readthedocs.io uses: https://docutils.sourceforge.io/docs/user/rst/quickref.html -4. Best way to figure out how things are structure is to look in the source folder under docs - - Each directory has a `toc.rst` file that is a table of contents - - There are configuration steps on each branch so the docs are built there if that's what you are working on. - - There is also a way to build locally, but Sean doesn't know what it is. 
\ No newline at end of file diff --git a/docs/apidoc.json b/docs/apidoc.json deleted file mode 100644 index cb4126f556..0000000000 --- a/docs/apidoc.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "name": "Augur API", - "version": "0.0.2", - "description": "REST API for Health and Sustainability Metrics", - "title": "Augur API", - "url" : "http://localhost:5000/api/unstable" -} \ No newline at end of file diff --git a/docs/source/development-guide/workers/writing-docs.rst b/docs/source/development-guide/workers/writing-docs.rst deleted file mode 100644 index 93bc3926b3..0000000000 --- a/docs/source/development-guide/workers/writing-docs.rst +++ /dev/null @@ -1,53 +0,0 @@ -Writing documentation -====================== - -Currently, we maintain a set of library and usage documentation (which is what you are reading!) that -we update with each release. The following sections briefly outline how to contribute to our documentation. - -.. note:: - - **All** PRs which require a documentation change will not be merged until that change has been made. - -Library and Usage Documentation --------------------------------- - -The library documenation is written using `reStructuredText `_ for the raw markdown, and then built into web pages using `Sphinx `_. - -We'll avoid going over reStructuredText in detail here, -but `here `__ is a good reference document. - -Similarly, we'll avoid going over Sphinx in great detail as well; `here `__ is a good reference document for the -most commonly used directives. - -Building -~~~~~~~~ -To see your changes and make sure everything rendered correctly, run ``make docs`` in the root -``augur/`` directory, and then open ``docs/build/html/index.html`` in your web browser to view it. - -.. code-block:: bash - - $ make docs - $ open docs/build/html/index.html - -Or, you can use the shorcut which does exactly this: - -.. 
code-block:: bash - - # to build and then open to the locally built documentation - $ make docs-view - - -After opening it once, just run ``make docs`` and refresh the page in your browser to see the changes: - -.. code-block:: bash - - # after opening the documentation - $ make docs - -Hosting -~~~~~~~ -Our documentation is graciously hosted by `Read the Docs `_. - -Enabled branches of the main ``chaoss/augur`` repository will each have their own documentation, with the -default ``master`` corresponding to ``master`` on the readthedocs. The documentation will automatically be -built and deployed on a push to one of these branches or on any incoming PR, but please don't forget to check before you push! \ No newline at end of file diff --git a/docs/source/getting-started/dev-start.md b/docs/source/getting-started/dev-start.md deleted file mode 100644 index 8f7a22eb88..0000000000 --- a/docs/source/getting-started/dev-start.md +++ /dev/null @@ -1,20 +0,0 @@ -# Getting Started with Development -1. Augur Documentation: https://oss-augur.readthedocs.io/en/dev/ (Use the dev branch) -2. Follow documentation to install on whatever OS you are most comfortable with (other than windows). -3. Use the "oh my zsh" shell. It has a lot of nice Git features. https://ohmyz.sh/ -4. Postgres.app download for mac: https://postgresapp.com/downloads.html -5. git clone https://github.com/chaoss/augur augur-rdohm - - `pwd` will tell you the current working directory. - - https://ma.ttias.be/mac-os-xcrun-error-invalid-active-developer-path-missing-xcrun/ -6. For the first week or so, if you encounter an issue that is not clear? Its clear what you're supposed to do as a next step: Enter an issue: https://github.com/chaoss/augur/issues -7. I recommmend getting a copy of Navicat for Postgres -8. Create a password on the command line for postgres: -``` -sudo -u postgres psql postgres - -# \password postgres - -Enter new password: - -``` -9. 
diff --git a/schema/generate/draft.md b/schema/generate/draft.md deleted file mode 100644 index 3708e8bc91..0000000000 --- a/schema/generate/draft.md +++ /dev/null @@ -1,57 +0,0 @@ -```sql - - - -CREATE INDEX CONCURRENTLY "contributor_worker_issue_events_finder" ON "augur_data"."issue_events" USING brin ( - "cntrb_id" -); - - -CREATE INDEX CONCURRENTLY "contributor_worker_pull_request_events_finder" ON "augur_data"."pull_request_events" USING brin ( - "cntrb_id" -); - - - - - issue_events_result = self.db.execute(self.issue_events_table.update().where( - self.issue_events_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuples in the issue_events table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_events_result = self.db.execute(self.pull_request_events_table.update().where( - self.pull_request_events_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuples in the pull_request_events table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - issues_cntrb_result = self.db.execute(self.issues_table.update().where( - self.issues_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuples in the issues table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - issues_reporter_result = self.db.execute(self.issues_table.update().where( - self.issues_table.c.reporter_id.in_(dupe_ids)).values(reporter_col)) - self.logger.info("Updated reporter_id column in the issues table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - issue_assignee_result = self.db.execute(self.issue_assignees_table.update().where( - self.issue_assignees_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the issue_assignees table with value: {} replaced 
with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_assignee_result = self.db.execute(self.pull_request_assignees_table.update().where( - self.pull_request_assignees_table.c.contrib_id.in_(dupe_ids)).values(pr_assignee_col)) - self.logger.info("Updated contrib_id column for tuple in the pull_request_assignees table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - message_result = self.db.execute(self.message_table.update().where( - self.message_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the message table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_reviewers_result = self.db.execute(self.pull_request_reviewers_table.update().where( - self.pull_request_reviewers_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the pull_request_reviewers table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_meta_result = self.db.execute(self.pull_request_meta_table.update().where( - self.pull_request_meta_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the pull_request_meta table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_repo_result = self.db.execute(self.pull_request_repo_table.update().where( - self.pull_request_repo_table.c.pr_cntrb_id.in_(dupe_ids)).values(pr_repo_col)) - self.logger.info("Updated cntrb_id column for tuple in the pull_request_repo table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - -``` \ No newline at end of file From 69e09bd50de3f9afa33a5495fdb4646c48b5f0e3 Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Wed, 2 Sep 2020 17:59:07 -0500 Subject: [PATCH 05/15] Revert "updating `augur util kill` cli_kill_processes and kill_processes to send SIGKILL after waiting 
15 seconds and checking what is still running again." This reverts commit 7826039c8e9139dd5778d0c71dd4f026fe5e08a2. --- augur/cli/util.py | 42 ++---------------------------------------- 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/augur/cli/util.py b/augur/cli/util.py index da75063dc8..7d68843e91 100644 --- a/augur/cli/util.py +++ b/augur/cli/util.py @@ -7,7 +7,6 @@ import signal import logging from subprocess import call, run -import time import psutil import click @@ -52,29 +51,10 @@ def cli_kill_processes(): processes = get_augur_processes() if processes != []: for process in processes: - if process.pid != os.getpid(): - logger.info(f"Terminating process {process.pid}") - try: - process.send_signal(signal.SIGTERM) - logger.info(f"sending SIGTERM Signal to {process.pid}") - except psutil.NoSuchProcess as e: - pass - - logger.info(f"Waiting to check if processes terminated.") - - time.sleep(15) - logger.info(f"Checking on process termination.") - - processes = get_augur_processes() - - if processes != []: - for process in processes: - if process.pid != os.getpid(): logger.info(f"Killing process {process.pid}") try: - process.send_signal(signal.SIGKILL) - logger.info(f"sending SIGKILL Signal to {process.pid}") + process.send_signal(signal.SIGTERM) except psutil.NoSuchProcess as e: pass @@ -84,29 +64,11 @@ def kill_processes(): if processes != []: for process in processes: if process.pid != os.getpid(): - logger.info(f"Terminating process {process.pid}") + logger.info(f"Killing process {process.pid}") try: process.send_signal(signal.SIGTERM) - logger.info(f"sending SIGTERM Signal to {process.pid}") except psutil.NoSuchProcess as e: logger.warning(e) - logger.info(f"Waiting to check if processes terminated.") - - time.sleep(15) - logger.info(f"Checking on process termination.") - - processes = get_augur_processes() - - if processes != []: - for process in processes: - if process.pid != os.getpid(): - logger.info(f"Killing process {process.pid}") - 
logger.info(f"Killing process {process.pid}") - try: - process.send_signal(signal.SIGKILL) - logger.info(f"sending SIGKILL Signal to {process.pid}") - except psutil.NoSuchProcess as e: - pass @cli.command('list',) @initialize_logging From b06fcfef74b96d2e1b00bca0dc34e93bf39b5599 Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Wed, 2 Sep 2020 18:15:48 -0500 Subject: [PATCH 06/15] Add augur util stop and enforce DRY Signed-off-by: Carter Landis --- augur/cli/run.py | 4 +- augur/cli/util.py | 37 +++++++++++----- .../command-line-interface/util.rst | 44 +++++++++---------- 3 files changed, 50 insertions(+), 35 deletions(-) diff --git a/augur/cli/run.py b/augur/cli/run.py index 3352089a5f..36e6fb16b9 100644 --- a/augur/cli/run.py +++ b/augur/cli/run.py @@ -11,7 +11,7 @@ from augur.housekeeper import Housekeeper from augur.server import Server -from augur.cli.util import kill_processes +from augur.cli.util import stop_processes from augur.application import Application logger = logging.getLogger("augur") @@ -27,7 +27,7 @@ def cli(disable_housekeeper, skip_cleanup): logger.info("Augur application initialized") if not skip_cleanup: logger.debug("Cleaning up old Augur processes...") - kill_processes() + stop_processes() time.sleep(2) else: logger.debug("Skipping process cleanup") diff --git a/augur/cli/util.py b/augur/cli/util.py index 7d68843e91..fce169a848 100644 --- a/augur/cli/util.py +++ b/augur/cli/util.py @@ -42,39 +42,54 @@ def export_env(config): export_file.close() env_file.close() -@cli.command('kill') -@initialize_logging -def cli_kill_processes(): - """ - Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. 
- """ +def _stop_processes_handler(attach_handler=False): + if attach_handler is True: + _logger = logging.getLogger("augur") + else: + _logger = logger processes = get_augur_processes() if processes != []: for process in processes: if process.pid != os.getpid(): - logger.info(f"Killing process {process.pid}") + logger.info(f"Stopping process {process.pid}") try: process.send_signal(signal.SIGTERM) except psutil.NoSuchProcess as e: pass +@cli.command('stop') +@initialize_logging +def cli_stop_processes(): + """ + Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. + """ + _stop_processes_handler() + + +def stop_processes(): + _stop_processes_handler(attach_handler=True) + +@cli.command('kill') +@initialize_logging def kill_processes(): - logger = logging.getLogger("augur") + """ + Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. + """ processes = get_augur_processes() if processes != []: for process in processes: if process.pid != os.getpid(): logger.info(f"Killing process {process.pid}") try: - process.send_signal(signal.SIGTERM) + process.send_signal(signal.SIGKILL) except psutil.NoSuchProcess as e: - logger.warning(e) + pass @cli.command('list',) @initialize_logging def list_processes(): """ - Outputs the name and process ID (PID) of all currently running backend Augur processes, including any workers. Will only work in a virtual environment. + Outputs the name and process ID (PID) of all currently running backend Augur processes, including any workers. Will only work in a virtual environment. 
""" processes = get_augur_processes() for process in processes: diff --git a/docs/source/getting-started/command-line-interface/util.rst b/docs/source/getting-started/command-line-interface/util.rst index aac7b2d17b..d9d4920499 100644 --- a/docs/source/getting-started/command-line-interface/util.rst +++ b/docs/source/getting-started/command-line-interface/util.rst @@ -58,28 +58,28 @@ Example usage:: $ augur util kill # successful output looks like: - > CLI: [util.cli_kill_processes] [INFO] Killing process 33607 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33775 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33776 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33777 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33778 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33780 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33781 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33782 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33783 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33784 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33785 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33786 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33787 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33788 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33789 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33790 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33792 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33793 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33794 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33795 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33798 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33962 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33607 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33775 + > CLI: 
[util.cli_stop_processes] [INFO] Killing process 33776 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33777 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33778 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33780 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33781 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33782 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33783 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33784 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33785 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33786 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33787 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33788 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33789 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33790 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33792 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33793 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33794 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33795 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33798 + > CLI: [util.cli_stop_processes] [INFO] Killing process 33962 ``list`` --------- From 379f31d5e7f58e42f3013791728b0b2892e495be Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Wed, 2 Sep 2020 18:28:28 -0500 Subject: [PATCH 07/15] Update augur util kill usage Signed-off-by: Carter Landis --- Makefile | 14 +++++------ .../command-line-interface/util.rst | 23 ++++++++++++++++--- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index d6827f45d1..95fd488546 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,8 @@ default: # # Installation # -.PHONY: install install-dev -.PHONY: install-spdx install-spdx-sudo install-augur-sbom +.PHONY: install install-dev +.PHONY: install-spdx install-spdx-sudo install-augur-sbom .PHONY: clean rebuild install: @ 
./scripts/install/install.sh prod @@ -63,12 +63,12 @@ rebuild-dev: # .PHONY: dev-start dev-stop dev monitor-frontend monitor-backend monitor frontend backend-stop backend-start backend-restart backend clean rebuild -dev-start: +dev-start: @ scripts/control/start_augur.sh @ scripts/control/start_frontend.sh -dev-stop: - @ augur util kill +dev-stop: + @ augur util stop @ scripts/control/kill_frontend.sh dev: dev-stop dev-start @@ -100,7 +100,7 @@ test-python-versions: # # Documentation # -.PHONY: docs docs-view +.PHONY: docs docs-view docs: @ bash -c 'cd docs/ && rm -rf build/ && make html;' @@ -112,7 +112,7 @@ docs-view: docs # Docker Shortcuts # Do not use these unless you know what they mean. .PHONY: compose-run compose-run-database -.PHONY: build-backend run-backend build-frontend run-frontend build-database run-database +.PHONY: build-backend run-backend build-frontend run-frontend build-database run-database compose-run: diff --git a/docs/source/getting-started/command-line-interface/util.rst b/docs/source/getting-started/command-line-interface/util.rst index d9d4920499..0ed98a6bf6 100644 --- a/docs/source/getting-started/command-line-interface/util.rst +++ b/docs/source/getting-started/command-line-interface/util.rst @@ -48,14 +48,14 @@ Example usage:: AUGUR_DB_USER="your_db_user" AUGUR_DB_PASSWORD="your_db_password" -``kill`` +``stop`` --------- -Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. +Gracefully terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. 
Example usage:: # to stop the server and workers - $ augur util kill + $ augur util stop # successful output looks like: > CLI: [util.cli_stop_processes] [INFO] Killing process 33607 @@ -81,6 +81,23 @@ Example usage:: > CLI: [util.cli_stop_processes] [INFO] Killing process 33798 > CLI: [util.cli_stop_processes] [INFO] Killing process 33962 +``kill`` +--------- +Forcefully terminates all currently running backend Augur processes, including any workers, by using ``SIGKILL``. Will only work in a virtual environment. + +Example usage:: + + # to stop the server and workers + $ augur util kill + + # successful output looks like: + > CLI: [util.kill_processes] [INFO] Killing process 87340 + > CLI: [util.kill_processes] [INFO] Killing process 87573 + > CLI: [util.kill_processes] [INFO] Killing process 87574 + > CLI: [util.kill_processes] [INFO] Killing process 87575 + > CLI: [util.kill_processes] [INFO] Killing process 87576 + + ``list`` --------- Outputs the process ID (PID) of all currently running backend Augur processes, including any workers. Will only work in a virtual environment. From fde415aa427c2eef34753328b4c6597c9ebf9047 Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Fri, 11 Sep 2020 14:40:05 -0500 Subject: [PATCH 08/15] Update default config file location Currently, on all branches, using `augur configure generate` (the augur lib command to create a config file) behaves in an undesirable way if installed using `make install`. The issue is that after running `augur configure generate`, if you used `make install` to install Augur, your config file is not written to root of your cloned source code directory. It is instead written to the `site-packages/augur/` folder that lives with your Python binary inside your virtualenv folder. The reason that it writes to this location is due to how I implemented the config generation script. 
The script uses a package-level variable called `ROOT_AUGUR_DIRECTORY` to determine the location of the root of the file that is currently running; when installed in non-editable mode (without using the `-e` flag, like in `make install`), `pip` will copy ONLY the files defined in your `MANIFEST.in` from your cloned source code directory (if that's what you're installing) into that `site-packages` folder; **these are the files that are "running" when using augur installed in non-editable mode** - that is, without `-e`. However, when installing the package in editable mode with `-e` (like `pip install -e .` in `make install-dev`), the files are **not** copied into that folder. Instead, some stuff happens behind the scenes, and `pip` links the contents of your local cloned directory to the `site-packages` folder; this is what allows you to make changes, save, and then run the commands again. That means when we use `install-dev`, **these are the files that are running when we execute augur commands.** This is why the augur.config.json file is always installed to your cloned source code directory; that's the "root" of the currently running Augur binary. There's a very important caveat to using `make install`, however; _it has no way of knowing where you installed the binary from_. This means it has _no way of installing the config file to your cloned source code directory_. In fact, it's not even guaranteed that the source is on your machine; all the other packages you install with `pip` like `pandas` only have the files in their `MANIFEST.in` copied as well. If we want the config file to only ever be in one location, then this is a problem; it can't be in the source code directory like it has been. We need to pick a standard location on the file system to store the config file outside the cloned source code tree. To this end, this patch changes the default config file location to `$HOME/.augur/augur.config.json`.
However, if it detects a config file in the root Augur directory, then it will use that instead. Signed-off-by: Carter Landis --- augur/cli/configure.py | 16 +++++++++---- augur/cli/run.py | 2 +- augur/config.py | 23 ++++++++++++------- augur/logging.py | 4 +--- .../source/development-guide/installation.rst | 13 ++++++----- .../getting-started/collecting-data.rst | 16 ++++++------- scripts/install/config.sh | 12 +++++++++- scripts/install/install.sh | 4 ++-- 8 files changed, 56 insertions(+), 34 deletions(-) diff --git a/augur/cli/configure.py b/augur/cli/configure.py index 4327bc3eb3..4ac036fbe0 100644 --- a/augur/cli/configure.py +++ b/augur/cli/configure.py @@ -7,15 +7,15 @@ import click import json import logging +from pathlib import Path -from augur.config import default_config, ENVVAR_PREFIX +from augur.config import default_config, ENVVAR_PREFIX, CONFIG_HOME from augur.cli import initialize_logging from augur.logging import ROOT_AUGUR_DIRECTORY logger = logging.getLogger(__name__) ENVVAR_PREFIX = "AUGUR_" - @click.group('configure', short_help='Generate an augur.config.json') def cli(): pass @@ -30,14 +30,16 @@ def cli(): @click.option('--facade_repo_directory', help="Directory on the database server where Facade should clone repos", envvar=ENVVAR_PREFIX + 'FACADE_REPO_DIRECTORY') @click.option('--rc-config-file', help="File containing existing config whose values will be used as the defaults", type=click.Path(exists=True)) @click.option('--gitlab_api_key', help="GitLab API key for data collection from the GitLab API", envvar=ENVVAR_PREFIX + 'GITLAB_API_KEY') +@click.option('--write-to-src', is_flag=True, help="Write generated config file to the source code tree instead of default (for development use only)") @initialize_logging -def generate(db_name, db_host, db_user, db_port, db_password, github_api_key, facade_repo_directory, rc_config_file, gitlab_api_key): +def generate(db_name, db_host, db_user, db_port, db_password, github_api_key, 
facade_repo_directory, rc_config_file, gitlab_api_key, write_to_src=False): """ Generate an augur.config.json """ config = default_config rc_config = None + Path(CONFIG_HOME).mkdir(exist_ok=True) if rc_config_file != None: try: @@ -82,9 +84,13 @@ def generate(db_name, db_host, db_user, db_port, db_password, github_api_key, fa if facade_repo_directory is not None: config['Workers']['facade_worker']['repo_directory'] = facade_repo_directory + config_path = CONFIG_HOME + '/augur.config.json' + if write_to_src is True: + config_path = ROOT_AUGUR_DIRECTORY + '/augur.config.json' + try: - with open(os.path.abspath(ROOT_AUGUR_DIRECTORY + '/augur.config.json'), 'w') as f: + with open(os.path.abspath(config_path), 'w') as f: json.dump(config, f, indent=4) - logger.info('augur.config.json successfully created') + logger.info('Config written to ' + config_path) except Exception as e: logger.error("Error writing augur.config.json " + str(e)) diff --git a/augur/cli/run.py b/augur/cli/run.py index 36e6fb16b9..1ad27c5592 100644 --- a/augur/cli/run.py +++ b/augur/cli/run.py @@ -25,6 +25,7 @@ def cli(disable_housekeeper, skip_cleanup): """ augur_app = Application() logger.info("Augur application initialized") + logger.info(f"Using config file: {augur_app.config.config_file_location}") if not skip_cleanup: logger.debug("Cleaning up old Augur processes...") stop_processes() @@ -98,7 +99,6 @@ def exit(augur_app, worker_processes, master): if master is not None: logger.debug("Shutting down Gunicorn server") master.halt() - master = None logger.info("Shutdown complete") sys.exit(0) diff --git a/augur/config.py b/augur/config.py index b282acd3c4..2cb1b64cdf 100644 --- a/augur/config.py +++ b/augur/config.py @@ -2,10 +2,16 @@ import json import logging +from augur.logging import ROOT_AUGUR_DIRECTORY + ENVVAR_PREFIX = "AUGUR_" +CONFIG_HOME = f"{os.getenv('HOME', '~')}/.augur" default_config = { "version": 1, + "Augur": { + "developer": 0 + }, "Database": { "name": "augur", "host": 
"localhost", @@ -65,7 +71,7 @@ ], "model": "contributors", "repo_group_id": 0 - }, + }, { "delay": 1000000, "given": [ @@ -122,7 +128,7 @@ }, "insight_worker": { "port": 50300, - "metrics": {"issues-new": "issues", "code-changes": "commit_count", "code-changes-lines": "added", + "metrics": {"issues-new": "issues", "code-changes": "commit_count", "code-changes-lines": "added", "reviews": "pull_requests", "contributors-new": "new_contributors"}, "confidence_interval": 95, "contamination": 0.041, @@ -223,6 +229,7 @@ def __init__(self, root_augur_dir, given_config={}): self._root_augur_dir = root_augur_dir self._default_config = default_config self._env_config = {} + self.config_file_location = None self.load_config() self.version = self.get_version() self._config.update(given_config) @@ -268,9 +275,9 @@ def load_config(self): logger.debug("Attempting to load config file") try: - config_file_path = self.discover_config_file() + self.discover_config_file() try: - with open(config_file_path, 'r+') as config_file_handle: + with open(self.config_file_location, 'r+') as config_file_handle: self._config = json.loads(config_file_handle.read()) logger.debug("Config file loaded successfully") except json.decoder.JSONDecodeError as e: @@ -285,10 +292,10 @@ def load_config(self): self.load_env_configuration() def discover_config_file(self): - default_config_path = self._root_augur_dir + '/' + self._default_config_file_name + developer_config_location = ROOT_AUGUR_DIRECTORY + "/" + self._default_config_file_name config_file_path = None - config_locations = [self._default_config_file_name, default_config_path + config_locations = [developer_config_location, CONFIG_HOME + "/" + self._default_config_file_name , f"/opt/augur/{self._default_config_file_name}"] if os.getenv('AUGUR_CONFIG_FILE', None) is not None: config_file_path = os.getenv('AUGUR_CONFIG_FILE') @@ -302,9 +309,9 @@ def discover_config_file(self): except FileNotFoundError: pass if config_file_path: - return 
config_file_path + self.config_file_location = config_file_path else: - raise(AugurConfigFileNotFoundException(message=f"{self._default_config_file_name} not found", errors=None)) + raise(AugurConfigFileNotFoundException(message="Config file was not found", errors=None)) def load_env_configuration(self): self.set_env_value(section='Database', name='key', environment_variable='AUGUR_GITHUB_API_KEY') diff --git a/augur/logging.py b/augur/logging.py index f41aaf2617..97c3b336c1 100644 --- a/augur/logging.py +++ b/augur/logging.py @@ -3,7 +3,7 @@ import logging.handlers from logging import FileHandler, StreamHandler, Formatter from multiprocessing import Process, Queue, Event, current_process -from time import sleep +from time import sleep import os from pathlib import Path import atexit @@ -98,7 +98,6 @@ def __init__(self, disable_logs=False, reset_logfiles=True): if disable_logs: self._disable_all_logging() - def _disable_all_logging(self): for logger in ["augur", "augur.application", "augur.housekeeper", "augur.config", "augur.cli", "root"]: lg = logging.getLogger(logger) @@ -142,7 +141,6 @@ def configure_logging(self, augur_config): self._configure_logfiles() self._configure_cli_logger() self._configure_gunicorn_logging() - logger.debug("Loggers are fully configured") def _configure_logfiles(self): self.logfile_config = { diff --git a/docs/source/development-guide/installation.rst b/docs/source/development-guide/installation.rst index 5f2a07ac63..eb277aebf1 100644 --- a/docs/source/development-guide/installation.rst +++ b/docs/source/development-guide/installation.rst @@ -14,10 +14,10 @@ However, during the course of development, you might find that you need to reset More information about Augur's Docker images can be found `here <../docker/docker.html>`_. If you're new to our Docker process, we recommend following the `introduction section <../docker/toc.html>`_ first. 
-Installing the source code +Installing from source ---------------------------- -The process for installing Augur's source code for development is essentially the same as detailed in the `installation <../getting-started/installation.html>`_ section of the Getting Started guide. +The process for installing Augur's source code for development is essentially the same as detailed in the `installation <../getting-started/installation.html>`_ section of the Getting Started guide. **However**, when running the installation script, use the following command instead: @@ -25,15 +25,16 @@ The process for installing Augur's source code for development is essentially th $ make install-dev -This will install a few extra dependencies for testing and documentation, as well as install all the Python packages in `edit mode `_. -This essentially means you will not have to run ``pip install .`` everytime you make a change to the backend. +This will install a few extra dependencies for testing and documentation, as well as install all the Python packages in `editable mode `_. This means you will not have to reinstall the package everytime you make a change to the Python source code. + +This command will also create your ``augur.config.json`` file in the root of your cloned source code directory **instead of** the default location in ``$HOME/.augur/``. This is purely for convenience sake, as it will allow you to open this file in your text editor with all the other source code files, and also allows you to have multiple developer installations of Augur on the same machine if needed. If Augur finds a config file in both the root of the cloned directory AND in the default location, it will always use the one in the root of the cloned directory. .. note:: - You can still use ``make clean`` to get rid of the installed binaries if something we wrong and you want to try again. 
+ You can still use ``make clean`` to get rid of the installed binaries if something went wrong and you want to try again. Conclusion ----------- -Like I said, it's pretty similar. For (optional) further reading, the `Makefile `_ documentation and the `Creating a Metric guide `_ are good places to start. +All in all, it's pretty similar. For further reading, the `Makefile `_ documentation and the `Creating a Metric guide `_ are good places to start. Happy hacking! \ No newline at end of file diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index 8e91f29aab..ce74664199 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -21,7 +21,7 @@ There are a few workers that ship ready to collect out of the box: - ``linux_badge_worker`` (collects `CII badging `_ data from the CII API) - ``insight_worker`` (queries Augur's metrics API to find interesting anomalies in the collected data) -All worker configuration options are found in the ``Workers`` block of the ``augur.config.json`` file (which you generated at the end of the previous section) with each worker having its own subsection with same title as the the worker's name. +All worker configuration options are found in the ``Workers`` block of the ``augur.config.json`` file (which was generated for you at the end of the previous section). This file is located at ``$HOME/.augur/augur.config.json``. Each worker has its own subsection with same title as the the worker's name. A full configuration file reference can be found on the next page, but we recommend leaving the defaults and only changing them when necessary; read on for more on how to make sure your workers are properly configured. 
@@ -51,7 +51,7 @@ Next up are the configuration options specific to each worker (but some workers ``insight_worker`` :::::::::::::::::: -We recommend leaving the defaults in place for the insight worker unless you interested in other metrics, or anomalies for a different time period. +We recommend leaving the defaults in place for the insight worker unless you interested in other metrics, or anomalies for a different time period. - ``training_days``, which specifies the date range that the ``insight_worker`` should use as its baseline for the statistical comparison. Defaults to ``365``, meaning that the worker will identify metrics that have had anomalies compared to their values over the course of the past year, starting at the current date. @@ -66,15 +66,15 @@ We recommend leaving the defaults in place for the insight worker unless you int 'endpoint_name_1': 'field_2_of_endpoint', 'endpoint_name_2': 'field_1_of_endpoint', ... - } + } # defaults to the following { - "issues-new": "issues", - "code-changes": "commit_count", - "code-changes-lines": "added", - "reviews": "pull_requests", + "issues-new": "issues", + "code-changes": "commit_count", + "code-changes-lines": "added", + "reviews": "pull_requests", "contributors-new": "new_contributors" } @@ -114,7 +114,7 @@ If you're using the Docker container, you can use the `provided UI <../docker/us Running collections -------------------- -Congratuations! At this point you (hopefully) have a fully functioning and configured Augur instance. +Congratuations! At this point you (hopefully) have a fully functioning and configured Augur instance. After you've loaded your repos, you're ready for your first collection run. We recommend running only the default workers first to gather the initial data. If you're collecting data for a lot of repositories, or repositories with a lot of data, we recommend increasing the number of ``github_workers`` and ``pull_request_workers``. 
diff --git a/scripts/install/config.sh b/scripts/install/config.sh index c635ff4b41..15525d2ee3 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -4,6 +4,8 @@ PS3=" Please type the number corresponding to your selection and then press the Enter/Return key. Your choice: " +target=$1 + function get_api_key_and_repo_path() { echo echo "Please provide a valid GitHub API key." @@ -91,7 +93,14 @@ function save_credentials() { echo "**********************************" echo - augur configure generate --db_name $db_name --db_host $host --db_port $port --db_user $db_user --db_password $password --github_api_key $github_api_key --gitlab_api_key $gitlab_api_key --facade_repo_directory $facade_repo_directory + cmd=( augur configure generate --db_name $db_name --db_host $host --db_port $port --db_user $db_user --db_password $password --github_api_key $github_api_key --gitlab_api_key $gitlab_api_key --facade_repo_directory $facade_repo_directory ) + + if [[ $target == *"dev"* ]]; then + cmd+=( --write-to-src ) + fi + + "${cmd[@]}" + augur db check-pgpass } @@ -106,6 +115,7 @@ function create_db_schema() { echo } + echo echo "**********************************" echo "Setting up database credentials..." diff --git a/scripts/install/install.sh b/scripts/install/install.sh index 77ad991e9f..6a33b2ec07 100755 --- a/scripts/install/install.sh +++ b/scripts/install/install.sh @@ -33,9 +33,9 @@ if [[ ! -e augur.config.json ]]; then else read -r -p "We noticed you have a config file already. Would you like to overwrite it with a new one? [Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo "Generating a config file..." 
- scripts/install/config.sh + scripts/install/config.sh $target echo ;; *) From e5ccd2a5e4a11a567720e481f9d5a8c70d331a8f Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Fri, 11 Sep 2020 14:46:00 -0500 Subject: [PATCH 09/15] Update version to 0.13.1 Signed-off-by: Carter Landis --- metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata.py b/metadata.py index 0b47499f56..7ef300e746 100644 --- a/metadata.py +++ b/metadata.py @@ -4,8 +4,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics & data collection" -__version__ = "0.13.0" -__release__ = "v0.13.0" +__version__ = "0.13.1" +__release__ = "v0.13.1" __license__ = "MIT" __copyright__ = "CHAOSS & Augurlabs 2020" From 1260ae1fe5727dbcd79c5efc736c291ff3536ddd Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Fri, 11 Sep 2020 15:38:52 -0500 Subject: [PATCH 10/15] Update installation logging for clarity This patch exposes the logs for the dependency installation process, including all errors. It also moves all logs from logs/install to just logs/. Signed-off-by: Carter Landis --- docs/source/getting-started/installation.rst | 8 ++++---- scripts/install/api_key.sh | 4 +--- scripts/install/backend.sh | 1 - scripts/install/checks.sh | 7 +------ scripts/install/frontend.sh | 9 ++------- scripts/install/install.sh | 15 +++++---------- 6 files changed, 13 insertions(+), 31 deletions(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index e36d4b3037..fc69caacfd 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -23,14 +23,14 @@ Backend Required: - `GitHub Access Token `__ (``repo`` and all ``read`` scopes except ``enterprise``) -- `GitLab Access Token `__ +- `GitLab Access Token `__ - `Python 3.6 or later `__ Our REST API & data collection workers are written in Python 3.6. 
We query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository, so GitLab and GitHub access tokens are **required** for data collection. Optional: -- `Go 1.12 or later `__ +- `Go 1.12 or later `__ The ``value_worker`` uses a Go package called `scc `_ to run COCOMO calculations. Once you've installed Go, follow the appropriate steps for your system to install the ``scc`` package. @@ -63,7 +63,7 @@ after which you'll move on to the next section to configure the workers. $ git clone https://github.com/chaoss/augur.git $ cd augur/ -1. Create a virtual environment in a directory of your choosing. Be sure to use the correct ``python`` command for +1. Create a virtual environment in a directory of your choosing. Be sure to use the correct ``python`` command for your installation of Python 3: on most systems, this is ``python3``, but yours may differ (you can use ``python -V`` or ``python3 -V`` to check). .. code-block:: bash @@ -92,7 +92,7 @@ your installation of Python 3: on most systems, this is ``python3``, but yours m $ make install -If you think something went wrong, check the log files under ``logs/install/``. If you want to try again, you can use ``make clean`` to delete any build files before running ``make install`` again. +If you think something went wrong, check the log files in ``logs/``. If you want to try again, you can use ``make clean`` to delete any build files before running ``make install`` again. .. note:: diff --git a/scripts/install/api_key.sh b/scripts/install/api_key.sh index a9fc868830..4c767b5806 100755 --- a/scripts/install/api_key.sh +++ b/scripts/install/api_key.sh @@ -20,13 +20,11 @@ echo if [[ $existing_api_key != *"invalid_key"* ]]; then read -r -p "We noticed you have an Augur API key already. Would you like to overwrite it with a new one? 
[Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo get_api_key ;; *) - echo "Skipping API key generation process and resuming installation..." - echo ;; esac else diff --git a/scripts/install/backend.sh b/scripts/install/backend.sh index 7700a0e545..4d0e12a235 100755 --- a/scripts/install/backend.sh +++ b/scripts/install/backend.sh @@ -12,4 +12,3 @@ if [[ $target == *"prod"* ]]; then else pip install -e .[dev] fi - diff --git a/scripts/install/checks.sh b/scripts/install/checks.sh index 43f09933c7..942bcf46b2 100755 --- a/scripts/install/checks.sh +++ b/scripts/install/checks.sh @@ -4,7 +4,7 @@ if [[ -z "$VIRTUAL_ENV" ]]; then echo "*** We noticed you're not currently inside a virtual environment. Augur MUST be run inside a virtual environment. ***" read -r -p "*** Would you like us to generate a environment for you automatically? If you select no, you must create it yourself. [Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo $augur_python_command -m venv $HOME/.virtualenvs/augur_env echo "*** Your environment was installed to $HOME/.virtualenvs/augur_env/. Please activate your environment using your shell's appropriate command. ***" @@ -57,9 +57,4 @@ fi if [[ ! -d logs ]]; then mkdir logs - mkdir logs/install -fi - -if [[ ! -d logs/install ]]; then - mkdir logs/install fi diff --git a/scripts/install/frontend.sh b/scripts/install/frontend.sh index 3b341a7b6b..28660fcac5 100755 --- a/scripts/install/frontend.sh +++ b/scripts/install/frontend.sh @@ -22,14 +22,9 @@ function install_deps() { read -r -p "Would you like to install Augur's frontend dependencies? [Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo "Installing..." - - if [[ ! -d logs/install ]]; then - mkdir logs/install - fi - - install_deps > logs/install/frontend.log 2>&1 + install_deps > logs/frontend-install.log echo "Done!" 
;; *) diff --git a/scripts/install/install.sh b/scripts/install/install.sh index 6a33b2ec07..f3113d3980 100755 --- a/scripts/install/install.sh +++ b/scripts/install/install.sh @@ -18,29 +18,24 @@ else echo fi -echo "Installing the backend and its dependencies..." -scripts/install/backend.sh $target > logs/install/backend.log 2>&1 +scripts/install/backend.sh $target 2>&1 | tee logs/backend-install.log echo "Done!" -echo "Installing workers and their dependencies..." -scripts/install/workers.sh $target > logs/install/workers.log 2>&1 +scripts/install/workers.sh $target 2>&1 | tee logs/workers-install.log echo "Done!" -if [[ ! -e augur.config.json ]]; then +if [[ ! -e augur.config.json && ! -e $HOME/.augur/augur.config.json ]]; then echo "No config file found. Generating..." - scripts/install/config.sh - echo + scripts/install/config.sh $target else + echo "file found" read -r -p "We noticed you have a config file already. Would you like to overwrite it with a new one? [Y/n] " response case "$response" in [yY][eE][sS]|[yY]) echo "Generating a config file..." scripts/install/config.sh $target - echo ;; *) - echo "Skipping config generation process and resuming installation..." 
- echo ;; esac fi From 35e1815c8e6cd22d3c319e99d01bd2321dc91723 Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Fri, 11 Sep 2020 16:13:52 -0500 Subject: [PATCH 11/15] Hide password during config setup process Signed-off-by: Carter Landis --- scripts/install/config.sh | 3 ++- scripts/install/install.sh | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/install/config.sh b/scripts/install/config.sh index 15525d2ee3..da70221ad9 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -72,7 +72,8 @@ function set_db_credentials() { read -p "Database: " db_name read -p "User: " db_user - read -p "Password: " password + read -s -p "Password: " password + echo if [[ $install_locally == 'false' ]]; then read -p "Host: " host diff --git a/scripts/install/install.sh b/scripts/install/install.sh index f3113d3980..06f9648ff7 100755 --- a/scripts/install/install.sh +++ b/scripts/install/install.sh @@ -28,7 +28,6 @@ if [[ ! -e augur.config.json && ! -e $HOME/.augur/augur.config.json ]]; then echo "No config file found. Generating..." scripts/install/config.sh $target else - echo "file found" read -r -p "We noticed you have a config file already. Would you like to overwrite it with a new one? 
[Y/n] " response case "$response" in [yY][eE][sS]|[yY]) From 48eeb2be5141afbb797440922ebeb6a8f6657453 Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Fri, 11 Sep 2020 16:19:38 -0500 Subject: [PATCH 12/15] Add sudo warning to installation docs Signed-off-by: Carter Landis --- docs/source/getting-started/installation.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index fc69caacfd..df782a2047 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -12,7 +12,7 @@ If you're running Augur on macOS, we strongly suggest adding the following line macOS takes "helpful" measures to prevent Python subprocesses (which Augur uses) from forking cleanly, and setting this environment variable disables these safety measures to restore normal Python functionality. .. warning:: - If you skip this step, you'll likely see all housekeeer jobs randomly exiting for no reason, and the Gunicorn server will not behave nicely either. Don't say we didn't warn you! + If you skip this step, you'll likely see all housekeeer jobs randomly exiting for no reason, and the Gunicorn server will not behave nicely either. Skip this step at your peril! Dependencies @@ -56,6 +56,9 @@ after which you'll move on to the next section to configure the workers. .. note:: Lines that start with a ``$`` denote a command to be run in an interactive terminal. +.. warning:: + Do **NOT** install or run Augur using ``sudo``. It is not required, and using it will inevitably cause some permissions trouble. Don't say we didn't warn you! + 0. Clone the repository and change to the newly created directory. .. code-block:: bash From 4beb303d528821c5aa01dc38b0f74f2c080a66de Mon Sep 17 00:00:00 2001 From: "Dawn M. 
Foster" Date: Fri, 18 Sep 2020 13:29:41 +0100 Subject: [PATCH 13/15] fixed a few links and avoided using the word 'here' as a link Signed-off-by: Dawn M. Foster --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 19bb96ba01..3362b9593c 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ master | [![Build Status](https://travis-ci.org/chaoss/augur.svg?branch=master)] ## What is Augur? Augur is a tool for collecting and measuring structured data -about free (https://www.fsf.org/about/) and [open source](https://opensource.org/docs/osd) (FOSS) communities. +about [free](https://www.fsf.org/about/) and [open source](https://opensource.org/docs/osd) (FOSS) communities. We gather trace data for a group of repositories, normalize it into our data model, and provide a variety of metrics about said @@ -20,8 +20,7 @@ questions about the way these communities evolve. We are a [CHAOSS](https://chaoss.community>) project, and many of our metrics are implementations of the metrics defined by our awesome community. You -can find more information about how to get involved -(here)[https://chaoss.community/participate/]. +can find more information about [how to get involved on the CHAOSS website](https://chaoss.community/participate/). ## Collecting Data @@ -37,7 +36,7 @@ This data is collected by dedicated data collection workers controlled by Augur, ## Getting Started -If you're interested in collecting data with our tool, the Augur team has worked hard to develop a detailed guide to getting started with our project, which can be found [here](https://oss-augur.readthedocs.io/en/master/getting-started/toc.html) alongside our main documentation. +If you're interested in collecting data with our tool, the Augur team has worked hard to develop a detailed guide to getting started with our project, which can be found [in our documentation](https://oss-augur.readthedocs.io/en/master/getting-started/toc.html). 
If you're looking to contribute to Augur's code, you can find installation instructions, development guides, architecture references (coming soon), best practices and more in our [developer documentation](https://oss-augur.readthedocs.io/en/master/development-guide/toc.html). From 19beec81b192289aab85767caea791c8ec246229 Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Mon, 21 Sep 2020 14:19:09 -0500 Subject: [PATCH 14/15] Add ability to create repo groups from GH orgs on the CLI Signed-off-by: Carter Landis --- augur/cli/db.py | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/augur/cli/db.py b/augur/cli/db.py index 6f1afb3e87..edafdd16c0 100644 --- a/augur/cli/db.py +++ b/augur/cli/db.py @@ -11,6 +11,7 @@ import click import sqlalchemy as s import pandas as pd +import requests from sqlalchemy import exc from augur.cli import pass_config, pass_application @@ -32,8 +33,8 @@ def add_repos(augur_app, filename): repo_group_IDs = [group[0] for group in df.fetchall()] insertSQL = s.sql.text(""" - INSERT INTO augur_data.repo(repo_group_id, repo_git, repo_status, - tool_source, tool_version, data_source, data_collection_date) + INSERT INTO augur_data.repo(repo_group_id, repo_git, repo_status, + tool_source, tool_version, data_source, data_collection_date) VALUES (:repo_group_id, :repo_git, 'New', 'CLI', 1.0, 'Git', CURRENT_TIMESTAMP) """) @@ -81,6 +82,47 @@ def add_repo_groups(augur_app, filename): else: logger.info(f"Repo group with ID {row[1]} for repo group {row[1]} already exists, skipping...") +@cli.command('add-github-org') +@click.argument('organization_name') +@pass_application +def add_github_org(augur_app, organization_name): + """ + Create a new repo group for the given GitHub organization and add its repositories to Augur's database + """ + org_query_response = requests.get(f"https://api.github.com/orgs/{organization_name}").json() + if "login" in org_query_response: + logger.info(f"Organization \"{organization_name}\" found") + else: + 
logger.fatal(f"No organization with name {organization_name} could be found") + exit(1) + + page = 1 + headers = {'Authorization': 'token %s' % augur_app.config.get_value("Database", "key")} + all_repos = [] + repo_query_response = requests.get(org_query_response['repos_url'] + f"?per_page=100&page={page}", headers=headers).json() + while repo_query_response != []: + for repo in repo_query_response: + all_repos.append(repo) + + page+=1 + repo_query_response = requests.get(org_query_response['repos_url'] + f"?per_page=100&page={page}", headers=headers).json() + + insert_repo_group_sql = s.sql.text(""" + INSERT INTO "augur_data"."repo_groups"("rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP) RETURNING repo_group_id; + """) + new_repo_group_id = augur_app.database.execute(insert_repo_group_sql, repo_group_name=organization_name).fetchone()[0] + + insert_repo_sql = s.sql.text(""" + INSERT INTO augur_data.repo(repo_group_id, repo_git, repo_status, + tool_source, tool_version, data_source, data_collection_date) + VALUES (:repo_group_id, :repo_git, 'New', 'CLI', 1.0, 'Git', CURRENT_TIMESTAMP) + """) + logger.info(f"{organization_name} repo group created") + + for repo in all_repos: + logger.info(f"Adding {organization_name}/{repo['name']} ({repo['clone_url']})") + result = augur_app.database.execute(insert_repo_sql, repo_group_id=new_repo_group_id, repo_git=repo['clone_url']) + @cli.command('update-repo-directory') @click.argument('repo_directory') @pass_application @@ -240,7 +282,7 @@ def check_pgpass(config): @click.option('--port', default='5432') def init_database(default_db_name, default_user, default_password, target_db_name, target_user, target_password, host, port): """ - Create database with the given credentials using the given maintenance 
database + Create database with the given credentials using the given maintenance database """ config = { 'Database': { From eb7206c7376fe98049133d0f0ea55e5ae163e3cb Mon Sep 17 00:00:00 2001 From: Carter Landis Date: Wed, 23 Sep 2020 11:55:39 -0500 Subject: [PATCH 15/15] Refactor add-github-org impl to actually make sense Signed-off-by: Carter Landis --- augur/cli/db.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/augur/cli/db.py b/augur/cli/db.py index edafdd16c0..7066c1d046 100644 --- a/augur/cli/db.py +++ b/augur/cli/db.py @@ -96,16 +96,15 @@ def add_github_org(augur_app, organization_name): logger.fatal(f"No organization with name {organization_name} could be found") exit(1) + all_repos = [] page = 1 + repo_query_response = None headers = {'Authorization': 'token %s' % augur_app.config.get_value("Database", "key")} - all_repos = [] - repo_query_response = requests.get(org_query_response['repos_url'] + f"?per_page=100&page={page}", headers=headers).json() while repo_query_response != []: + repo_query_response = requests.get(org_query_response['repos_url'] + f"?per_page=100&page={page}", headers=headers).json() for repo in repo_query_response: all_repos.append(repo) - page+=1 - repo_query_response = requests.get(org_query_response['repos_url'] + f"?per_page=100&page={page}", headers=headers).json() insert_repo_group_sql = s.sql.text(""" INSERT INTO "augur_data"."repo_groups"("rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP) RETURNING repo_group_id;