diff --git a/.travis.yml b/.travis.yml index 0df9abb1dd..6967a4ebd5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,8 @@ env: before_install: - docker run -d -p 5432:5432 --name augur_test_database augurlabs/augur:test_data@sha256:fd2d9a178a9fee7cd548bd40a16e08d4611be22892491e817aafd53502f74cd0 install: - - pip install .[dev] + - ./scripts/install/backend.sh + - ./scripts/install/workers.sh - augur configure generate script: diff --git a/Makefile b/Makefile index d6827f45d1..95fd488546 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,8 @@ default: # # Installation # -.PHONY: install install-dev -.PHONY: install-spdx install-spdx-sudo install-augur-sbom +.PHONY: install install-dev +.PHONY: install-spdx install-spdx-sudo install-augur-sbom .PHONY: clean rebuild install: @ ./scripts/install/install.sh prod @@ -63,12 +63,12 @@ rebuild-dev: # .PHONY: dev-start dev-stop dev monitor-frontend monitor-backend monitor frontend backend-stop backend-start backend-restart backend clean rebuild -dev-start: +dev-start: @ scripts/control/start_augur.sh @ scripts/control/start_frontend.sh -dev-stop: - @ augur util kill +dev-stop: + @ augur util stop @ scripts/control/kill_frontend.sh dev: dev-stop dev-start @@ -100,7 +100,7 @@ test-python-versions: # # Documentation # -.PHONY: docs docs-view +.PHONY: docs docs-view docs: @ bash -c 'cd docs/ && rm -rf build/ && make html;' @@ -112,7 +112,7 @@ docs-view: docs # Docker Shortcuts # Do not use these unless you know what they mean. .PHONY: compose-run compose-run-database -.PHONY: build-backend run-backend build-frontend run-frontend build-database run-database +.PHONY: build-backend run-backend build-frontend run-frontend build-database run-database compose-run: diff --git a/README.md b/README.md index 19bb96ba01..3362b9593c 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ master | [![Build Status](https://travis-ci.org/chaoss/augur.svg?branch=master)] ## What is Augur? 
Augur is a tool for collecting and measuring structured data -about free (https://www.fsf.org/about/) and [open source](https://opensource.org/docs/osd) (FOSS) communities. +about [free](https://www.fsf.org/about/) and [open source](https://opensource.org/docs/osd) (FOSS) communities. We gather trace data for a group of repositories, normalize it into our data model, and provide a variety of metrics about said @@ -20,8 +20,7 @@ questions about the way these communities evolve. We are a [CHAOSS](https://chaoss.community>) project, and many of our metrics are implementations of the metrics defined by our awesome community. You -can find more information about how to get involved -(here)[https://chaoss.community/participate/]. +can find more information about [how to get involved on the CHAOSS website](https://chaoss.community/participate/). ## Collecting Data @@ -37,7 +36,7 @@ This data is collected by dedicated data collection workers controlled by Augur, ## Getting Started -If you're interested in collecting data with our tool, the Augur team has worked hard to develop a detailed guide to getting started with our project, which can be found [here](https://oss-augur.readthedocs.io/en/master/getting-started/toc.html) alongside our main documentation. +If you're interested in collecting data with our tool, the Augur team has worked hard to develop a detailed guide to getting started with our project, which can be found [in our documentation](https://oss-augur.readthedocs.io/en/master/getting-started/toc.html). If you're looking to contribute to Augur's code, you can find installation instructions, development guides, architecture references (coming soon), best practices and more in our [developer documentation](https://oss-augur.readthedocs.io/en/master/development-guide/toc.html). 
diff --git a/augur/cli/configure.py b/augur/cli/configure.py index 4327bc3eb3..4ac036fbe0 100644 --- a/augur/cli/configure.py +++ b/augur/cli/configure.py @@ -7,15 +7,15 @@ import click import json import logging +from pathlib import Path -from augur.config import default_config, ENVVAR_PREFIX +from augur.config import default_config, ENVVAR_PREFIX, CONFIG_HOME from augur.cli import initialize_logging from augur.logging import ROOT_AUGUR_DIRECTORY logger = logging.getLogger(__name__) ENVVAR_PREFIX = "AUGUR_" - @click.group('configure', short_help='Generate an augur.config.json') def cli(): pass @@ -30,14 +30,16 @@ def cli(): @click.option('--facade_repo_directory', help="Directory on the database server where Facade should clone repos", envvar=ENVVAR_PREFIX + 'FACADE_REPO_DIRECTORY') @click.option('--rc-config-file', help="File containing existing config whose values will be used as the defaults", type=click.Path(exists=True)) @click.option('--gitlab_api_key', help="GitLab API key for data collection from the GitLab API", envvar=ENVVAR_PREFIX + 'GITLAB_API_KEY') +@click.option('--write-to-src', is_flag=True, help="Write generated config file to the source code tree instead of default (for development use only)") @initialize_logging -def generate(db_name, db_host, db_user, db_port, db_password, github_api_key, facade_repo_directory, rc_config_file, gitlab_api_key): +def generate(db_name, db_host, db_user, db_port, db_password, github_api_key, facade_repo_directory, rc_config_file, gitlab_api_key, write_to_src=False): """ Generate an augur.config.json """ config = default_config rc_config = None + Path(CONFIG_HOME).mkdir(exist_ok=True) if rc_config_file != None: try: @@ -82,9 +84,13 @@ def generate(db_name, db_host, db_user, db_port, db_password, github_api_key, fa if facade_repo_directory is not None: config['Workers']['facade_worker']['repo_directory'] = facade_repo_directory + config_path = CONFIG_HOME + '/augur.config.json' + if write_to_src is True: + 
config_path = ROOT_AUGUR_DIRECTORY + '/augur.config.json' + try: - with open(os.path.abspath(ROOT_AUGUR_DIRECTORY + '/augur.config.json'), 'w') as f: + with open(os.path.abspath(config_path), 'w') as f: json.dump(config, f, indent=4) - logger.info('augur.config.json successfully created') + logger.info('Config written to ' + config_path) except Exception as e: logger.error("Error writing augur.config.json " + str(e)) diff --git a/augur/cli/db.py b/augur/cli/db.py index 6f1afb3e87..7066c1d046 100644 --- a/augur/cli/db.py +++ b/augur/cli/db.py @@ -11,6 +11,7 @@ import click import sqlalchemy as s import pandas as pd +import requests from sqlalchemy import exc from augur.cli import pass_config, pass_application @@ -32,8 +33,8 @@ def add_repos(augur_app, filename): repo_group_IDs = [group[0] for group in df.fetchall()] insertSQL = s.sql.text(""" - INSERT INTO augur_data.repo(repo_group_id, repo_git, repo_status, - tool_source, tool_version, data_source, data_collection_date) + INSERT INTO augur_data.repo(repo_group_id, repo_git, repo_status, + tool_source, tool_version, data_source, data_collection_date) VALUES (:repo_group_id, :repo_git, 'New', 'CLI', 1.0, 'Git', CURRENT_TIMESTAMP) """) @@ -81,6 +82,46 @@ def add_repo_groups(augur_app, filename): else: logger.info(f"Repo group with ID {row[1]} for repo group {row[1]} already exists, skipping...") +@cli.command('add-github-org') +@click.argument('organization_name') +@pass_application +def add_github_org(augur_app, organization_name): + """ + Create new repo groups in Augur's database + """ + org_query_response = requests.get(f"https://api.github.com/orgs/{organization_name}").json() + if "login" in org_query_response: + logger.info(f"Organization \"{organization_name}\" found") + else: + logger.fatal(f"No organization with name {organization_name} could be found") + exit(1) + + all_repos = [] + page = 1 + repo_query_response = None + headers = {'Authorization': 'token %s' % augur_app.config.get_value("Database", 
"key")} + while repo_query_response != []: + repo_query_response = requests.get(org_query_response['repos_url'] + f"?per_page=100&page={page}", headers=headers).json() + for repo in repo_query_response: + all_repos.append(repo) + page+=1 + + insert_repo_group_sql = s.sql.text(""" + INSERT INTO "augur_data"."repo_groups"("rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP) RETURNING repo_group_id; + """) + new_repo_group_id = augur_app.database.execute(insert_repo_group_sql, repo_group_name=organization_name).fetchone()[0] + + insert_repo_sql = s.sql.text(""" + INSERT INTO augur_data.repo(repo_group_id, repo_git, repo_status, + tool_source, tool_version, data_source, data_collection_date) + VALUES (:repo_group_id, :repo_git, 'New', 'CLI', 1.0, 'Git', CURRENT_TIMESTAMP) + """) + logger.info(f"{organization_name} repo group created") + + for repo in all_repos: + logger.info(f"Adding {organization_name}/{repo['name']} ({repo['clone_url']})") + result = augur_app.database.execute(insert_repo_sql, repo_group_id=new_repo_group_id, repo_git=repo['clone_url']) + @cli.command('update-repo-directory') @click.argument('repo_directory') @pass_application @@ -240,7 +281,7 @@ def check_pgpass(config): @click.option('--port', default='5432') def init_database(default_db_name, default_user, default_password, target_db_name, target_user, target_password, host, port): """ - Create database with the given credentials using the given maintenance database + Create database with the given credentials using the given maintenance database """ config = { 'Database': { diff --git a/augur/cli/run.py b/augur/cli/run.py index 3352089a5f..1ad27c5592 100644 --- a/augur/cli/run.py +++ b/augur/cli/run.py @@ -11,7 +11,7 @@ from augur.housekeeper import Housekeeper from 
augur.server import Server -from augur.cli.util import kill_processes +from augur.cli.util import stop_processes from augur.application import Application logger = logging.getLogger("augur") @@ -25,9 +25,10 @@ def cli(disable_housekeeper, skip_cleanup): """ augur_app = Application() logger.info("Augur application initialized") + logger.info(f"Using config file: {augur_app.config.config_file_location}") if not skip_cleanup: logger.debug("Cleaning up old Augur processes...") - kill_processes() + stop_processes() time.sleep(2) else: logger.debug("Skipping process cleanup") @@ -98,7 +99,6 @@ def exit(augur_app, worker_processes, master): if master is not None: logger.debug("Shutting down Gunicorn server") master.halt() - master = None logger.info("Shutdown complete") sys.exit(0) diff --git a/augur/cli/util.py b/augur/cli/util.py index da75063dc8..fce169a848 100644 --- a/augur/cli/util.py +++ b/augur/cli/util.py @@ -7,7 +7,6 @@ import signal import logging from subprocess import call, run -import time import psutil import click @@ -43,68 +42,46 @@ def export_env(config): export_file.close() env_file.close() -@cli.command('kill') -@initialize_logging -def cli_kill_processes(): - """ - Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. 
- """ +def _stop_processes_handler(attach_handler=False): + if attach_handler is True: + _logger = logging.getLogger("augur") + else: + _logger = logger processes = get_augur_processes() if processes != []: for process in processes: if process.pid != os.getpid(): - logger.info(f"Terminating process {process.pid}") + logger.info(f"Stopping process {process.pid}") try: process.send_signal(signal.SIGTERM) - logger.info(f"sending SIGTERM Signal to {process.pid}") except psutil.NoSuchProcess as e: pass - logger.info(f"Waiting to check if processes terminated.") +@cli.command('stop') +@initialize_logging +def cli_stop_processes(): + """ + Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. + """ + _stop_processes_handler() - time.sleep(15) - logger.info(f"Checking on process termination.") - processes = get_augur_processes() - - if processes != []: - for process in processes: - - if process.pid != os.getpid(): - logger.info(f"Killing process {process.pid}") - try: - process.send_signal(signal.SIGKILL) - logger.info(f"sending SIGKILL Signal to {process.pid}") - except psutil.NoSuchProcess as e: - pass +def stop_processes(): + _stop_processes_handler(attach_handler=True) +@cli.command('kill') +@initialize_logging def kill_processes(): - logger = logging.getLogger("augur") - processes = get_augur_processes() - if processes != []: - for process in processes: - if process.pid != os.getpid(): - logger.info(f"Terminating process {process.pid}") - try: - process.send_signal(signal.SIGTERM) - logger.info(f"sending SIGTERM Signal to {process.pid}") - except psutil.NoSuchProcess as e: - logger.warning(e) - logger.info(f"Waiting to check if processes terminated.") - - time.sleep(15) - logger.info(f"Checking on process termination.") - + """ + Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. 
+ """ processes = get_augur_processes() - if processes != []: for process in processes: if process.pid != os.getpid(): - logger.info(f"Killing process {process.pid}") logger.info(f"Killing process {process.pid}") try: process.send_signal(signal.SIGKILL) - logger.info(f"sending SIGKILL Signal to {process.pid}") except psutil.NoSuchProcess as e: pass @@ -112,7 +89,7 @@ def kill_processes(): @initialize_logging def list_processes(): """ - Outputs the name and process ID (PID) of all currently running backend Augur processes, including any workers. Will only work in a virtual environment. + Outputs the name and process ID (PID) of all currently running backend Augur processes, including any workers. Will only work in a virtual environment. """ processes = get_augur_processes() for process in processes: diff --git a/augur/config.py b/augur/config.py index b282acd3c4..2cb1b64cdf 100644 --- a/augur/config.py +++ b/augur/config.py @@ -2,10 +2,16 @@ import json import logging +from augur.logging import ROOT_AUGUR_DIRECTORY + ENVVAR_PREFIX = "AUGUR_" +CONFIG_HOME = f"{os.getenv('HOME', '~')}/.augur" default_config = { "version": 1, + "Augur": { + "developer": 0 + }, "Database": { "name": "augur", "host": "localhost", @@ -65,7 +71,7 @@ ], "model": "contributors", "repo_group_id": 0 - }, + }, { "delay": 1000000, "given": [ @@ -122,7 +128,7 @@ }, "insight_worker": { "port": 50300, - "metrics": {"issues-new": "issues", "code-changes": "commit_count", "code-changes-lines": "added", + "metrics": {"issues-new": "issues", "code-changes": "commit_count", "code-changes-lines": "added", "reviews": "pull_requests", "contributors-new": "new_contributors"}, "confidence_interval": 95, "contamination": 0.041, @@ -223,6 +229,7 @@ def __init__(self, root_augur_dir, given_config={}): self._root_augur_dir = root_augur_dir self._default_config = default_config self._env_config = {} + self.config_file_location = None self.load_config() self.version = self.get_version() 
self._config.update(given_config) @@ -268,9 +275,9 @@ def load_config(self): logger.debug("Attempting to load config file") try: - config_file_path = self.discover_config_file() + self.discover_config_file() try: - with open(config_file_path, 'r+') as config_file_handle: + with open(self.config_file_location, 'r+') as config_file_handle: self._config = json.loads(config_file_handle.read()) logger.debug("Config file loaded successfully") except json.decoder.JSONDecodeError as e: @@ -285,10 +292,10 @@ def load_config(self): self.load_env_configuration() def discover_config_file(self): - default_config_path = self._root_augur_dir + '/' + self._default_config_file_name + developer_config_location = ROOT_AUGUR_DIRECTORY + "/" + self._default_config_file_name config_file_path = None - config_locations = [self._default_config_file_name, default_config_path + config_locations = [developer_config_location, CONFIG_HOME + "/" + self._default_config_file_name , f"/opt/augur/{self._default_config_file_name}"] if os.getenv('AUGUR_CONFIG_FILE', None) is not None: config_file_path = os.getenv('AUGUR_CONFIG_FILE') @@ -302,9 +309,9 @@ def discover_config_file(self): except FileNotFoundError: pass if config_file_path: - return config_file_path + self.config_file_location = config_file_path else: - raise(AugurConfigFileNotFoundException(message=f"{self._default_config_file_name} not found", errors=None)) + raise(AugurConfigFileNotFoundException(message="Config file was not found", errors=None)) def load_env_configuration(self): self.set_env_value(section='Database', name='key', environment_variable='AUGUR_GITHUB_API_KEY') diff --git a/augur/logging.py b/augur/logging.py index f41aaf2617..97c3b336c1 100644 --- a/augur/logging.py +++ b/augur/logging.py @@ -3,7 +3,7 @@ import logging.handlers from logging import FileHandler, StreamHandler, Formatter from multiprocessing import Process, Queue, Event, current_process -from time import sleep +from time import sleep import os from pathlib 
import Path import atexit @@ -98,7 +98,6 @@ def __init__(self, disable_logs=False, reset_logfiles=True): if disable_logs: self._disable_all_logging() - def _disable_all_logging(self): for logger in ["augur", "augur.application", "augur.housekeeper", "augur.config", "augur.cli", "root"]: lg = logging.getLogger(logger) @@ -142,7 +141,6 @@ def configure_logging(self, augur_config): self._configure_logfiles() self._configure_cli_logger() self._configure_gunicorn_logging() - logger.debug("Loggers are fully configured") def _configure_logfiles(self): self.logfile_config = { diff --git a/augur/metrics/release.py b/augur/metrics/release.py index e999a53abb..c264421446 100644 --- a/augur/metrics/release.py +++ b/augur/metrics/release.py @@ -9,7 +9,7 @@ @register_metric() def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None): - """ Returns a timeseris of new reviews or pull requests opened + """ Returns a timeseris of new releases created :param repo_group_id: The repository's repo_group_id :param repo_id: The repository's repo_id, defaults to None @@ -24,7 +24,7 @@ def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, e end_date = datetime.datetime.now().strftime('%Y-%m-%d') if not repo_id: - reviews_SQL = s.sql.text(""" + releases_SQL = s.sql.text(""" SELECT res.repo_name, res.release_id, @@ -47,18 +47,19 @@ def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, e releases LEFT JOIN repo ON releases.repo_id = repo.repo_id WHERE repo.repo_id in (SELECT repo_id FROM repo WHERE repo_group_id=:repo_group_id ) + AND releases.tag_only = False ) as res GROUP BY releases.repo_id, releases.release_id ORDER BY releases.release_published_at DESC """) - results = pd.read_sql(reviews_SQL, self.database, + results = pd.read_sql(releases_SQL, self.database, params={'period': period, 'repo_group_id': repo_group_id, 'begin_date': begin_date, 'end_date': end_date }) return results else: - reviews_SQL = 
s.sql.text(""" + releases_SQL = s.sql.text(""" SELECT repo.repo_name, releases.release_id, @@ -75,11 +76,80 @@ def releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, e COUNT(releases) FROM releases LEFT JOIN repo ON releases.repo_id = repo.repo_id + WHERE releases.tag_only = False GROUP BY repo.repo_id, releases.release_id ORDER BY releases.release_published_at DESC """) - results = pd.read_sql(reviews_SQL, self.database, + results = pd.read_sql(releases_SQL, self.database, + params={'period': period, 'repo_id': repo_id, + 'begin_date': begin_date, 'end_date': end_date}) + return results + +@register_metric() +def tag_only_releases(self, repo_group_id, repo_id=None, period='day', begin_date=None, end_date=None): + """ Returns a timeseries of new tags that are considered releases + without an official release being published + + :param repo_group_id: The repository's repo_group_id + :param repo_id: The repository's repo_id, defaults to None + :param period: To set the periodicity to 'day', 'week', 'month' or 'year', defaults to 'day' + :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00' + :param end_date: Specifies the end date, defaults to datetime.now() + :return: DataFrame of new releases/period + """ + if not begin_date: + begin_date = '1970-1-1' + if not end_date: + end_date = datetime.datetime.now().strftime('%Y-%m-%d') + + if not repo_id: + releases_SQL = s.sql.text(""" + SELECT + res.repo_name, + res.release_id, + res.release_name, + res.release_author, + res.release_created_at, + res.release_tag_name, + COUNT(res) + FROM ( + SELECT + releases.*, + repo.repo_name + FROM + releases LEFT JOIN repo ON releases.repo_id = repo.repo_id + WHERE + repo.repo_id in (SELECT repo_id FROM repo WHERE repo_group_id=:repo_group_id ) + AND releases.tag_only = True + ) as res + GROUP BY releases.repo_id, releases.release_id + ORDER BY releases.release_published_at DESC + """) + + results = pd.read_sql(releases_SQL, self.database,
+ params={'period': period, 'repo_group_id': repo_group_id, + 'begin_date': begin_date, 'end_date': end_date }) + return results + + else: + releases_SQL = s.sql.text(""" + SELECT + repo.repo_name, + releases.release_id, + releases.release_name, + releases.release_author, + releases.release_created_at, + releases.release_tag_name, + COUNT(releases) + FROM + releases LEFT JOIN repo ON releases.repo_id = repo.repo_id + WHERE releases.tag_only = True + GROUP BY repo.repo_id, releases.release_id + ORDER BY releases.release_published_at DESC + """) + + results = pd.read_sql(releases_SQL, self.database, params={'period': period, 'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date}) return results diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 5393b7b680..0000000000 --- a/docs/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# Getting Started with Docs - -1. Directory for source files: `augur/docs/source` -2. Each branch has itself published on readthedocs.io - - Master: https://oss-augur.readthedocs.io/en/master/ - - Dev: https://oss-augur.readthedocs.io/en/dev/ -3. Syntax reference for restructred text, which is what readthedocs.io uses: https://docutils.sourceforge.io/docs/user/rst/quickref.html -4. Best way to figure out how things are structure is to look in the source folder under docs - - Each directory has a `toc.rst` file that is a table of contents - - There are configuration steps on each branch so the docs are built there if that's what you are working on. - - There is also a way to build locally, but Sean doesn't know what it is. 
\ No newline at end of file diff --git a/docs/apidoc.json b/docs/apidoc.json deleted file mode 100644 index cb4126f556..0000000000 --- a/docs/apidoc.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "name": "Augur API", - "version": "0.0.2", - "description": "REST API for Health and Sustainability Metrics", - "title": "Augur API", - "url" : "http://localhost:5000/api/unstable" -} \ No newline at end of file diff --git a/docs/source/development-guide/installation.rst b/docs/source/development-guide/installation.rst index 5f2a07ac63..eb277aebf1 100644 --- a/docs/source/development-guide/installation.rst +++ b/docs/source/development-guide/installation.rst @@ -14,10 +14,10 @@ However, during the course of development, you might find that you need to reset More information about Augur's Docker images can be found `here <../docker/docker.html>`_. If you're new to our Docker process, we recommend following the `introduction section <../docker/toc.html>`_ first. -Installing the source code +Installing from source ---------------------------- -The process for installing Augur's source code for development is essentially the same as detailed in the `installation <../getting-started/installation.html>`_ section of the Getting Started guide. +The process for installing Augur's source code for development is essentially the same as detailed in the `installation <../getting-started/installation.html>`_ section of the Getting Started guide. **However**, when running the installation script, use the following command instead: @@ -25,15 +25,16 @@ The process for installing Augur's source code for development is essentially th $ make install-dev -This will install a few extra dependencies for testing and documentation, as well as install all the Python packages in `edit mode `_. -This essentially means you will not have to run ``pip install .`` everytime you make a change to the backend. 
+This will install a few extra dependencies for testing and documentation, as well as install all the Python packages in `editable mode `_. This means you will not have to reinstall the package everytime you make a change to the Python source code. + +This command will also create your ``augur.config.json`` file in the root of your cloned source code directory **instead of** the default location in ``$HOME/.augur/``. This is purely for convenience sake, as it will allow you to open this file in your text editor with all the other source code files, and also allows you to have multiple developer installations of Augur on the same machine if needed. If Augur finds a config file in both the root of the cloned directory AND in the default location, it will always use the one in the root of the cloned directory. .. note:: - You can still use ``make clean`` to get rid of the installed binaries if something we wrong and you want to try again. + You can still use ``make clean`` to get rid of the installed binaries if something went wrong and you want to try again. Conclusion ----------- -Like I said, it's pretty similar. For (optional) further reading, the `Makefile `_ documentation and the `Creating a Metric guide `_ are good places to start. +All in all, it's pretty similar. For further reading, the `Makefile `_ documentation and the `Creating a Metric guide `_ are good places to start. Happy hacking! \ No newline at end of file diff --git a/docs/source/development-guide/workers/writing-docs.rst b/docs/source/development-guide/workers/writing-docs.rst deleted file mode 100644 index 93bc3926b3..0000000000 --- a/docs/source/development-guide/workers/writing-docs.rst +++ /dev/null @@ -1,53 +0,0 @@ -Writing documentation -====================== - -Currently, we maintain a set of library and usage documentation (which is what you are reading!) that -we update with each release. The following sections briefly outline how to contribute to our documentation. - -.. 
note:: - - **All** PRs which require a documentation change will not be merged until that change has been made. - -Library and Usage Documentation --------------------------------- - -The library documenation is written using `reStructuredText `_ for the raw markdown, and then built into web pages using `Sphinx `_. - -We'll avoid going over reStructuredText in detail here, -but `here `__ is a good reference document. - -Similarly, we'll avoid going over Sphinx in great detail as well; `here `__ is a good reference document for the -most commonly used directives. - -Building -~~~~~~~~ -To see your changes and make sure everything rendered correctly, run ``make docs`` in the root -``augur/`` directory, and then open ``docs/build/html/index.html`` in your web browser to view it. - -.. code-block:: bash - - $ make docs - $ open docs/build/html/index.html - -Or, you can use the shorcut which does exactly this: - -.. code-block:: bash - - # to build and then open to the locally built documentation - $ make docs-view - - -After opening it once, just run ``make docs`` and refresh the page in your browser to see the changes: - -.. code-block:: bash - - # after opening the documentation - $ make docs - -Hosting -~~~~~~~ -Our documentation is graciously hosted by `Read the Docs `_. - -Enabled branches of the main ``chaoss/augur`` repository will each have their own documentation, with the -default ``master`` corresponding to ``master`` on the readthedocs. The documentation will automatically be -built and deployed on a push to one of these branches or on any incoming PR, but please don't forget to check before you push! 
\ No newline at end of file diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index 8e91f29aab..ce74664199 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -21,7 +21,7 @@ There are a few workers that ship ready to collect out of the box: - ``linux_badge_worker`` (collects `CII badging `_ data from the CII API) - ``insight_worker`` (queries Augur's metrics API to find interesting anomalies in the collected data) -All worker configuration options are found in the ``Workers`` block of the ``augur.config.json`` file (which you generated at the end of the previous section) with each worker having its own subsection with same title as the the worker's name. +All worker configuration options are found in the ``Workers`` block of the ``augur.config.json`` file (which was generated for you at the end of the previous section). This file is located at ``$HOME/.augur/augur.config.json``. Each worker has its own subsection with the same title as the worker's name. A full configuration file reference can be found on the next page, but we recommend leaving the defaults and only changing them when necessary; read on for more on how to make sure your workers are properly configured. @@ -51,7 +51,7 @@ Next up are the configuration options specific to each worker (but some workers ``insight_worker`` :::::::::::::::::: -We recommend leaving the defaults in place for the insight worker unless you interested in other metrics, or anomalies for a different time period. +We recommend leaving the defaults in place for the insight worker unless you are interested in other metrics, or anomalies for a different time period. - ``training_days``, which specifies the date range that the ``insight_worker`` should use as its baseline for the statistical comparison.
Defaults to ``365``, meaning that the worker will identify metrics that have had anomalies compared to their values over the course of the past year, starting at the current date. @@ -66,15 +66,15 @@ We recommend leaving the defaults in place for the insight worker unless you int 'endpoint_name_1': 'field_2_of_endpoint', 'endpoint_name_2': 'field_1_of_endpoint', ... - } + } # defaults to the following { - "issues-new": "issues", - "code-changes": "commit_count", - "code-changes-lines": "added", - "reviews": "pull_requests", + "issues-new": "issues", + "code-changes": "commit_count", + "code-changes-lines": "added", + "reviews": "pull_requests", "contributors-new": "new_contributors" } @@ -114,7 +114,7 @@ If you're using the Docker container, you can use the `provided UI <../docker/us Running collections -------------------- -Congratuations! At this point you (hopefully) have a fully functioning and configured Augur instance. +Congratuations! At this point you (hopefully) have a fully functioning and configured Augur instance. After you've loaded your repos, you're ready for your first collection run. We recommend running only the default workers first to gather the initial data. If you're collecting data for a lot of repositories, or repositories with a lot of data, we recommend increasing the number of ``github_workers`` and ``pull_request_workers``. diff --git a/docs/source/getting-started/command-line-interface/util.rst b/docs/source/getting-started/command-line-interface/util.rst index aac7b2d17b..0ed98a6bf6 100644 --- a/docs/source/getting-started/command-line-interface/util.rst +++ b/docs/source/getting-started/command-line-interface/util.rst @@ -48,9 +48,42 @@ Example usage:: AUGUR_DB_USER="your_db_user" AUGUR_DB_PASSWORD="your_db_password" +``stop`` +--------- +Gracefully terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. 
+ +Example usage:: + + # to stop the server and workers + $ augur util stop + + # successful output looks like: + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33607 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33775 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33776 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33777 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33778 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33780 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33781 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33782 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33783 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33784 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33785 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33786 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33787 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33788 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33789 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33790 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33792 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33793 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33794 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33795 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33798 + > CLI: [util.cli_stop_processes] [INFO] Stopping process 33962 + ``kill`` --------- -Terminates all currently running backend Augur processes, including any workers. Will only work in a virtual environment. +Forcefully terminates all currently running backend Augur processes, including any workers, by using ``SIGKILL``. Will only work in a virtual environment.
Example usage:: @@ -58,28 +91,12 @@ Example usage:: $ augur util kill # successful output looks like: - > CLI: [util.cli_kill_processes] [INFO] Killing process 33607 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33775 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33776 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33777 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33778 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33780 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33781 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33782 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33783 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33784 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33785 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33786 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33787 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33788 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33789 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33790 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33792 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33793 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33794 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33795 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33798 - > CLI: [util.cli_kill_processes] [INFO] Killing process 33962 + > CLI: [util.kill_processes] [INFO] Killing process 87340 + > CLI: [util.kill_processes] [INFO] Killing process 87573 + > CLI: [util.kill_processes] [INFO] Killing process 87574 + > CLI: [util.kill_processes] [INFO] Killing process 87575 + > CLI: [util.kill_processes] [INFO] Killing process 87576 + ``list`` --------- diff --git a/docs/source/getting-started/dev-start.md b/docs/source/getting-started/dev-start.md deleted file mode 100644 index 8f7a22eb88..0000000000 --- 
a/docs/source/getting-started/dev-start.md +++ /dev/null @@ -1,20 +0,0 @@ -# Getting Started with Development -1. Augur Documentation: https://oss-augur.readthedocs.io/en/dev/ (Use the dev branch) -2. Follow documentation to install on whatever OS you are most comfortable with (other than windows). -3. Use the "oh my zsh" shell. It has a lot of nice Git features. https://ohmyz.sh/ -4. Postgres.app download for mac: https://postgresapp.com/downloads.html -5. git clone https://github.com/chaoss/augur augur-rdohm - - `pwd` will tell you the current working directory. - - https://ma.ttias.be/mac-os-xcrun-error-invalid-active-developer-path-missing-xcrun/ -6. For the first week or so, if you encounter an issue that is not clear? Its clear what you're supposed to do as a next step: Enter an issue: https://github.com/chaoss/augur/issues -7. I recommmend getting a copy of Navicat for Postgres -8. Create a password on the command line for postgres: -``` -sudo -u postgres psql postgres - -# \password postgres - -Enter new password: - -``` -9. diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index e36d4b3037..df782a2047 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -12,7 +12,7 @@ If you're running Augur on macOS, we strongly suggest adding the following line macOS takes "helpful" measures to prevent Python subprocesses (which Augur uses) from forking cleanly, and setting this environment variable disables these safety measures to restore normal Python functionality. .. warning:: - If you skip this step, you'll likely see all housekeeer jobs randomly exiting for no reason, and the Gunicorn server will not behave nicely either. Don't say we didn't warn you! + If you skip this step, you'll likely see all housekeeper jobs randomly exiting for no reason, and the Gunicorn server will not behave nicely either. Skip this step at your peril! 
Dependencies @@ -23,14 +23,14 @@ Backend Required: - `GitHub Access Token `__ (``repo`` and all ``read`` scopes except ``enterprise``) -- `GitLab Access Token `__ +- `GitLab Access Token `__ - `Python 3.6 or later `__ Our REST API & data collection workers are written in Python 3.6. We query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository, so GitLab and GitHub access tokens are **required** for data collection. Optional: -- `Go 1.12 or later `__ +- `Go 1.12 or later `__ The ``value_worker`` uses a Go package called `scc `_ to run COCOMO calculations. Once you've installed Go, follow the appropriate steps for your system to install the ``scc`` package. @@ -56,6 +56,9 @@ after which you'll move on to the next section to configure the workers. .. note:: Lines that start with a ``$`` denote a command to be run in an interactive terminal. +.. warning:: + Do **NOT** install or run Augur using ``sudo``. It is not required, and using it will inevitably cause some permissions trouble. Don't say we didn't warn you! + 0. Clone the repository and change to the newly created directory. .. code-block:: bash @@ -63,7 +66,7 @@ after which you'll move on to the next section to configure the workers. $ git clone https://github.com/chaoss/augur.git $ cd augur/ -1. Create a virtual environment in a directory of your choosing. Be sure to use the correct ``python`` command for +1. Create a virtual environment in a directory of your choosing. Be sure to use the correct ``python`` command for your installation of Python 3: on most systems, this is ``python3``, but yours may differ (you can use ``python -V`` or ``python3 -V`` to check). .. code-block:: bash @@ -92,7 +95,7 @@ your installation of Python 3: on most systems, this is ``python3``, but yours m $ make install -If you think something went wrong, check the log files under ``logs/install/``. 
If you want to try again, you can use ``make clean`` to delete any build files before running ``make install`` again. +If you think something went wrong, check the log files in ``logs/``. If you want to try again, you can use ``make clean`` to delete any build files before running ``make install`` again. .. note:: diff --git a/metadata.py b/metadata.py index 0b47499f56..7ef300e746 100644 --- a/metadata.py +++ b/metadata.py @@ -4,8 +4,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics & data collection" -__version__ = "0.13.0" -__release__ = "v0.13.0" +__version__ = "0.13.1" +__release__ = "v0.13.1" __license__ = "MIT" __copyright__ = "CHAOSS & Augurlabs 2020" diff --git a/schema/generate/draft.md b/schema/generate/draft.md deleted file mode 100644 index 3708e8bc91..0000000000 --- a/schema/generate/draft.md +++ /dev/null @@ -1,57 +0,0 @@ -```sql - - - -CREATE INDEX CONCURRENTLY "contributor_worker_issue_events_finder" ON "augur_data"."issue_events" USING brin ( - "cntrb_id" -); - - -CREATE INDEX CONCURRENTLY "contributor_worker_pull_request_events_finder" ON "augur_data"."pull_request_events" USING brin ( - "cntrb_id" -); - - - - - issue_events_result = self.db.execute(self.issue_events_table.update().where( - self.issue_events_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuples in the issue_events table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_events_result = self.db.execute(self.pull_request_events_table.update().where( - self.pull_request_events_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuples in the pull_request_events table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - issues_cntrb_result = self.db.execute(self.issues_table.update().where( - self.issues_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - 
self.logger.info("Updated cntrb_id column for tuples in the issues table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - issues_reporter_result = self.db.execute(self.issues_table.update().where( - self.issues_table.c.reporter_id.in_(dupe_ids)).values(reporter_col)) - self.logger.info("Updated reporter_id column in the issues table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - issue_assignee_result = self.db.execute(self.issue_assignees_table.update().where( - self.issue_assignees_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the issue_assignees table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_assignee_result = self.db.execute(self.pull_request_assignees_table.update().where( - self.pull_request_assignees_table.c.contrib_id.in_(dupe_ids)).values(pr_assignee_col)) - self.logger.info("Updated contrib_id column for tuple in the pull_request_assignees table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - message_result = self.db.execute(self.message_table.update().where( - self.message_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the message table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_reviewers_result = self.db.execute(self.pull_request_reviewers_table.update().where( - self.pull_request_reviewers_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the pull_request_reviewers table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_meta_result = self.db.execute(self.pull_request_meta_table.update().where( - self.pull_request_meta_table.c.cntrb_id.in_(dupe_ids)).values(update_col)) - self.logger.info("Updated cntrb_id column for tuple in the 
pull_request_meta table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - - pr_repo_result = self.db.execute(self.pull_request_repo_table.update().where( - self.pull_request_repo_table.c.pr_cntrb_id.in_(dupe_ids)).values(pr_repo_col)) - self.logger.info("Updated cntrb_id column for tuple in the pull_request_repo table with value: {} replaced with new cntrb id: {}".format(new_id, self.cntrb_id_inc)) - -``` \ No newline at end of file diff --git a/scripts/install/api_key.sh b/scripts/install/api_key.sh index a9fc868830..4c767b5806 100755 --- a/scripts/install/api_key.sh +++ b/scripts/install/api_key.sh @@ -20,13 +20,11 @@ echo if [[ $existing_api_key != *"invalid_key"* ]]; then read -r -p "We noticed you have an Augur API key already. Would you like to overwrite it with a new one? [Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo get_api_key ;; *) - echo "Skipping API key generation process and resuming installation..." - echo ;; esac else diff --git a/scripts/install/backend.sh b/scripts/install/backend.sh index 7700a0e545..4d0e12a235 100755 --- a/scripts/install/backend.sh +++ b/scripts/install/backend.sh @@ -12,4 +12,3 @@ if [[ $target == *"prod"* ]]; then else pip install -e .[dev] fi - diff --git a/scripts/install/checks.sh b/scripts/install/checks.sh index 43f09933c7..942bcf46b2 100755 --- a/scripts/install/checks.sh +++ b/scripts/install/checks.sh @@ -4,7 +4,7 @@ if [[ -z "$VIRTUAL_ENV" ]]; then echo "*** We noticed you're not currently inside a virtual environment. Augur MUST be run inside a virtual environment. ***" read -r -p "*** Would you like us to generate a environment for you automatically? If you select no, you must create it yourself. [Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo $augur_python_command -m venv $HOME/.virtualenvs/augur_env echo "*** Your environment was installed to $HOME/.virtualenvs/augur_env/. 
Please activate your environment using your shell's appropriate command. ***" @@ -57,9 +57,4 @@ fi if [[ ! -d logs ]]; then mkdir logs - mkdir logs/install -fi - -if [[ ! -d logs/install ]]; then - mkdir logs/install fi diff --git a/scripts/install/config.sh b/scripts/install/config.sh index c635ff4b41..da70221ad9 100755 --- a/scripts/install/config.sh +++ b/scripts/install/config.sh @@ -4,6 +4,8 @@ PS3=" Please type the number corresponding to your selection and then press the Enter/Return key. Your choice: " +target=$1 + function get_api_key_and_repo_path() { echo echo "Please provide a valid GitHub API key." @@ -70,7 +72,8 @@ function set_db_credentials() { read -p "Database: " db_name read -p "User: " db_user - read -p "Password: " password + read -s -p "Password: " password + echo if [[ $install_locally == 'false' ]]; then read -p "Host: " host @@ -91,7 +94,14 @@ function save_credentials() { echo "**********************************" echo - augur configure generate --db_name $db_name --db_host $host --db_port $port --db_user $db_user --db_password $password --github_api_key $github_api_key --gitlab_api_key $gitlab_api_key --facade_repo_directory $facade_repo_directory + cmd=( augur configure generate --db_name $db_name --db_host $host --db_port $port --db_user $db_user --db_password $password --github_api_key $github_api_key --gitlab_api_key $gitlab_api_key --facade_repo_directory $facade_repo_directory ) + + if [[ $target == *"dev"* ]]; then + cmd+=( --write-to-src ) + fi + + "${cmd[@]}" + augur db check-pgpass } @@ -106,6 +116,7 @@ function create_db_schema() { echo } + echo echo "**********************************" echo "Setting up database credentials..." diff --git a/scripts/install/frontend.sh b/scripts/install/frontend.sh index 3b341a7b6b..28660fcac5 100755 --- a/scripts/install/frontend.sh +++ b/scripts/install/frontend.sh @@ -22,14 +22,9 @@ function install_deps() { read -r -p "Would you like to install Augur's frontend dependencies? 
[Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo "Installing..." - - if [[ ! -d logs/install ]]; then - mkdir logs/install - fi - - install_deps > logs/install/frontend.log 2>&1 + install_deps > logs/frontend-install.log echo "Done!" ;; *) diff --git a/scripts/install/install.sh b/scripts/install/install.sh index 77ad991e9f..06f9648ff7 100755 --- a/scripts/install/install.sh +++ b/scripts/install/install.sh @@ -18,29 +18,23 @@ else echo fi -echo "Installing the backend and its dependencies..." -scripts/install/backend.sh $target > logs/install/backend.log 2>&1 +scripts/install/backend.sh $target 2>&1 | tee logs/backend-install.log echo "Done!" -echo "Installing workers and their dependencies..." -scripts/install/workers.sh $target > logs/install/workers.log 2>&1 +scripts/install/workers.sh $target 2>&1 | tee logs/workers-install.log echo "Done!" -if [[ ! -e augur.config.json ]]; then +if [[ ! -e augur.config.json && ! -e $HOME/.augur/augur.config.json ]]; then echo "No config file found. Generating..." - scripts/install/config.sh - echo + scripts/install/config.sh $target else read -r -p "We noticed you have a config file already. Would you like to overwrite it with a new one? [Y/n] " response case "$response" in - [yY][eE][sS]|[yY]) + [yY][eE][sS]|[yY]) echo "Generating a config file..." - scripts/install/config.sh - echo + scripts/install/config.sh $target ;; *) - echo "Skipping config generation process and resuming installation..." - echo ;; esac fi diff --git a/workers/release_worker/release_worker.py b/workers/release_worker/release_worker.py index 5c70210311..d7db2de70a 100644 --- a/workers/release_worker/release_worker.py +++ b/workers/release_worker/release_worker.py @@ -10,6 +10,13 @@ #TODO - fully edit to match releases class ReleaseWorker(Worker): + """ + Worker that collects Repository Releases data from the Github API + and stores it in our database. 
+ + :param task: most recent task the broker added to the worker's queue + :param config: holds info like api keys, descriptions, and database connection strings + """ def __init__(self, config={}): worker_type = "release_worker" @@ -30,39 +37,154 @@ def __init__(self, config={}): self.tool_version = '1.0.0' self.data_source = 'GitHub API' - def insert_release(self, repo_id, owner, release): - author = release['author']['name']+'_'+release['author']['company'] + def get_release_inf(self, repo_id, release, tag_only): + if not tag_only: + name = "" if release['author']['name'] is None else release['author']['name'] + company = "" if release['author']['company'] is None else release['author']['company'] + author = name + '_' + company + release_inf = { + 'release_id': release['id'], + 'repo_id': repo_id, + 'release_name': release['name'], + 'release_description': release['description'], + 'release_author': author, + 'release_created_at': release['createdAt'], + 'release_published_at': release['publishedAt'], + 'release_updated_at': release['updatedAt'], + 'release_is_draft': release['isDraft'], + 'release_is_prerelease': release['isPrerelease'], + 'release_tag_name': release['tagName'], + 'release_url': release['url'], + 'tag_only': tag_only, + 'tool_source': self.tool_source, + 'tool_version': self.tool_version, + 'data_source': self.data_source + } + else: + if 'tagger' in release['target']: + if 'name' in release['target']['tagger']: + name = release['target']['tagger']['name'] + else: + name = "" + if 'email' in release['target']['tagger']: + email = '_' + release['target']['tagger']['email'] + else: + email = "" + author = name + email + if 'date' in release['target']['tagger']: + date = release['target']['tagger']['date'] + else: + date = "" + else: + author = "" + date = "" + release_inf = { + 'release_id': release['id'], + 'repo_id': repo_id, + 'release_name': release['name'], + 'release_author': author, + 'release_tag_name': release['name'], + 'tag_only': 
tag_only, + 'tool_source': self.tool_source, + 'tool_version': self.tool_version, + 'data_source': self.data_source + } + if date: + release_inf['release_created_at'] = date + + return release_inf + + + def insert_release(self, task, repo_id, owner, release, tag_only = False): + + # Get current table values + release_id_data_sql = s.sql.text(""" + SELECT releases.release_id + FROM releases + WHERE repo_id = :repo_id + """) + self.logger.info(f'Getting release table values with the following PSQL query: \n{release_id_data_sql}\n') + release_id_data = pd.read_sql(release_id_data_sql, self.db, params={'repo_id': repo_id}) + release_id_data = release_id_data.apply(lambda x: x.str.strip()) + # Put all data together in format of the table self.logger.info(f'Inserting release for repo with id:{repo_id}, owner:{owner}, release name:{release["name"]}\n') - release_inf = { - 'release_id': release['id'], - 'repo_id': repo_id, - 'release_name': release['name'], - 'release_description': release['description'], - 'release_author': release['author'], - 'release_created_at': release['createdAt'], - 'release_published_at': release['publishedAt'], - 'release_updated_at': release['updatedAt'], - 'release_is_draft': release['isDraft'], - 'release_is_prerelease': release['isPrerelease'], - 'release_tag_name': release['tagName'], - 'release_url': release['url'], - 'tool_source': self.tool_source, - 'tool_version': self.tool_version, - 'data_source': self.data_source - } - - result = self.db.execute(self.releases_table.insert().values(release_inf)) - self.logger.info(f"Primary Key inserted into releases table: {result.inserted_primary_key}\n") + release_inf = self.get_release_inf(repo_id, release, tag_only) + + if release_id_data.size > 0 and release['id'] in release_id_data.values: + result = self.db.execute(self.releases_table.update().where( + self.releases_table.c.release_id==release['id']).values(release_inf)) + self.logger.info(f"Release {release['id']} updated into releases 
table\n") + else: + result = self.db.execute(self.releases_table.insert().values(release_inf)) + self.logger.info(f"Release {release['id']} inserted into releases table\n") + self.logger.info(f"Primary Key inserted into releases table: {result.inserted_primary_key}\n") self.results_counter += 1 - self.logger.info(f"Inserted info for {owner}/{repo}/{release['name']}\n") + self.logger.info(f"Inserted info for {owner}/{repo_id}/{release['name']}\n") #Register this task as completed - self.register_task_completion(task, release_id, "releases") + self.register_task_completion(task, repo_id, "releases") return - def releases_model(self, task, repo_id): + def get_query(self, owner, repo, tag_only): + if not tag_only: + query = """ + { + repository(owner:"%s", name:"%s"){ + id + releases(orderBy: {field: CREATED_AT, direction: ASC}, last: %d) { + edges { + node { + name + publishedAt + createdAt + description + id + isDraft + isPrerelease + tagName + url + updatedAt + author { + name + company + } + } + } + } + } + } + """ % (owner, repo, 10) + else: + query = """ + { + repository(owner:"%s", name:"%s"){ + id + refs(refPrefix: "refs/tags/", last: %d){ + edges { + node { + name + id + target { + ... 
on Tag { + tagger { + name + email + date + } + } + } + } + } + } + } + } + """ % (owner, repo, 10) + + return query + + def fetch_data(self, task, repo_id, tag_only = False): github_url = task['given']['github_url'] @@ -72,33 +194,7 @@ def releases_model(self, task, repo_id): url = 'https://api.github.com/graphql' - query = """ - { - repository(owner:"%s", name:"%s"){ - id - releases(orderBy: {field: CREATED_AT, direction: ASC}, last: %d) { - edges { - node { - name - publishedAt - createdAt - description - id - isDraft - isPrerelease - tagName - url - updatedAt - author { - name - company - } - } - } - } - } - } - """ % (owner, repo, 10) + query = self.get_query(owner, repo, tag_only) # Hit the graphql endpoint and retry 3 times in case of failure num_attempts = 0 @@ -139,19 +235,41 @@ def releases_model(self, task, repo_id): self.register_task_failure(task, repo_id, "Failed to hit endpoint: {}".format(url)) return - self.logger.info("repository value is: {}\n".format(data)) + data['owner'] = owner + + return data + + + def releases_model(self, task, repo_id): + data = self.fetch_data(task, repo_id) + + self.logger.info("repository value is: {}\n".format(data)) if 'releases' in data: - if 'edges' in data['releases']: + if 'edges' in data['releases'] and data['releases']['edges']: for n in data['releases']['edges']: if 'node' in n: release = n['node'] - self.insert_release(self, repo_id, owner, release) + self.insert_release(task, repo_id, data['owner'], release) else: self.logger.info("There's no release to insert. 
Current node is not available in releases: {}\n".format(n)) + elif 'edges' in data['releases'] and not data['releases']['edges']: + self.logger.info("Searching for tags instead of releases...") + data = self.fetch_data(task, repo_id, True) + self.logger.info("refs value is: {}\n".format(data)) + if 'refs' in data: + if 'edges' in data['refs']: + for n in data['refs']['edges']: + if 'node' in n: + release = n['node'] + self.insert_release(task, repo_id, data['owner'], release, True) + else: + self.logger.info("There's no release to insert. Current node is not available in releases: {}\n".format(n)) + else: + self.logger.info("There are no releases to insert for current repository: {}\n".format(data)) + else: + self.logger.info("There are no refs in data: {}\n".format(data)) else: self.logger.info("There are no releases to insert for current repository: {}\n".format(data)) else: self.logger.info("Graphql response does not contain repository: {}\n".format(data)) - -