diff --git a/augur/tasks/github/traffic/tasks.py b/augur/tasks/github/traffic/tasks.py index 8b8652ec43..22bae688ec 100644 --- a/augur/tasks/github/traffic/tasks.py +++ b/augur/tasks/github/traffic/tasks.py @@ -4,7 +4,7 @@ from augur.tasks.init.celery_app import celery_app as celery, engine from augur.application.db.data_parse import * from augur.tasks.github.util.github_paginator import GithubPaginator -from augur.tasks.github.util.github_task_session import GithubTaskSession +from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.util.worker_util import remove_duplicate_dicts from augur.tasks.github.util.util import get_owner_repo from augur.application.db.models import RepoClone, Repo @@ -16,9 +16,9 @@ def collect_github_repo_clones_data(repo_git: str) -> None: logger = logging.getLogger(collect_github_repo_clones_data.__name__) # using GithubTaskSession to get our repo_obj for which we will store data of clones - with GithubTaskSession(logger) as session: + with GithubTaskManifest(logger) as manifest: - query = session.query(Repo).filter(Repo.repo_git == repo_git) + query = manifest.augur_db.query(Repo).filter(Repo.repo_git == repo_git) repo_obj = execute_session_query(query, 'one') repo_id = repo_obj.repo_id @@ -26,23 +26,21 @@ def collect_github_repo_clones_data(repo_git: str) -> None: logger.info(f"Collecting Github repository clone data for {owner}/{repo}") - clones_data = retrieve_all_clones_data(repo_git, logger) + clones_data = retrieve_all_clones_data(repo_git, logger, manifest.key_auth) - if clones_data: - process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id, logger) - else: - logger.info(f"{owner}/{repo} has no clones") + if clones_data: + process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id, manifest.augur_db) + else: + logger.info(f"{owner}/{repo} has no clones") -def retrieve_all_clones_data(repo_git: str, logger): +def retrieve_all_clones_data(repo_git: str, logger, key_auth + ): owner, repo = get_owner_repo(repo_git) url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones" - # define GithubTaskSession to handle insertions, and store oauth keys - with GithubTaskSession(logger, engine) as session: - - clones = GithubPaginator(url, session.oauths, logger) + clones = GithubPaginator(url, key_auth, logger) num_pages = clones.get_num_pages() all_data = [] @@ -63,14 +61,12 @@ def retrieve_all_clones_data(repo_git: str, logger): return all_data -def process_clones_data(clones_data, task_name, repo_id, logger) -> None: +def process_clones_data(clones_data, task_name, repo_id, logger, augur_db) -> None: clone_history_data = clones_data[0]['clones'] clone_history_data_dicts = extract_needed_clone_history_data(clone_history_data, repo_id) - with GithubTaskSession(logger, engine) as session: - - clone_history_data = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp') - logger.info(f"{task_name}: Inserting {len(clone_history_data_dicts)} clone history records") - - session.insert_data(clone_history_data_dicts, RepoClone, ['repo_id']) \ No newline at end of file + clone_history_data = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp') + logger.info(f"{task_name}: Inserting {len(clone_history_data_dicts)} clone history records") + + augur_db.insert_data(clone_history_data_dicts, RepoClone, ['repo_id'])