Merge pull request #2353 from chaoss/main-db-leak-patch
Database connection leak patch
Ulincsys committed Apr 25, 2023
2 parents ee6c91f + ed0bd08 commit e71b050
Showing 1 changed file with 16 additions and 20 deletions.
36 changes: 16 additions & 20 deletions augur/tasks/github/traffic/tasks.py
@@ -4,7 +4,7 @@
 from augur.tasks.init.celery_app import celery_app as celery, engine
 from augur.application.db.data_parse import *
 from augur.tasks.github.util.github_paginator import GithubPaginator
-from augur.tasks.github.util.github_task_session import GithubTaskSession
+from augur.tasks.github.util.github_task_session import GithubTaskManifest
 from augur.tasks.util.worker_util import remove_duplicate_dicts
 from augur.tasks.github.util.util import get_owner_repo
 from augur.application.db.models import RepoClone, Repo
@@ -16,33 +16,31 @@ def collect_github_repo_clones_data(repo_git: str) -> None:
     logger = logging.getLogger(collect_github_repo_clones_data.__name__)
 
     # using GithubTaskSession to get our repo_obj for which we will store data of clones
-    with GithubTaskSession(logger) as session:
+    with GithubTaskManifest(logger) as manifest:
 
-        query = session.query(Repo).filter(Repo.repo_git == repo_git)
+        query = manifest.augur_db.query(Repo).filter(Repo.repo_git == repo_git)
         repo_obj = execute_session_query(query, 'one')
         repo_id = repo_obj.repo_id
 
         owner, repo = get_owner_repo(repo_git)
 
         logger.info(f"Collecting Github repository clone data for {owner}/{repo}")
 
-        clones_data = retrieve_all_clones_data(repo_git, logger)
+        clones_data = retrieve_all_clones_data(repo_git, logger, manifest.key_auth)
 
-    if clones_data:
-        process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id, logger)
-    else:
-        logger.info(f"{owner}/{repo} has no clones")
+        if clones_data:
+            process_clones_data(clones_data, f"{owner}/{repo}: Traffic task", repo_id, logger, manifest.augur_db)
+        else:
+            logger.info(f"{owner}/{repo} has no clones")


-def retrieve_all_clones_data(repo_git: str, logger):
+def retrieve_all_clones_data(repo_git: str, logger, key_auth):
     owner, repo = get_owner_repo(repo_git)
 
     url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones"
 
-    # define GithubTaskSession to handle insertions, and store oauth keys
-    with GithubTaskSession(logger, engine) as session:
-
-        clones = GithubPaginator(url, session.oauths, logger)
+    clones = GithubPaginator(url, key_auth, logger)
 
     num_pages = clones.get_num_pages()
     all_data = []
@@ -63,14 +61,12 @@ def retrieve_all_clones_data(repo_git: str, logger):
     return all_data
 
 
-def process_clones_data(clones_data, task_name, repo_id, logger) -> None:
+def process_clones_data(clones_data, task_name, repo_id, logger, augur_db) -> None:
     clone_history_data = clones_data[0]['clones']
 
     clone_history_data_dicts = extract_needed_clone_history_data(clone_history_data, repo_id)
 
-    with GithubTaskSession(logger, engine) as session:
-
-        clone_history_data = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp')
-        logger.info(f"{task_name}: Inserting {len(clone_history_data_dicts)} clone history records")
-
-        session.insert_data(clone_history_data_dicts, RepoClone, ['repo_id'])
+    clone_history_data = remove_duplicate_dicts(clone_history_data_dicts, 'clone_data_timestamp')
+    logger.info(f"{task_name}: Inserting {len(clone_history_data_dicts)} clone history records")
+
+    augur_db.insert_data(clone_history_data_dicts, RepoClone, ['repo_id'])
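
The pattern behind the patch, in miniature: before, each helper opened its own GithubTaskSession (and with it a pooled database connection) that was not reliably returned; now the task opens one GithubTaskManifest and hands its augur_db session and key_auth handle to every helper. Below is a minimal, self-contained sketch of that shape, assuming only standard context-manager semantics; the names TaskManifest, DbSession, and fetch are illustrative stand-ins, not Augur's actual implementation.

import logging


class DbSession:
    """Stand-in for a SQLAlchemy session backed by a pooled connection."""

    def close(self):
        print("connection returned to pool")


class TaskManifest:
    """Illustrative stand-in for GithubTaskManifest: one object owns the
    task's DB session and API credentials for the task's whole lifetime."""

    def __init__(self, logger):
        self.logger = logger

    def __enter__(self):
        self.augur_db = DbSession()       # single session for the whole task
        self.key_auth = {"token": "..."}  # shared key-auth handle (placeholder)
        return self

    def __exit__(self, exc_type, exc, tb):
        self.augur_db.close()             # runs even if the task body raised


def fetch(manifest):
    # Helpers use the shared session and auth instead of opening their own
    # session; the per-helper sessions were the source of the leak.
    manifest.logger.info("fetching with shared auth %s", manifest.key_auth)


with TaskManifest(logging.getLogger(__name__)) as manifest:
    fetch(manifest)

Because the session is created and closed in exactly one place, an exception anywhere in the task still releases the connection, which is what the commit title refers to.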
